commit 160e8241088e8afbc5c8db2529e6a058f51f72db
parent c35ddcc358ddc727c4cdfbc6a27bf9a40bd3ff1e
Author: NunoSempere <nuno.sempere@protonmail.com>
Date: Mon, 29 May 2023 19:40:03 -0400
time measuring tweaks.
Diffstat:
8 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/C-optimized/README.md b/C-optimized/README.md
@@ -7,13 +7,18 @@ The main changes are:
- an optimization of the mixture function (it passes the functions instead of the whole arrays, reducing in great measure the memory usage and the computation time) and
- the implementation of multi-threading with OpenMP.
+## Performance
+
The mean time of execution is 6 ms. With the following distribution:

The hardware used has been an AMD 5800x3D and 16GB of DDR4-3200 MHz.
-Take into account that the multi-threading introduces a bit of dispersion in the execution time due to the creation and destruction of threads.
-
Also, the time data has been collected by executing the interior of the main() function 1000 times in a for loop, not executing the program itself 1000 times.
+## Multithreading
+
+Take into account that the multi-threading introduces a bit of dispersion in the execution time due to the creation and destruction of threads.
+
+On Nuño's machine, multithreading actually introduces a noticeable slowdown factor.
diff --git a/C-optimized/makefile b/C-optimized/makefile
@@ -37,27 +37,17 @@ format: $(SRC)
$(FORMATTER) $(SRC)
run: $(SRC) $(OUTPUT)
- OMP_NUM_THREADS=4 ./$(OUTPUT)
-
-test: $(SRC) $(OUTPUT)
OMP_NUM_THREADS=1 ./$(OUTPUT)
- echo ""
- OMP_NUM_THREADS=2 ./$(OUTPUT)
- echo ""
+
+multi: $(SRC) $(OUTPUT)
+ OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
+ OMP_NUM_THREADS=2 ./$(OUTPUT) && echo
OMP_NUM_THREADS=4 ./$(OUTPUT)
-# echo "Increasing stack size limit, because we are dealing with 1M samples"
-# # ulimit: increase stack size limit
-# # -Ss: the soft limit. If you set the hard limit, you then can't raise it
-# # 256000: around 250Mbs, if I'm reading it correctly.
-# # Then run the program
-# ulimit -Ss 256000 && ./$(OUTPUT)
+time:
+ OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
+ OMP_NUM_THREADS=2 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
+ OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
linux-install:
sudo apt-get install libomp-dev
-
-# Old:
-# Link libraries, for good measure
-# LD_LIBRARY_PATH=/usr/local/lib
-# export LD_LIBRARY_PATH
-
diff --git a/C-optimized/out/samples b/C-optimized/out/samples
Binary files differ.
diff --git a/C-optimized/samples.c b/C-optimized/samples.c
@@ -245,8 +245,8 @@ int main()
//initialize randomness
srand(time(NULL));
- clock_t start, end;
- start = clock();
+ // clock_t start, end;
+ // start = clock();
// Toy example
// Declare variables in play
@@ -269,10 +269,14 @@ int main()
mixture_f(samplers, weights, n_dists, dist_mixture, n_threads);
printf("Sum(dist_mixture, N)/N = %f\n", split_array_sum(dist_mixture, N, n_threads) / N);
-
+ // array_print(dist_mixture[0], N);
split_array_free(dist_mixture, n_threads);
- end = clock();
- printf("Time (ms): %f\n", ((double)(end - start)) / (CLOCKS_PER_SEC * 10) * 1000);
+ // end = clock();
+ // printf("Time (ms): %f\n", ((double)(end - start)) / (CLOCKS_PER_SEC * 10) * 1000);
+ // ^ Will only measure how long it takes the inner main to run, not the whole program,
// including e.g., loading the program into memory or similar.
+ // Also CLOCKS_PER_SEC in POSIX is a constant equal to 1000000.
+ // See: https://stackoverflow.com/questions/10455905/why-is-clocks-per-sec-not-the-actual-number-of-clocks-per-second
return 0;
}
diff --git a/C/samples b/C/samples
Binary files differ.
diff --git a/C/samples.c b/C/samples.c
@@ -3,6 +3,7 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
+#include <time.h>
#define N 1000000
/*
@@ -111,6 +112,10 @@ void mixture(gsl_rng* r, double* dists[], double* weights, int n, double* result
/* Main */
int main(void)
{
+ // Start clock
+ clock_t start, end;
+ start = clock();
+
/* Initialize GNU Statistical Library (GSL) stuff */
const gsl_rng_type* T;
gsl_rng* r;
@@ -143,7 +148,10 @@ int main(void)
/* Clean up GSL */
gsl_rng_free(r);
-
+
+ // End clock
+ end = clock();
+ printf("Total time (ms): %f\n", ((double)(end - start)) / CLOCKS_PER_SEC * 1000);
/* Return success*/
return EXIT_SUCCESS;
}
diff --git a/README.md b/README.md
@@ -29,14 +29,15 @@ As of now, it may be useful for checking the validity of simple estimations. The
## Comparison table
-| Language | Time | Lines of code |
-|----------------------|-----------|---------------|
-| Nim | 0m0.068s | 84 |
-| C | 0m0.292s | 149 |
-| Javascript (NodeJS) | 0m0,732s | 69 |
-| Squiggle | 0m1,536s | 14 |
-| R | 0m7,000s | 49 |
-| Python (CPython) | 0m16,641s | 56 |
+| Language | Time | Lines of code |
+|--------------------------|-----------|---------------|
+| C (optimized, 1 thread) | ~30ms | 282 |
+| Nim | 68ms | 84 |
+| C | 292ms | 149 |
+| Javascript (NodeJS) | 732ms | 69 |
+| Squiggle | 1.536s | 14 |
+| R | 7.000s | 49 |
+| Python (CPython) | 16.641s | 56 |
Time measurements taken with the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples:
@@ -51,7 +52,9 @@ I was really happy trying [Nim](https://nim-lang.org/), and as a result the Nim
Without 1. and 2., the nim code takes 0m0.183s instead. But I don't think that these are unfair advantages: I liked trying out nim and therefore put in more love into the code, and this seems like it could be a recurring factor.
-For C, I enabled the `-Ofast` compilation flag. Without it, it instead takes ~0.4 seconds. Initially, before I enabled the `-Ofast` flag, I was surprised that the Node and Squiggle code were comparable to the C code. Using [bun](https://bun.sh/) instead of node is actually a bit slower.
+For the initial C code, I enabled the `-Ofast` compilation flag. Without it, it instead takes ~0.4 seconds. Initially, before I enabled the `-Ofast` flag, I was surprised that the Node and Squiggle code were comparable to the C code. Using [bun](https://bun.sh/) instead of node is actually a bit slower.
+
+For the optimized C code, see [that folder's README](./C-optimized/README.md).
For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org).
diff --git a/time.txt b/time.txt
@@ -1,3 +1,17 @@
+# Optimized C
+
+OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./out/samples && echo
+Sum(dist_mixture, N)/N = 0.885837
+Time: 0.02s
+
+OMP_NUM_THREADS=2 /bin/time -f "Time: %es" ./out/samples && echo
+Sum(dist_mixture, N)/N = 0.885123
+Time: 0.14s
+
+OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./out/samples && echo
+Sum(dist_mixture, N)/N = 0.886255
+Time: 0.11s
+
# C
## normal compilation