improve nim code, change README - time-to-botec - Benchmark sampling in different programming languages

commit 47e2a25490f28c54ec0022cda3d421a8db89d7fb
parent 8acdc283a2f531f3d927e17dd706ff9915bb73d4
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Sun, 21 May 2023 12:05:15 -0400

improve nim code, change README

Diffstat:
M README.md  | 27 +++++++++++++++++++--------
M nim/samples  | 0 
M nim/samples.nim  | 39 +++++++++++++++++----------------------
M time.txt  | 30 +++++++++++++++++++-----------

4 files changed, 55 insertions(+), 41 deletions(-)
diff --git a/README.md b/README.md
@@ -2,17 +2,17 @@
 
 ## About
 
-This repository contains example of very simple code to manipulate samples in various programming languages. It implements this estimate:
+This repository contains example of very simple code to manipulate samples in various programming languages. It implements this platonic estimate:
 
 ```
 p_a = 0.8
 p_b = 0.5
 p_c = p_a * p_b
 
-dists = [0, 1, 1 to 3, 2 to 10] # each dist represented as 1M samples
+dists = [0, 1, 1 to 3, 2 to 10]
 weights = [(1 - p_c), p_c/2, p_c/4, p_c/4 ]
 
-result = mixture(dists, weights)
+result = mixture(dists, weights) # should be 1M samples
 mean(result)
 ```
 
@@ -33,16 +33,27 @@ With the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1
 
 | Language             | Time      |
 |----------------------|-----------|
-| Nim                  | 0m0.153s  |
-| C                    | 0m0,442s  |
-| Node                 | 0m0,732s  |
+| Nim                  | 0m0.068s  |
+| C                    | 0m0.292s  |
+| Javascript (NodeJS)  | 0m0,732s  |
 | Squiggle             | 0m1,536s  |
 | R                    | 0m7,000s  |
 | Python (CPython)     | 0m16,641s |
 
-I was very surprised that Node/Squiggle code was almost as fast as the raw C code. For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org).
+## Notes
 
-I was also really happy with trying [Nim](https://nim-lang.org/). The version which beats all others is just the fastest "danger" compilation of Nim (the "release" compilation is 0m0.183s instead). The Nim version has the particularity that I define the normal function from scratch, using the [Box–Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form). For Nim I also have a version of the code which takes around 4 seconds, where I define some very inefficient sine & logarithm functions to feed into the Box-Muller method, because it felt like fun to really write a botec tool really from scratch.
+I was really happy trying [Nim](https://nim-lang.org/), and as a result the Nim code is a bit more optimized and engineered:
+
+1. It is using the fastest "danger" compilation mode.
+2. It has some optimizations: I don't compute 1M samples for each dist, but instead pass functions around and compute the 1M samples at the end
+3. I define the normal function from scratch, using the [Box–Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form).
+4. I also have a version in which I define the logarithm and sine functions themselves in nim to feed into the Box-Muller method. But it is much slower.
+
+Without 1. and 2., the nim code takes 0m0.183s instead. But I don't think that these are unfair advantages: I liked trying out nim and therefore put in more love into the code, and this seems like it could be a recurring factor.
+
+For C, I enabled the `-Ofast` compilation flag. Without it, it instead takes ~0.4 seconds. Initially, before I enabled the `-Ofast` flag, I was surprised that the Node and Squiggle code were comparable to the C code. Using [bun](https://bun.sh/) instead of node is actually a bit slower.
+
+For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org).
 
 ## Languages I may add later
 
diff --git a/nim/samples b/nim/samples
Binary files differ.
diff --git a/nim/samples.nim b/nim/samples.nim
@@ -37,13 +37,9 @@ proc to(low: float, high: float): float =
 
 ## Manipulate samples
 
-proc make_samples(f: () -> float, n: int): seq[float] = 
-  result = toSeq(1..n).map(_ => f())
-  return result
-
-proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] = 
+proc mixture(fs: seq[proc (): float{.nimcall.}], ps: seq[float], n: int): seq[float] = 
   
-  assert sxs.len == ps.len
+  assert fs.len == ps.len
 
   var ws: seq[float]
   var sum = 0.0
@@ -52,23 +48,23 @@ proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] =
     ws.add(sum)
   ws = ws.map(w => w/sum)
   
-  proc get_mixture_sample(): float = 
-    let r = rand(1.0)
-    var i = ws.len - 1
-    for j, w in ws:
+  var samples: seq[float]
+  let rs = toSeq(1..n).map(_=>rand(1.0))
+  for i in 0..(n-1):
+    let r = rs[i]
+    var j = ws.len - 1
+    for k, w in ws:
       if r < w:
-        i = j
+        j = k
         break
-    ## only occasion when ^ doesn't assign i
+    ## only occasion when ^ doesn't assign j
     ## is when r is exactly 1
     ## which would correspond to choosing the last item in ws
-    ## which is why i is initialized to ws.len
-    let xs = sxs[i]
-    let l = xs.len-1
-    let k = rand(0..l)
-    return xs[k]
-  
-  return toSeq(1..n).map(_ => get_mixture_sample())
+    ## which is why j is initialized to ws.len - 1
+    let f = fs[j]
+    samples.add(f())
+  return samples
+
 
 ## Actual model
 
@@ -80,9 +76,8 @@ let p_c = p_a * p_b
 
 let weights = @[ 1.0 - p_c, p_c/2.0, p_c/4.0, p_c/4.0 ]
 
-let fs = [ () => 0.0, () => 1.0, () => to(1.0, 3.0), () => to(2.0, 10.0)  ]
-let dists = fs.map(f => make_samples(f, n))
-let result = mixture(dists, weights, n)
+let fs = @[ proc (): float = 0.0, proc (): float = 1.0, proc (): float = to(1.0, 3.0), proc (): float = to(2.0, 10.0)]
+let result = mixture(fs, weights, n)
 let mean_result = foldl(result, a + b, 0.0) / float(result.len)
 
 # echo result
diff --git a/time.txt b/time.txt
@@ -1,11 +1,19 @@
 # C
 
+## normal compilation
 0.888458
 
 real    0m0,442s
 user    0m0,378s
 sys     0m0,064s
 
+## -Ofast
+0.888458
+
+real    0m0.292s
+user    0m0.266s
+sys     0m0.026s
+
 # Squiggle
 
 real    0m1,536s
@@ -39,22 +47,22 @@ sys     0m0,052s
 ## Nim
 
 nim c --verbosity:0 samples.nim && time ./samples --verbosity:0 && echo
-0.8881633539025908
+0.8860780498240779
 
-real    0m0.706s
-user    0m0.685s
+real    0m0.234s
+user    0m0.214s
 sys     0m0.020s
 
 nim c --verbosity:0 -d:release samples.nim && time ./samples --verbosity:0 && echo
-0.8861663545062978
+0.884035098700204
 
-real    0m0.184s
-user    0m0.151s
-sys     0m0.032s
+real    0m0.074s
+user    0m0.043s
+sys     0m0.031s
 
 nim c --verbosity:0 -d:danger samples.nim && time ./samples --verbosity:0
-0.8879220244477399
+0.8892827195895541
 
-real    0m0.158s
-user    0m0.130s
-sys     0m0.028s
+real    0m0.068s
+user    0m0.048s
+sys     0m0.020s

	time-to-botec Benchmark sampling in different programming languages
	Log \| Files \| Refs \| README

M	README.md	\|	27	+++++++++++++++++++--------
M	nim/samples	\|	0
M	nim/samples.nim	\|	39	+++++++++++++++++----------------------
M	time.txt	\|	30	+++++++++++++++++++-----------