move nim to top level, add to README - time-to-botec - Benchmark sampling in different programming languages

commit 2cf684da56832f24b1e02a3696cc23b578791318
parent a84b6b9cc0e5bda509dffa1e86b3b6c9112943f9
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Sun, 21 May 2023 01:46:45 -0400

move nim to top level, add to README

Diffstat:
M README.md  | 32 +++++++++++++++++++-------------
R wip/nim/hardcore/README.md -> nim/hardcore/README.md  | 0 
R wip/nim/hardcore/makefile -> nim/hardcore/makefile  | 0 
A nim/hardcore/samples  | 0 
A nim/hardcore/samples.nim  | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R wip/nim/hello_world/hello_world -> nim/hello_world/hello_world  | 0 
R wip/nim/hello_world/hello_world.nim -> nim/hello_world/hello_world.nim  | 0 
R wip/nim/hello_world/makefile -> nim/hello_world/makefile  | 0 
A nim/makefile  | 12 ++++++++++++
A nim/samples  | 0 
R wip/nim/samples.nim -> nim/samples.nim  | 0 
R wip/nim/sums/makefile -> nim/sums/makefile  | 0 
R wip/nim/sums/sums -> nim/sums/sums  | 0 
R wip/nim/sums/sums.nim -> nim/sums/sums.nim  | 0 
M time.txt  | 15 +++++++++++++++
D wip/nim/hardcore/samples  | 0 
D wip/nim/hardcore/samples.nim  | 145 -------------------------------------------------------------------------------
D wip/nim/makefile  | 11 -----------
D wip/nim/samples  | 0

19 files changed, 191 insertions(+), 169 deletions(-)
diff --git a/README.md b/README.md
@@ -13,29 +13,35 @@ The title of this repository is a pun on two meanings of "time to": "how much ti
 - [x] Squiggle 
 - [x] Javascript (NodeJS)
 - [x] C
+- [x] Nim 
 
 ## Performance table
 
 With the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples:
 
-| Language | Time      |
-|----------|-----------|
-| C        | 0m0,442s  |
-| Node     | 0m0,732s  |
-| Squiggle | 0m1,536s  |
-| R        | 0m7,000s  |
-| Python (CPython)  | 0m16,641s |
+| Language             | Time      |
+|----------------------|-----------|
+| Nim                  | 0m0.183s  |
+| C                    | 0m0,442s  |
+| Node                 | 0m0,732s  |
+| Squiggle             | 0m1,536s  |
+| R                    | 0m7,000s  |
+| Python (CPython)     | 0m16,641s |
 
+I was very surprised that Node/Squiggle code was almost as fast as the raw C code. For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org).
+
+I was also really happy with trying [Nim](https://nim-lang.org/). The version which beats all others is just the normal usage of Nim, in the "release" compilation mode (the "danger" compilation mode shaves of a few more miliseconds). The Nim version has the particularity that I define the normal function from scratch, using the [Box–Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form). For Nim I also have a version of the code which takes around 4 seconds, where I define some very inefficient sine & logarithm functions to feed into the Box-Muller method, because it felt like fun to really write a botec tool really from scratch.
 
-I was very surprised that Node/Squiggle code was almost as fast as the raw C code. For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org)
- 
 ## Languages I may add later
 
-- Julia (TuringML) 
-- Rust
-- Lisp
+- [ ] Julia (TuringML) 
+- [ ] Rust
+- [ ] Lisp
+- [ ] Stan
+- [ ] Go 
+- [ ] Zig
+- [ ] Forth
 - ... and suggestions welcome
-- Stan
 
 ## Roadmap
 
diff --git a/wip/nim/hardcore/README.md b/nim/hardcore/README.md
diff --git a/wip/nim/hardcore/makefile b/nim/hardcore/makefile
diff --git a/nim/hardcore/samples b/nim/hardcore/samples
Binary files differ.
diff --git a/nim/hardcore/samples.nim b/nim/hardcore/samples.nim
@@ -0,0 +1,145 @@
+import std/math
+import std/sugar
+import std/random
+import std/sequtils
+
+randomize()
+
+## Basic math functions
+proc factorial(n: int): int = 
+  if n == 0 or n < 0: 
+    return 1
+  else:
+    return n * factorial(n - 1)
+
+proc sine(x: float): float = 
+  let n = 8
+  # ^ Taylor will converge really quickly
+  # notice that the factorial of 17 is 
+  # already pretty gigantic
+  var acc = 0.0
+  for i in 0..n:
+    var k = 2*i + 1
+    var taylor = pow(-1, i.float) * pow(x, k.float) / factorial(k).float 
+    acc = acc + taylor
+  return acc 
+
+## Log function
+## <https://en.wikipedia.org/wiki/Natural_logarithm#High_precision>
+
+## Arithmetic-geomtric mean
+proc ag(x: float, y: float): float = 
+  let n = 32 # just some high number
+  var a = (x + y)/2.0
+  var b = sqrt(x * y)
+  for i in 0..n:
+    let temp = a
+    a = (a+b)/2.0
+    b = sqrt(b*temp)
+  return a
+
+## Find m such that x * 2^m > 2^precision/2
+proc find_m(x:float): float = 
+  var m = 0.0;
+  let precision = 64 # bits
+  let c = pow(2.0, precision.float / 2.0)
+  while x * pow(2.0, m) < c:
+    m = m + 1
+  return m
+
+proc log(x: float): float = 
+  let m = find_m(x)
+  let s = x * pow(2.0, m)
+  let ln2 = 0.6931471805599453
+  return ( PI / (2.0 * ag(1, 4.0/s)) ) - m * ln2
+
+## Test these functions
+## echo factorial(5)
+## echo sine(1.0)
+## echo log(0.1)
+## echo log(2.0)
+## echo log(3.0)
+## echo pow(2.0, 32.float)
+   
+## Distribution functions
+
+## Normal
+## <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form>
+proc ur_normal(): float = 
+  let u1 = rand(1.0)
+  let u2 = rand(1.0)
+  let z = sqrt(-2.0 * log(u1)) * sine(2 * PI * u2)
+  return z
+
+proc normal(mean: float, sigma: float): float = 
+  return (mean + sigma * ur_normal())
+
+proc lognormal(logmean: float, logsigma: float): float =
+  let answer = pow(E, normal(logmean, logsigma))
+  return answer
+
+proc to(low: float, high: float): float = 
+  let normal95confidencePoint = 1.6448536269514722
+  let loglow = log(low)
+  let loghigh = log(high)
+  let logmean = (loglow + loghigh)/2
+  let logsigma = (loghigh - loglow) / (2.0 * normal95confidencePoint);
+  return lognormal(logmean, logsigma)
+
+## echo ur_normal()
+## echo normal(10, 20)
+## echo lognormal(2, 4)
+## echo to(10, 90)
+
+## Manipulate samples
+
+proc make_samples(f: () -> float, n: int): seq[float] = 
+  result = toSeq(1..n).map(_ => f())
+  return result
+
+proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] = 
+  
+  assert sxs.len == ps.len
+
+  var ws: seq[float]
+  var sum = 0.0
+  for i, p in ps:
+    sum = sum + p
+    ws.add(sum)
+  ws = ws.map(w => w/sum)
+  
+  proc get_mixture_sample(): float = 
+    let r = rand(1.0)
+    var i = ws.len - 1
+    for j, w in ws:
+      if r < w:
+        i = j
+        break
+    ## only occasion when ^ doesn't assign i
+    ## is when r is exactly 1
+    ## which would correspond to choosing the last item in ws
+    ## which is why i is initialized to ws.len
+    let xs = sxs[i]
+    let l = xs.len-1
+    let k = rand(0..l)
+    return xs[k]
+  
+  return toSeq(1..n).map(_ => get_mixture_sample())
+
+## Actual model
+
+let n = 1000000
+
+let p_a = 0.8
+let p_b = 0.5
+let p_c = p_a * p_b
+
+let weights = @[ 1.0 - p_c, p_c/2.0, p_c/4.0, p_c/4.0 ]
+
+let fs = [ () => 0.0, () => 1.0, () => to(1.0, 3.0), () => to(2.0, 10.0)  ]
+let dists = fs.map(f => make_samples(f, n))
+let result = mixture(dists, weights, n)
+let mean_result = foldl(result, a + b, 0.0) / float(result.len)
+
+# echo result
+echo mean_result
diff --git a/wip/nim/hello_world/hello_world b/nim/hello_world/hello_world
Binary files differ.
diff --git a/wip/nim/hello_world/hello_world.nim b/nim/hello_world/hello_world.nim
diff --git a/wip/nim/hello_world/makefile b/nim/hello_world/makefile
diff --git a/nim/makefile b/nim/makefile
@@ -0,0 +1,12 @@
+SHELL := /bin/bash ## <= required to use time
+VERBOSE=--verbosity:0
+
+build: samples.nim
+	nim c $(VERBOSE) samples.nim
+
+run: samples 
+	./samples $(VERBOSE)
+
+examine: samples
+	nim c $(VERBOSE) -d:release samples.nim && time ./samples $(VERBOSE) && echo
+	nim c $(VERBOSE) -d:danger samples.nim && time ./samples $(VERBOSE)
diff --git a/nim/samples b/nim/samples
Binary files differ.
diff --git a/wip/nim/samples.nim b/nim/samples.nim
diff --git a/wip/nim/sums/makefile b/nim/sums/makefile
diff --git a/wip/nim/sums/sums b/nim/sums/sums
Binary files differ.
diff --git a/wip/nim/sums/sums.nim b/nim/sums/sums.nim
diff --git a/time.txt b/time.txt
@@ -36,3 +36,18 @@ real    0m7,000s
 user    0m6,944s
 sys     0m0,052s
 
+## Nim
+
+nim c --verbosity:0 -d:release samples.nim && time ./samples --verbosity:0 && echo
+0.8873815480558296
+
+real    0m0.183s
+user    0m0.150s
+sys     0m0.032s
+
+nim c --verbosity:0 -d:danger samples.nim && time ./samples --verbosity:0
+0.8886260086130379
+
+real    0m0.157s
+user    0m0.136s
+sys     0m0.020s
diff --git a/wip/nim/hardcore/samples b/wip/nim/hardcore/samples
Binary files differ.
diff --git a/wip/nim/hardcore/samples.nim b/wip/nim/hardcore/samples.nim
@@ -1,145 +0,0 @@
-import std/math
-import std/sugar
-import std/random
-import std/sequtils
-
-randomize()
-
-## Basic math functions
-proc factorial(n: int): int = 
-  if n == 0 or n < 0: 
-    return 1
-  else:
-    return n * factorial(n - 1)
-
-proc sine(x: float): float = 
-  let n = 8
-  # ^ Taylor will converge really quickly
-  # notice that the factorial of 17 is 
-  # already pretty gigantic
-  var acc = 0.0
-  for i in 0..n:
-    var k = 2*i + 1
-    var taylor = pow(-1, i.float) * pow(x, k.float) / factorial(k).float 
-    acc = acc + taylor
-  return acc 
-
-## Log function
-## <https://en.wikipedia.org/wiki/Natural_logarithm#High_precision>
-
-## Arithmetic-geomtric mean
-proc ag(x: float, y: float): float = 
-  let n = 16 # just some high number
-  var a = (x + y)/2.0
-  var b = sqrt(x * y)
-  for i in 0..n:
-    let temp = a
-    a = (a+b)/2.0
-    b = sqrt(b*temp)
-  return a
-
-## Find m such that x * 2^m > 2^precision/2
-proc find_m(x:float): float = 
-  var m = 0.0;
-  let precision = 32 # bits
-  let c = pow(2.0, precision.float / 2.0)
-  while x * pow(2.0, m) < c:
-    m = m + 1
-  return m
-
-proc log(x: float): float = 
-  let m = find_m(x)
-  let s = x * pow(2.0, m)
-  let ln2 = 0.6931471805599453
-  return ( PI / (2.0 * ag(1, 4.0/s)) ) - m * ln2
-
-## Test these functions
-## echo factorial(5)
-## echo sine(1.0)
-## echo log(0.1)
-## echo log(2.0)
-## echo log(3.0)
-## echo pow(2.0, 32.float)
-   
-## Distribution functions
-
-## Normal
-## <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form>
-proc ur_normal(): float = 
-  let u1 = rand(1.0)
-  let u2 = rand(1.0)
-  let z = sqrt(-2.0 * log(u1)) * sine(2 * PI * u2)
-  return z
-
-proc normal(mean: float, sigma: float): float = 
-  return (mean + sigma * ur_normal())
-
-proc lognormal(logmean: float, logsigma: float): float =
-  let answer = pow(E, normal(logmean, logsigma))
-  return answer
-
-proc to(low: float, high: float): float = 
-  let normal95confidencePoint = 1.6448536269514722
-  let loglow = log(low)
-  let loghigh = log(high)
-  let logmean = (loglow + loghigh)/2
-  let logsigma = (loghigh - loglow) / (2.0 * normal95confidencePoint);
-  return lognormal(logmean, logsigma)
-
-## echo ur_normal()
-## echo normal(10, 20)
-## echo lognormal(2, 4)
-## echo to(10, 90)
-
-## Manipulate samples
-
-proc make_samples(f: () -> float, n: int): seq[float] = 
-  result = toSeq(1..n).map(_ => f())
-  return result
-
-proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] = 
-  
-  assert sxs.len == ps.len
-
-  var ws: seq[float]
-  var sum = 0.0
-  for i, p in ps:
-    sum = sum + p
-    ws.add(sum)
-  ws = ws.map(w => w/sum)
-  
-  proc get_mixture_sample(): float = 
-    let r = rand(1.0)
-    var i = ws.len - 1
-    for j, w in ws:
-      if r < w:
-        i = j
-        break
-    ## only occasion when ^ doesn't assign i
-    ## is when r is exactly 1
-    ## which would correspond to choosing the last item in ws
-    ## which is why i is initialized to ws.len
-    let xs = sxs[i]
-    let l = xs.len-1
-    let k = rand(0..l)
-    return xs[k]
-  
-  return toSeq(1..n).map(_ => get_mixture_sample())
-
-## Actual model
-
-let n = 1000000
-
-let p_a = 0.8
-let p_b = 0.5
-let p_c = p_a * p_b
-
-let weights = @[ 1.0 - p_c, p_c/2.0, p_c/4.0, p_c/4.0 ]
-
-let fs = [ () => 0.0, () => 1.0, () => to(1.0, 3.0), () => to(2.0, 10.0)  ]
-let dists = fs.map(f => make_samples(f, n))
-let result = mixture(dists, weights, n)
-let mean_result = foldl(result, a + b, 0.0) / float(result.len)
-
-# echo result
-echo mean_result
diff --git a/wip/nim/makefile b/wip/nim/makefile
@@ -1,11 +0,0 @@
-SHELL := /bin/bash ## <= required to use time
-VERBOSE=--verbosity:0
-
-build: samples.nim
-	nim c $(VERBOSE) samples.nim
-
-run: samples 
-	./samples $(VERBOSE)
-
-examine: samples
-	nim c $(VERBOSE) -d:release samples.nim && time ./samples $(VERBOSE)
diff --git a/wip/nim/samples b/wip/nim/samples
Binary files differ.

	time-to-botec Benchmark sampling in different programming languages
	Log \| Files \| Refs \| README

M	README.md	\|	32	+++++++++++++++++++-------------
R	wip/nim/hardcore/README.md -> nim/hardcore/README.md	\|	0
R	wip/nim/hardcore/makefile -> nim/hardcore/makefile	\|	0
A	nim/hardcore/samples	\|	0
A	nim/hardcore/samples.nim	\|	145	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R	wip/nim/hello_world/hello_world -> nim/hello_world/hello_world	\|	0
R	wip/nim/hello_world/hello_world.nim -> nim/hello_world/hello_world.nim	\|	0
R	wip/nim/hello_world/makefile -> nim/hello_world/makefile	\|	0
A	nim/makefile	\|	12	++++++++++++
A	nim/samples	\|	0
R	wip/nim/samples.nim -> nim/samples.nim	\|	0
R	wip/nim/sums/makefile -> nim/sums/makefile	\|	0
R	wip/nim/sums/sums -> nim/sums/sums	\|	0
R	wip/nim/sums/sums.nim -> nim/sums/sums.nim	\|	0
M	time.txt	\|	15	+++++++++++++++
D	wip/nim/hardcore/samples	\|	0
D	wip/nim/hardcore/samples.nim	\|	145	-------------------------------------------------------------------------------
D	wip/nim/makefile	\|	11	-----------
D	wip/nim/samples	\|	0