time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

commit 2cf684da56832f24b1e02a3696cc23b578791318
parent a84b6b9cc0e5bda509dffa1e86b3b6c9112943f9
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Sun, 21 May 2023 01:46:45 -0400

move nim to top level, add to README

Diffstat:
MREADME.md | 32+++++++++++++++++++-------------
Rwip/nim/hardcore/README.md -> nim/hardcore/README.md | 0
Rwip/nim/hardcore/makefile -> nim/hardcore/makefile | 0
Anim/hardcore/samples | 0
Anim/hardcore/samples.nim | 145+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rwip/nim/hello_world/hello_world -> nim/hello_world/hello_world | 0
Rwip/nim/hello_world/hello_world.nim -> nim/hello_world/hello_world.nim | 0
Rwip/nim/hello_world/makefile -> nim/hello_world/makefile | 0
Anim/makefile | 12++++++++++++
Anim/samples | 0
Rwip/nim/samples.nim -> nim/samples.nim | 0
Rwip/nim/sums/makefile -> nim/sums/makefile | 0
Rwip/nim/sums/sums -> nim/sums/sums | 0
Rwip/nim/sums/sums.nim -> nim/sums/sums.nim | 0
Mtime.txt | 15+++++++++++++++
Dwip/nim/hardcore/samples | 0
Dwip/nim/hardcore/samples.nim | 145-------------------------------------------------------------------------------
Dwip/nim/makefile | 11-----------
Dwip/nim/samples | 0
19 files changed, 191 insertions(+), 169 deletions(-)

diff --git a/README.md b/README.md @@ -13,29 +13,35 @@ The title of this repository is a pun on two meanings of "time to": "how much ti - [x] Squiggle - [x] Javascript (NodeJS) - [x] C +- [x] Nim ## Performance table With the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples: -| Language | Time | -|----------|-----------| -| C | 0m0,442s | -| Node | 0m0,732s | -| Squiggle | 0m1,536s | -| R | 0m7,000s | -| Python (CPython) | 0m16,641s | +| Language | Time | +|----------------------|-----------| +| Nim | 0m0.183s | +| C | 0m0,442s | +| Node | 0m0,732s | +| Squiggle | 0m1,536s | +| R | 0m7,000s | +| Python (CPython) | 0m16,641s | +I was very surprised that Node/Squiggle code was almost as fast as the raw C code. For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org). + +I was also really happy with trying [Nim](https://nim-lang.org/). The version which beats all others is just the normal usage of Nim, in the "release" compilation mode (the "danger" compilation mode shaves of a few more miliseconds). The Nim version has the particularity that I define the normal function from scratch, using the [Box–Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form). For Nim I also have a version of the code which takes around 4 seconds, where I define some very inefficient sine & logarithm functions to feed into the Box-Muller method, because it felt like fun to really write a botec tool really from scratch. -I was very surprised that Node/Squiggle code was almost as fast as the raw C code. For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org) - ## Languages I may add later -- Julia (TuringML) -- Rust -- Lisp +- [ ] Julia (TuringML) +- [ ] Rust +- [ ] Lisp +- [ ] Stan +- [ ] Go +- [ ] Zig +- [ ] Forth - ... and suggestions welcome -- Stan ## Roadmap diff --git a/wip/nim/hardcore/README.md b/nim/hardcore/README.md diff --git a/wip/nim/hardcore/makefile b/nim/hardcore/makefile diff --git a/nim/hardcore/samples b/nim/hardcore/samples Binary files differ. diff --git a/nim/hardcore/samples.nim b/nim/hardcore/samples.nim @@ -0,0 +1,145 @@ +import std/math +import std/sugar +import std/random +import std/sequtils + +randomize() + +## Basic math functions +proc factorial(n: int): int = + if n == 0 or n < 0: + return 1 + else: + return n * factorial(n - 1) + +proc sine(x: float): float = + let n = 8 + # ^ Taylor will converge really quickly + # notice that the factorial of 17 is + # already pretty gigantic + var acc = 0.0 + for i in 0..n: + var k = 2*i + 1 + var taylor = pow(-1, i.float) * pow(x, k.float) / factorial(k).float + acc = acc + taylor + return acc + +## Log function +## <https://en.wikipedia.org/wiki/Natural_logarithm#High_precision> + +## Arithmetic-geomtric mean +proc ag(x: float, y: float): float = + let n = 32 # just some high number + var a = (x + y)/2.0 + var b = sqrt(x * y) + for i in 0..n: + let temp = a + a = (a+b)/2.0 + b = sqrt(b*temp) + return a + +## Find m such that x * 2^m > 2^precision/2 +proc find_m(x:float): float = + var m = 0.0; + let precision = 64 # bits + let c = pow(2.0, precision.float / 2.0) + while x * pow(2.0, m) < c: + m = m + 1 + return m + +proc log(x: float): float = + let m = find_m(x) + let s = x * pow(2.0, m) + let ln2 = 0.6931471805599453 + return ( PI / (2.0 * ag(1, 4.0/s)) ) - m * ln2 + +## Test these functions +## echo factorial(5) +## echo sine(1.0) +## echo log(0.1) +## echo log(2.0) +## echo log(3.0) +## echo pow(2.0, 32.float) + +## Distribution functions + +## Normal +## <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form> +proc ur_normal(): float = + let u1 = rand(1.0) + let u2 = rand(1.0) + let z = sqrt(-2.0 * log(u1)) * sine(2 * PI * u2) + return z + +proc normal(mean: float, sigma: float): float = + return (mean + sigma * ur_normal()) + +proc lognormal(logmean: float, logsigma: float): float = + let answer = pow(E, normal(logmean, logsigma)) + return answer + +proc to(low: float, high: float): float = + let normal95confidencePoint = 1.6448536269514722 + let loglow = log(low) + let loghigh = log(high) + let logmean = (loglow + loghigh)/2 + let logsigma = (loghigh - loglow) / (2.0 * normal95confidencePoint); + return lognormal(logmean, logsigma) + +## echo ur_normal() +## echo normal(10, 20) +## echo lognormal(2, 4) +## echo to(10, 90) + +## Manipulate samples + +proc make_samples(f: () -> float, n: int): seq[float] = + result = toSeq(1..n).map(_ => f()) + return result + +proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] = + + assert sxs.len == ps.len + + var ws: seq[float] + var sum = 0.0 + for i, p in ps: + sum = sum + p + ws.add(sum) + ws = ws.map(w => w/sum) + + proc get_mixture_sample(): float = + let r = rand(1.0) + var i = ws.len - 1 + for j, w in ws: + if r < w: + i = j + break + ## only occasion when ^ doesn't assign i + ## is when r is exactly 1 + ## which would correspond to choosing the last item in ws + ## which is why i is initialized to ws.len + let xs = sxs[i] + let l = xs.len-1 + let k = rand(0..l) + return xs[k] + + return toSeq(1..n).map(_ => get_mixture_sample()) + +## Actual model + +let n = 1000000 + +let p_a = 0.8 +let p_b = 0.5 +let p_c = p_a * p_b + +let weights = @[ 1.0 - p_c, p_c/2.0, p_c/4.0, p_c/4.0 ] + +let fs = [ () => 0.0, () => 1.0, () => to(1.0, 3.0), () => to(2.0, 10.0) ] +let dists = fs.map(f => make_samples(f, n)) +let result = mixture(dists, weights, n) +let mean_result = foldl(result, a + b, 0.0) / float(result.len) + +# echo result +echo mean_result diff --git a/wip/nim/hello_world/hello_world b/nim/hello_world/hello_world Binary files differ. diff --git a/wip/nim/hello_world/hello_world.nim b/nim/hello_world/hello_world.nim diff --git a/wip/nim/hello_world/makefile b/nim/hello_world/makefile diff --git a/nim/makefile b/nim/makefile @@ -0,0 +1,12 @@ +SHELL := /bin/bash ## <= required to use time +VERBOSE=--verbosity:0 + +build: samples.nim + nim c $(VERBOSE) samples.nim + +run: samples + ./samples $(VERBOSE) + +examine: samples + nim c $(VERBOSE) -d:release samples.nim && time ./samples $(VERBOSE) && echo + nim c $(VERBOSE) -d:danger samples.nim && time ./samples $(VERBOSE) diff --git a/nim/samples b/nim/samples Binary files differ. diff --git a/wip/nim/samples.nim b/nim/samples.nim diff --git a/wip/nim/sums/makefile b/nim/sums/makefile diff --git a/wip/nim/sums/sums b/nim/sums/sums Binary files differ. diff --git a/wip/nim/sums/sums.nim b/nim/sums/sums.nim diff --git a/time.txt b/time.txt @@ -36,3 +36,18 @@ real 0m7,000s user 0m6,944s sys 0m0,052s +## Nim + +nim c --verbosity:0 -d:release samples.nim && time ./samples --verbosity:0 && echo +0.8873815480558296 + +real 0m0.183s +user 0m0.150s +sys 0m0.032s + +nim c --verbosity:0 -d:danger samples.nim && time ./samples --verbosity:0 +0.8886260086130379 + +real 0m0.157s +user 0m0.136s +sys 0m0.020s diff --git a/wip/nim/hardcore/samples b/wip/nim/hardcore/samples Binary files differ. diff --git a/wip/nim/hardcore/samples.nim b/wip/nim/hardcore/samples.nim @@ -1,145 +0,0 @@ -import std/math -import std/sugar -import std/random -import std/sequtils - -randomize() - -## Basic math functions -proc factorial(n: int): int = - if n == 0 or n < 0: - return 1 - else: - return n * factorial(n - 1) - -proc sine(x: float): float = - let n = 8 - # ^ Taylor will converge really quickly - # notice that the factorial of 17 is - # already pretty gigantic - var acc = 0.0 - for i in 0..n: - var k = 2*i + 1 - var taylor = pow(-1, i.float) * pow(x, k.float) / factorial(k).float - acc = acc + taylor - return acc - -## Log function -## <https://en.wikipedia.org/wiki/Natural_logarithm#High_precision> - -## Arithmetic-geomtric mean -proc ag(x: float, y: float): float = - let n = 16 # just some high number - var a = (x + y)/2.0 - var b = sqrt(x * y) - for i in 0..n: - let temp = a - a = (a+b)/2.0 - b = sqrt(b*temp) - return a - -## Find m such that x * 2^m > 2^precision/2 -proc find_m(x:float): float = - var m = 0.0; - let precision = 32 # bits - let c = pow(2.0, precision.float / 2.0) - while x * pow(2.0, m) < c: - m = m + 1 - return m - -proc log(x: float): float = - let m = find_m(x) - let s = x * pow(2.0, m) - let ln2 = 0.6931471805599453 - return ( PI / (2.0 * ag(1, 4.0/s)) ) - m * ln2 - -## Test these functions -## echo factorial(5) -## echo sine(1.0) -## echo log(0.1) -## echo log(2.0) -## echo log(3.0) -## echo pow(2.0, 32.float) - -## Distribution functions - -## Normal -## <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form> -proc ur_normal(): float = - let u1 = rand(1.0) - let u2 = rand(1.0) - let z = sqrt(-2.0 * log(u1)) * sine(2 * PI * u2) - return z - -proc normal(mean: float, sigma: float): float = - return (mean + sigma * ur_normal()) - -proc lognormal(logmean: float, logsigma: float): float = - let answer = pow(E, normal(logmean, logsigma)) - return answer - -proc to(low: float, high: float): float = - let normal95confidencePoint = 1.6448536269514722 - let loglow = log(low) - let loghigh = log(high) - let logmean = (loglow + loghigh)/2 - let logsigma = (loghigh - loglow) / (2.0 * normal95confidencePoint); - return lognormal(logmean, logsigma) - -## echo ur_normal() -## echo normal(10, 20) -## echo lognormal(2, 4) -## echo to(10, 90) - -## Manipulate samples - -proc make_samples(f: () -> float, n: int): seq[float] = - result = toSeq(1..n).map(_ => f()) - return result - -proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] = - - assert sxs.len == ps.len - - var ws: seq[float] - var sum = 0.0 - for i, p in ps: - sum = sum + p - ws.add(sum) - ws = ws.map(w => w/sum) - - proc get_mixture_sample(): float = - let r = rand(1.0) - var i = ws.len - 1 - for j, w in ws: - if r < w: - i = j - break - ## only occasion when ^ doesn't assign i - ## is when r is exactly 1 - ## which would correspond to choosing the last item in ws - ## which is why i is initialized to ws.len - let xs = sxs[i] - let l = xs.len-1 - let k = rand(0..l) - return xs[k] - - return toSeq(1..n).map(_ => get_mixture_sample()) - -## Actual model - -let n = 1000000 - -let p_a = 0.8 -let p_b = 0.5 -let p_c = p_a * p_b - -let weights = @[ 1.0 - p_c, p_c/2.0, p_c/4.0, p_c/4.0 ] - -let fs = [ () => 0.0, () => 1.0, () => to(1.0, 3.0), () => to(2.0, 10.0) ] -let dists = fs.map(f => make_samples(f, n)) -let result = mixture(dists, weights, n) -let mean_result = foldl(result, a + b, 0.0) / float(result.len) - -# echo result -echo mean_result diff --git a/wip/nim/makefile b/wip/nim/makefile @@ -1,11 +0,0 @@ -SHELL := /bin/bash ## <= required to use time -VERBOSE=--verbosity:0 - -build: samples.nim - nim c $(VERBOSE) samples.nim - -run: samples - ./samples $(VERBOSE) - -examine: samples - nim c $(VERBOSE) -d:release samples.nim && time ./samples $(VERBOSE) diff --git a/wip/nim/samples b/wip/nim/samples Binary files differ.