commit 6f4c975bd4324e5c77e9ac1c83123c44bce8623f
parent 829781b8a756fe62569172fec7bd530812abe42b
Author: NunoSempere <nuno.sempere@protonmail.com>
Date: Sat, 24 Feb 2024 14:51:19 -0300
add fast python, other tweaks
Diffstat:
11 files changed, 82 insertions(+), 13 deletions(-)
diff --git a/C/makefile b/C/makefile
@@ -24,7 +24,7 @@ MATH=-lm
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
-OPTIMIZED=-O3 #-O3 actually gives better performance than -Ofast, at least for this version
+OPTIMIZED=-O3 #-O3 actually gives better performance than -Ofast, at least for this version. Could also add -march=native
LOCAL=-march=native
OPENMP=-fopenmp
diff --git a/C/out/samples b/C/out/samples
Binary files differ.
diff --git a/README.md b/README.md
@@ -26,6 +26,7 @@ The name of this repository is a pun on two meanings of "time to": "how much tim
|-----------------------------|-----------|---------------|
| C | 6.20ms | 252 |
| squiggle.c | 7.20ms | 29* |
+| Go | 32.70ms | 150 |
| Nim | 41.10ms | 84 |
| Lua (LuaJIT) | 68.80ms | 82 |
| OCaml (flambda) | 185.50ms | 123 |
@@ -83,6 +84,10 @@ I like the [operator](http://duskos.org/#operator) section of [Dusk OS](http://d
> Dusk OS doesn't have users, but operators. What's the difference? Control. You use a phone, you use a coffee machine, hell you even use a car these days. But you operate a bulldozer, you operate a crane, you operate a plane.
+### Go
+
+Go is reasonably fast, though not as fast as C. This is partly because it uses a different, slightly more robust random number generator. I have high hopes for Go; hopefully it will fill the role of a C with fewer warts for me.
+
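+Here is a minimal sketch of the generator setup mentioned above, mirroring how go/squiggle.go seeds the PCG source from [math/rand/v2](https://pkg.go.dev/math/rand/v2); the seed values are just the illustrative ones from that file:
+
+```go
+package main
+
+import (
+	"fmt"
+	rand "math/rand/v2"
+)
+
+func main() {
+	// PCG source with an explicit two-word seed, as in the
+	// non-concurrent example in go/squiggle.go; a fixed seed
+	// makes runs reproducible.
+	r := rand.New(rand.NewPCG(1, 2))
+	fmt.Println(r.Float64()) // uniform sample in [0, 1)
+}
+```
+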
### NodeJS and Squiggle
Using [bun](https://bun.sh/) instead of node is actually a bit slower for the raw js code. Also, both the NodeJS and the Squiggle code use [stdlib](https://stdlib.io/) in their innards, which has a bunch of interleaved functions that make the code slower. It's possible that not using that external library could make the code faster. But at the same time, the js approach does seem to be to use external libraries whenever possible.
diff --git a/go/makefile b/go/makefile
@@ -8,6 +8,11 @@ build-complex:
go build -ldflags="-s -w" squiggle.go
# https://stackoverflow.com/questions/45003259/passing-an-optimization-flag-to-a-go-compiler
+build-show:
+ go build -gcflags="-m" squiggle.go
+ # https://pkg.go.dev/cmd/compile
+ # consider pgo: <https://go.dev/doc/pgo>
+
run:
./squiggle
diff --git a/go/squiggle b/go/squiggle
Binary files differ.
diff --git a/go/squiggle.go b/go/squiggle.go
@@ -5,11 +5,11 @@ import "math"
import "sync"
import rand "math/rand/v2"
+// https://pkg.go.dev/math/rand/v2
+
type src = *rand.Rand
type func64 = func(src) float64
-// https://pkg.go.dev/math/rand/v2
-
func sample_unit_uniform(r src) float64 {
return r.Float64()
}
@@ -75,7 +75,6 @@ func sample_mixture(fs []func64, weights []float64, r src) float64 {
break
}
}
- // fmt.Println(cumsummed_normalized_weights)
if flag == 0 {
result = fs[len(fs)-1](r)
@@ -84,12 +83,6 @@ func sample_mixture(fs []func64, weights []float64, r src) float64 {
}
-func slice_fill(xs []float64, fs func64, r src) {
- for i := range xs {
- xs[i] = fs(r)
- }
-}
-
func sample_parallel(f func64, n_samples int) []float64 {
var num_threads = 16
var xs = make([]float64, n_samples)
@@ -135,6 +128,7 @@ func main() {
avg = avg / float64(n_samples)
fmt.Printf("Average: %v\n", avg)
/*
+ // Without concurrency:
n_samples := 1_000_000
var r = rand.New(rand.NewPCG(uint64(1), uint64(2)))
var avg float64 = 0
diff --git a/makefile b/makefile
@@ -20,6 +20,9 @@ time-all:
@echo "# Squiggle (0.8.6)" && cd squiggle && make time-linux && echo && echo
@echo "# SquigglePy (0.27)" && cd squigglepy && make time && echo && echo
@echo "# squiggle.c" && cd squiggle.c && make time-linux && echo && echo
+ @echo "# squiggle.go" && cd go && make time-linux && echo && echo
+
+
record:
make time-all > time.txt 2>&1
diff --git a/python/samples-fast.py b/python/samples-fast.py
@@ -0,0 +1,48 @@
+import numpy as np
+rng = np.random.default_rng(123)
+DEFAULT_N = 1000000
+
+
+def normal(mean, std, n=DEFAULT_N):
+ return rng.normal(mean, std, n)
+
+
+def lognormal(mean, std, n=DEFAULT_N):
+ return rng.lognormal(mean, std, n)
+
+
+def to(low, high, n=DEFAULT_N):
+ normal95confidencePoint = 1.6448536269514722
+ logLow = np.log(low)
+ logHigh = np.log(high)
+ meanlog = (logLow + logHigh) / 2
+ sdlog = (logHigh - logLow) / (2 * normal95confidencePoint)
+ return lognormal(meanlog, sdlog, n)
+
+
+def optimized_mixture(samples_funcs, weights_array, n=DEFAULT_N):
+    normalized_weights = weights_array / sum(weights_array)
+    cumulative_sums = np.cumsum(normalized_weights)
+    # Clamp the last bin edge to 1.0 so floating-point error in the cumsum
+    # can't leave any entry of `results` unassigned
+    cumulative_sums[-1] = 1.0
+    helper_probs = rng.random(n)
+    results = np.empty(n)
+    for i, (start, end) in enumerate(zip([0] + list(cumulative_sums[:-1]), cumulative_sums)):
+        idx = np.where((helper_probs >= start) & (helper_probs < end))[0]
+        # Generate only as many samples as needed for each distribution
+        samples_func = samples_funcs[i]
+        results[idx] = samples_func(n=len(idx))
+    return results
+
+
+p_a = 0.8
+p_b = 0.5
+p_c = p_a * p_b
+dists = [
+ lambda n=1: np.zeros(n), # Distribution returning 0
+ lambda n=1: np.ones(n), # Distribution returning 1
+ lambda n=1: to(1, 3, n),
+ lambda n=1: to(2, 10, n)
+]
+weights = np.array([1 - p_c, p_c/2, p_c/4, p_c/4])
+result = optimized_mixture(dists, weights)
+mean_result = np.mean(result)
+print(f'Mean result: {mean_result}')
diff --git a/squiggle.c/makefile b/squiggle.c/makefile
@@ -1,8 +1,18 @@
OUTPUT=./samples
CC=gcc
+OPTIMIZATIONS=-funit-at-a-time -march=native -fno-math-errno -ffast-math -std=gnu99 -fno-unroll-loops -flto
build:
- $(CC) -O3 -march=native samples.c ./squiggle_c/squiggle.c ./squiggle_c/squiggle_more.c -lm -fopenmp -o $(OUTPUT)
+ $(CC) -O3 samples.c ./squiggle_c/squiggle.c ./squiggle_c/squiggle_more.c -lm -fopenmp -o $(OUTPUT)
+
+experimental:
+ # https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
+ rm -f *.gcda
+ $(CC) -Ofast -fprofile-generate $(OPTIMIZATIONS) samples.c ./squiggle_c/squiggle.c ./squiggle_c/squiggle_more.c -lm -fopenmp -o $(OUTPUT)
+ ./$(OUTPUT)
+ $(CC) -Ofast -fprofile-use $(OPTIMIZATIONS) samples.c ./squiggle_c/squiggle.c ./squiggle_c/squiggle_more.c -lm -fopenmp -o $(OUTPUT)
+ rm *.gcda
+ # Using -Ofast increases speed a bit, but I don't trust it. <https://stackoverflow.com/questions/61232427/gcc-differences-between-o3-vs-ofast-optimizations>
install:
rm -r squiggle_c
@@ -25,8 +35,8 @@ install-git:
sed -i 's|../../..|squiggle_c|' samples.c
time-linux:
- @echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
- @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
+ @echo "Running 1000x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
+ @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..1000}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 1000" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
install-small:
rm -r squiggle_c
diff --git a/squiggle.c/samples b/squiggle.c/samples
Binary files differ.
diff --git a/time.txt b/time.txt
@@ -92,4 +92,8 @@ sys 0m2.226s
Running 100x and taking avg time: OMP_NUM_THREADS=16 ./samples
Time using 16 threads: 7.20ms
+# go
+make time-linux
+Running 100x and taking avg time: ./squiggle
+Time using 16 threads: 32.70ms