graduate scratchpad to its own example - squiggle.c - Self-contained Monte Carlo estimation in C99

commit d10796cae2665a5819e16dcaae07032f90456f13
parent ee9ed342877423bcce2040d181a52674c297cf56
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Sun, 16 Jul 2023 21:28:03 +0200

graduate scratchpad to its own example

Diffstat:
A examples/04_sample_from_cdf/example  | 0 
A examples/04_sample_from_cdf/example.c  | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A examples/04_sample_from_cdf/makefile  | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D scratchpad/makefile  | 57 ---------------------------------------------------------
D scratchpad/scratchpad  | 0 
D scratchpad/scratchpad.c  | 251 -------------------------------------------------------------------------------

6 files changed, 308 insertions(+), 308 deletions(-)
diff --git a/examples/04_sample_from_cdf/example b/examples/04_sample_from_cdf/example
Binary files differ.
diff --git a/examples/04_sample_from_cdf/example.c b/examples/04_sample_from_cdf/example.c
@@ -0,0 +1,251 @@
+#include <math.h> // erf, sqrt
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include "../../squiggle.h"
+
+#define NUM_SAMPLES 1000000
+#define STOP_BETA 1.0e-8
+#define TINY_BETA 1.0e-30
+
+// Example cdf
+float cdf_uniform_0_1(float x)
+{
+    if (x < 0) {
+        return 0;
+    } else if (x > 1) {
+        return 1;
+    } else {
+        return x;
+    }
+}
+
+float cdf_squared_0_1(float x)
+{
+    if (x < 0) {
+        return 0;
+    } else if (x > 1) {
+        return 1;
+    } else {
+        return x * x;
+    }
+}
+
+float cdf_normal_0_1(float x)
+{
+    float mean = 0;
+    float std = 1;
+    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
+}
+
+struct box incbeta(float a, float b, float x)
+{
+    // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
+    // <https://codeplea.com/incomplete-beta-function-c>
+    // but modified to return a box struct and floats instead of doubles.
+    // [ ] to do: add attribution in README
+    // Original code under this license:
+    /*
+		 * zlib License
+		 *
+		 * Regularized Incomplete Beta Function
+		 *
+		 * Copyright (c) 2016, 2017 Lewis Van Winkle
+		 * http://CodePlea.com
+		 *
+		 * This software is provided 'as-is', without any express or implied
+		 * warranty. In no event will the authors be held liable for any damages
+		 * arising from the use of this software.
+		 *
+		 * Permission is granted to anyone to use this software for any purpose,
+		 * including commercial applications, and to alter it and redistribute it
+		 * freely, subject to the following restrictions:
+		 *
+		 * 1. The origin of this software must not be misrepresented; you must not
+		 *    claim that you wrote the original software. If you use this software
+		 *    in a product, an acknowledgement in the product documentation would be
+		 *    appreciated but is not required.
+		 * 2. Altered source versions must be plainly marked as such, and must not be
+		 *    misrepresented as being the original software.
+		 * 3. This notice may not be removed or altered from any source distribution.
+		 */
+    if (x < 0.0 || x > 1.0) {
+        PROCESS_ERROR("x out of bounds [0, 1], in function incbeta");
+    }
+
+    /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
+    if (x > (a + 1.0) / (a + b + 2.0)) {
+        struct box symmetric_incbeta = incbeta(b, a, 1.0 - x);
+        if (symmetric_incbeta.empty) {
+            return symmetric_incbeta; // propagate error
+        } else {
+            struct box result = {
+                .empty = 0,
+                .content = 1 - symmetric_incbeta.content
+            };
+            return result;
+        }
+    }
+
+    /*Find the first part before the continued fraction.*/
+    const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b);
+    const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a;
+
+    /*Use Lentz's algorithm to evaluate the continued fraction.*/
+    float f = 1.0, c = 1.0, d = 0.0;
+
+    int i, m;
+    for (i = 0; i <= 200; ++i) {
+        m = i / 2;
+
+        float numerator;
+        if (i == 0) {
+            numerator = 1.0; /*First numerator is 1.0.*/
+        } else if (i % 2 == 0) {
+            numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/
+        } else {
+            numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/
+        }
+
+        /*Do an iteration of Lentz's algorithm.*/
+        d = 1.0 + numerator * d;
+        if (fabs(d) < TINY_BETA)
+            d = TINY_BETA;
+        d = 1.0 / d;
+
+        c = 1.0 + numerator / c;
+        if (fabs(c) < TINY_BETA)
+            c = TINY_BETA;
+
+        const float cd = c * d;
+        f *= cd;
+
+        /*Check for stop.*/
+        if (fabs(1.0 - cd) < STOP_BETA) {
+            struct box result = {
+                .empty = 0,
+                .content = front * (f - 1.0)
+            };
+            return result;
+        }
+    }
+
+    PROCESS_ERROR("More loops needed, did not converge, in function incbeta");
+}
+
+struct box cdf_beta(float x)
+{
+    if (x < 0) {
+        struct box result = { .empty = 0, .content = 0 };
+        return result;
+    } else if (x > 1) {
+        struct box result = { .empty = 0, .content = 1 };
+        return result;
+    } else {
+        float successes = 1, failures = (2023 - 1945);
+        return incbeta(successes, failures, x);
+    }
+}
+
+// Some testers
+void test_inverse_cdf_float(char* cdf_name, float cdf_float(float))
+{
+    struct box result = inverse_cdf_float(cdf_float, 0.5);
+    if (result.empty) {
+        printf("Inverse for %s not calculated\n", cdf_name);
+        exit(1);
+    } else {
+        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
+    }
+}
+void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float))
+{
+    struct box result = inverse_cdf_box(cdf_box, 0.5);
+    if (result.empty) {
+        printf("Inverse for %s not calculated\n", cdf_name);
+        exit(1);
+    } else {
+        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
+    }
+}
+
+void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed)
+{
+    printf("\nGetting some samples from %s:\n", cdf_name);
+    clock_t begin = clock();
+    for (int i = 0; i < NUM_SAMPLES; i++) {
+        struct box sample = sampler_float_cdf(cdf_float, seed);
+        if (sample.empty) {
+            printf("Error in sampler function for %s", cdf_name);
+        } else {
+            // printf("%f\n", sample.content);
+        }
+    }
+    clock_t end = clock();
+    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
+    printf("Time spent: %f\n", time_spent);
+}
+
+void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed)
+{
+    printf("\nGetting some samples from %s:\n", cdf_name);
+    clock_t begin = clock();
+    for (int i = 0; i < NUM_SAMPLES; i++) {
+        struct box sample = sampler_box_cdf(cdf_box, seed);
+        if (sample.empty) {
+            printf("Error in sampler function for %s", cdf_name);
+        } else {
+            // printf("%f\n", sample.content);
+        }
+    }
+    clock_t end = clock();
+    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
+    printf("Time spent: %f\n", time_spent);
+}
+
+int main()
+{
+    // Test inverse cdf float
+    test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1);
+    test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1);
+    test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1);
+
+    // Test inverse cdf box
+    test_inverse_cdf_box("cdf_beta", cdf_beta);
+
+    // Testing samplers
+    // set randomness seed
+    uint32_t* seed = malloc(sizeof(uint32_t));
+    *seed = 1000; // xorshift can't start with 0
+
+    // Test float sampler
+    test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed);
+    test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed);
+    test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);
+
+    // Get some normal samples using a previous approach
+    printf("\nGetting some samples from unit_normal\n");
+
+    clock_t begin_2 = clock();
+
+    for (int i = 0; i < NUM_SAMPLES; i++) {
+        float normal_sample = unit_normal(seed);
+        // printf("%f\n", normal_sample);
+    }
+
+    clock_t end_2 = clock();
+    float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
+    printf("Time spent: %f\n", time_spent_2);
+
+    // Test box sampler
+    test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
+    // Ok, this is slower than python!!
+    // Partly this is because I am using a more general algorithm,
+    //   which applies to any cdf
+    // But I am also using really anal convergence conditions.
+    // This could be optimized.
+
+    free(seed);
+    return 0;
+}
diff --git a/examples/04_sample_from_cdf/makefile b/examples/04_sample_from_cdf/makefile
@@ -0,0 +1,57 @@
+# Interface: 
+#   make
+#   make build
+#   make format
+#   make run
+
+# Compiler
+CC=gcc # required for nested functions
+# CC=tcc # <= faster compilation
+
+# Main file
+SRC=example.c ../../squiggle.c
+OUTPUT=./example
+
+## Dependencies
+MATH=-lm
+DEPENDENCIES=$(MATH)
+# OPENMP=-fopenmp
+
+## Flags
+DEBUG= #'-g'
+STANDARD=-std=gnu99 ## allows for nested functions.
+EXTENSIONS= #-fnested-functions
+WARNINGS=-Wall
+OPTIMIZED=-O3#-Ofast
+CFLAGS=$(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED)
+
+## Formatter
+STYLE_BLUEPRINT=webkit
+FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
+
+## make build
+build: $(SRC)
+	# gcc -std=gnu99 example.c -lm -o example
+	$(CC) $(CFLAGS) $(SRC) $(DEPENDENCIES) -o $(OUTPUT)
+
+format: $(SRC)
+	$(FORMATTER) $(SRC)
+
+run: $(SRC) $(OUTPUT)
+	./$(OUTPUT) && echo
+
+time-linux: 
+	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
+	
+	@echo "Running 100x and taking avg time $(OUTPUT)"
+	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
+
+## Profiling
+
+profile-linux: 
+	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
+	echo "Must be run as sudo"
+	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
+	sudo perf record $(OUTPUT)
+	sudo perf report
+	rm perf.data
diff --git a/scratchpad/makefile b/scratchpad/makefile
@@ -1,57 +0,0 @@
-# Interface: 
-#   make
-#   make build
-#   make format
-#   make run
-
-# Compiler
-CC=gcc # required for nested functions
-# CC=tcc # <= faster compilation
-
-# Main file
-SRC=scratchpad.c ../squiggle.c
-OUTPUT=./scratchpad
-
-## Dependencies
-MATH=-lm
-DEPENDENCIES=$(MATH)
-# OPENMP=-fopenmp
-
-## Flags
-DEBUG= #'-g'
-STANDARD=-std=gnu99 ## allows for nested functions.
-EXTENSIONS= #-fnested-functions
-WARNINGS=-Wall
-OPTIMIZED=-O3#-Ofast
-CFLAGS=$(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED)
-
-## Formatter
-STYLE_BLUEPRINT=webkit
-FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
-
-## make build
-build: $(SRC)
-	# gcc -std=gnu99 scratchpad.c -lm -o scratchpad
-	$(CC) $(CFLAGS) $(SRC) $(DEPENDENCIES) -o $(OUTPUT)
-
-format: $(SRC)
-	$(FORMATTER) $(SRC)
-
-run: $(SRC) $(OUTPUT)
-	./$(OUTPUT) && echo
-
-time-linux: 
-	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
-	
-	@echo "Running 100x and taking avg time $(OUTPUT)"
-	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
-
-## Profiling
-
-profile-linux: 
-	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
-	echo "Must be run as sudo"
-	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
-	sudo perf record $(OUTPUT)
-	sudo perf report
-	rm perf.data
diff --git a/scratchpad/scratchpad b/scratchpad/scratchpad
Binary files differ.
diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c
@@ -1,251 +0,0 @@
-#include <math.h> // erf, sqrt
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include "../squiggle.h"
-
-#define NUM_SAMPLES 1000000
-#define STOP_BETA 1.0e-8
-#define TINY_BETA 1.0e-30
-
-// Example cdf
-float cdf_uniform_0_1(float x)
-{
-    if (x < 0) {
-        return 0;
-    } else if (x > 1) {
-        return 1;
-    } else {
-        return x;
-    }
-}
-
-float cdf_squared_0_1(float x)
-{
-    if (x < 0) {
-        return 0;
-    } else if (x > 1) {
-        return 1;
-    } else {
-        return x * x;
-    }
-}
-
-float cdf_normal_0_1(float x)
-{
-    float mean = 0;
-    float std = 1;
-    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
-}
-
-struct box incbeta(float a, float b, float x)
-{
-    // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
-    // <https://codeplea.com/incomplete-beta-function-c>
-    // but modified to return a box struct and floats instead of doubles.
-    // [ ] to do: add attribution in README
-    // Original code under this license:
-    /*
-		 * zlib License
-		 *
-		 * Regularized Incomplete Beta Function
-		 *
-		 * Copyright (c) 2016, 2017 Lewis Van Winkle
-		 * http://CodePlea.com
-		 *
-		 * This software is provided 'as-is', without any express or implied
-		 * warranty. In no event will the authors be held liable for any damages
-		 * arising from the use of this software.
-		 *
-		 * Permission is granted to anyone to use this software for any purpose,
-		 * including commercial applications, and to alter it and redistribute it
-		 * freely, subject to the following restrictions:
-		 *
-		 * 1. The origin of this software must not be misrepresented; you must not
-		 *    claim that you wrote the original software. If you use this software
-		 *    in a product, an acknowledgement in the product documentation would be
-		 *    appreciated but is not required.
-		 * 2. Altered source versions must be plainly marked as such, and must not be
-		 *    misrepresented as being the original software.
-		 * 3. This notice may not be removed or altered from any source distribution.
-		 */
-    if (x < 0.0 || x > 1.0) {
-        PROCESS_ERROR("x out of bounds [0, 1], in function incbeta");
-    }
-
-    /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
-    if (x > (a + 1.0) / (a + b + 2.0)) {
-        struct box symmetric_incbeta = incbeta(b, a, 1.0 - x);
-        if (symmetric_incbeta.empty) {
-            return symmetric_incbeta; // propagate error
-        } else {
-            struct box result = {
-                .empty = 0,
-                .content = 1 - symmetric_incbeta.content
-            };
-            return result;
-        }
-    }
-
-    /*Find the first part before the continued fraction.*/
-    const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b);
-    const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a;
-
-    /*Use Lentz's algorithm to evaluate the continued fraction.*/
-    float f = 1.0, c = 1.0, d = 0.0;
-
-    int i, m;
-    for (i = 0; i <= 200; ++i) {
-        m = i / 2;
-
-        float numerator;
-        if (i == 0) {
-            numerator = 1.0; /*First numerator is 1.0.*/
-        } else if (i % 2 == 0) {
-            numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/
-        } else {
-            numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/
-        }
-
-        /*Do an iteration of Lentz's algorithm.*/
-        d = 1.0 + numerator * d;
-        if (fabs(d) < TINY_BETA)
-            d = TINY_BETA;
-        d = 1.0 / d;
-
-        c = 1.0 + numerator / c;
-        if (fabs(c) < TINY_BETA)
-            c = TINY_BETA;
-
-        const float cd = c * d;
-        f *= cd;
-
-        /*Check for stop.*/
-        if (fabs(1.0 - cd) < STOP_BETA) {
-            struct box result = {
-                .empty = 0,
-                .content = front * (f - 1.0)
-            };
-            return result;
-        }
-    }
-
-    PROCESS_ERROR("More loops needed, did not converge, in function incbeta");
-}
-
-struct box cdf_beta(float x)
-{
-    if (x < 0) {
-        struct box result = { .empty = 0, .content = 0 };
-        return result;
-    } else if (x > 1) {
-        struct box result = { .empty = 0, .content = 1 };
-        return result;
-    } else {
-        float successes = 1, failures = (2023 - 1945);
-        return incbeta(successes, failures, x);
-    }
-}
-
-// Some testers
-void test_inverse_cdf_float(char* cdf_name, float cdf_float(float))
-{
-    struct box result = inverse_cdf_float(cdf_float, 0.5);
-    if (result.empty) {
-        printf("Inverse for %s not calculated\n", cdf_name);
-        exit(1);
-    } else {
-        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
-    }
-}
-void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float))
-{
-    struct box result = inverse_cdf_box(cdf_box, 0.5);
-    if (result.empty) {
-        printf("Inverse for %s not calculated\n", cdf_name);
-        exit(1);
-    } else {
-        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
-    }
-}
-
-void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed)
-{
-    printf("\nGetting some samples from %s:\n", cdf_name);
-    clock_t begin = clock();
-    for (int i = 0; i < NUM_SAMPLES; i++) {
-        struct box sample = sampler_float_cdf(cdf_float, seed);
-        if (sample.empty) {
-            printf("Error in sampler function for %s", cdf_name);
-        } else {
-            // printf("%f\n", sample.content);
-        }
-    }
-    clock_t end = clock();
-    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
-    printf("Time spent: %f\n", time_spent);
-}
-
-void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed)
-{
-    printf("\nGetting some samples from %s:\n", cdf_name);
-    clock_t begin = clock();
-    for (int i = 0; i < NUM_SAMPLES; i++) {
-        struct box sample = sampler_box_cdf(cdf_box, seed);
-        if (sample.empty) {
-            printf("Error in sampler function for %s", cdf_name);
-        } else {
-            // printf("%f\n", sample.content);
-        }
-    }
-    clock_t end = clock();
-    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
-    printf("Time spent: %f\n", time_spent);
-}
-
-int main()
-{
-    // Test inverse cdf float
-    test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1);
-    test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1);
-    test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1);
-
-    // Test inverse cdf box
-    test_inverse_cdf_box("cdf_beta", cdf_beta);
-
-    // Testing samplers
-    // set randomness seed
-    uint32_t* seed = malloc(sizeof(uint32_t));
-    *seed = 1000; // xorshift can't start with 0
-
-    // Test float sampler
-    test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed);
-    test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed);
-    test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);
-
-    // Get some normal samples using a previous approach
-    printf("\nGetting some samples from unit_normal\n");
-
-    clock_t begin_2 = clock();
-
-    for (int i = 0; i < NUM_SAMPLES; i++) {
-        float normal_sample = unit_normal(seed);
-        // printf("%f\n", normal_sample);
-    }
-
-    clock_t end_2 = clock();
-    float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
-    printf("Time spent: %f\n", time_spent_2);
-
-    // Test box sampler
-    test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
-    // Ok, this is slower than python!!
-    // Partly this is because I am using a more general algorithm,
-    //   which applies to any cdf
-    // But I am also using really anal convergence conditions.
-    // This could be optimized.
-
-    free(seed);
-    return 0;
-}

	squiggle.c Self-contained Monte Carlo estimation in C99
	Log \| Files \| Refs \| README

A	examples/04_sample_from_cdf/example	\|	0
A	examples/04_sample_from_cdf/example.c	\|	251	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	examples/04_sample_from_cdf/makefile	\|	57	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	scratchpad/makefile	\|	57	---------------------------------------------------------
D	scratchpad/scratchpad	\|	0
D	scratchpad/scratchpad.c	\|	251	-------------------------------------------------------------------------------