squiggle.c

Self-contained Monte Carlo estimation in C99
Log | Files | Refs | README

commit d10796cae2665a5819e16dcaae07032f90456f13
parent ee9ed342877423bcce2040d181a52674c297cf56
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Sun, 16 Jul 2023 21:28:03 +0200

graduate scratchpad to its own example

Diffstat:
Aexamples/04_sample_from_cdf/example | 0
Aexamples/04_sample_from_cdf/example.c | 251+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexamples/04_sample_from_cdf/makefile | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dscratchpad/makefile | 57---------------------------------------------------------
Dscratchpad/scratchpad | 0
Dscratchpad/scratchpad.c | 251-------------------------------------------------------------------------------
6 files changed, 308 insertions(+), 308 deletions(-)

diff --git a/examples/04_sample_from_cdf/example b/examples/04_sample_from_cdf/example Binary files differ. diff --git a/examples/04_sample_from_cdf/example.c b/examples/04_sample_from_cdf/example.c @@ -0,0 +1,251 @@ +#include <math.h> // erf, sqrt +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include "../../squiggle.h" + +#define NUM_SAMPLES 1000000 +#define STOP_BETA 1.0e-8 +#define TINY_BETA 1.0e-30 + +// Example cdf +float cdf_uniform_0_1(float x) +{ + if (x < 0) { + return 0; + } else if (x > 1) { + return 1; + } else { + return x; + } +} + +float cdf_squared_0_1(float x) +{ + if (x < 0) { + return 0; + } else if (x > 1) { + return 1; + } else { + return x * x; + } +} + +float cdf_normal_0_1(float x) +{ + float mean = 0; + float std = 1; + return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h +} + +struct box incbeta(float a, float b, float x) +{ + // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>, + // <https://codeplea.com/incomplete-beta-function-c> + // but modified to return a box struct and floats instead of doubles. + // [ ] to do: add attribution in README + // Original code under this license: + /* + * zlib License + * + * Regularized Incomplete Beta Function + * + * Copyright (c) 2016, 2017 Lewis Van Winkle + * http://CodePlea.com + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + if (x < 0.0 || x > 1.0) { + PROCESS_ERROR("x out of bounds [0, 1], in function incbeta"); + } + + /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/ + if (x > (a + 1.0) / (a + b + 2.0)) { + struct box symmetric_incbeta = incbeta(b, a, 1.0 - x); + if (symmetric_incbeta.empty) { + return symmetric_incbeta; // propagate error + } else { + struct box result = { + .empty = 0, + .content = 1 - symmetric_incbeta.content + }; + return result; + } + } + + /*Find the first part before the continued fraction.*/ + const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b); + const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a; + + /*Use Lentz's algorithm to evaluate the continued fraction.*/ + float f = 1.0, c = 1.0, d = 0.0; + + int i, m; + for (i = 0; i <= 200; ++i) { + m = i / 2; + + float numerator; + if (i == 0) { + numerator = 1.0; /*First numerator is 1.0.*/ + } else if (i % 2 == 0) { + numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/ + } else { + numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/ + } + + /*Do an iteration of Lentz's algorithm.*/ + d = 1.0 + numerator * d; + if (fabs(d) < TINY_BETA) + d = TINY_BETA; + d = 1.0 / d; + + c = 1.0 + numerator / c; + if (fabs(c) < TINY_BETA) + c = TINY_BETA; + + const float cd = c * d; + f *= cd; + + /*Check for stop.*/ + if (fabs(1.0 - cd) < STOP_BETA) { + struct box result = { + .empty = 0, + .content = front * (f - 1.0) + }; + return result; + } + } + + PROCESS_ERROR("More loops needed, did not converge, in function incbeta"); +} + +struct box cdf_beta(float x) +{ + if (x < 0) { + struct box result = { .empty = 0, .content = 0 }; + return result; + } else if (x > 1) { + struct box result = { .empty = 0, .content = 1 }; + return result; + } else { + float successes = 1, failures = (2023 - 1945); + return incbeta(successes, failures, x); + } +} + +// Some testers +void test_inverse_cdf_float(char* cdf_name, float cdf_float(float)) +{ + struct box result = inverse_cdf_float(cdf_float, 0.5); + if (result.empty) { + printf("Inverse for %s not calculated\n", cdf_name); + exit(1); + } else { + printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); + } +} +void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float)) +{ + struct box result = inverse_cdf_box(cdf_box, 0.5); + if (result.empty) { + printf("Inverse for %s not calculated\n", cdf_name); + exit(1); + } else { + printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); + } +} + +void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed) +{ + printf("\nGetting some samples from %s:\n", cdf_name); + clock_t begin = clock(); + for (int i = 0; i < NUM_SAMPLES; i++) { + struct box sample = sampler_float_cdf(cdf_float, seed); + if (sample.empty) { + printf("Error in sampler function for %s", cdf_name); + } else { + // printf("%f\n", sample.content); + } + } + clock_t end = clock(); + float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; + printf("Time spent: %f\n", time_spent); +} + +void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed) +{ + printf("\nGetting some samples from %s:\n", cdf_name); + clock_t begin = clock(); + for (int i = 0; i < NUM_SAMPLES; i++) { + struct box sample = sampler_box_cdf(cdf_box, seed); + if (sample.empty) { + printf("Error in sampler function for %s", cdf_name); + } else { + // printf("%f\n", sample.content); + } + } + clock_t end = clock(); + float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; + printf("Time spent: %f\n", time_spent); +} + +int main() +{ + // Test inverse cdf float + test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1); + test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1); + test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1); + + // Test inverse cdf box + test_inverse_cdf_box("cdf_beta", cdf_beta); + + // Testing samplers + // set randomness seed + uint32_t* seed = malloc(sizeof(uint32_t)); + *seed = 1000; // xorshift can't start with 0 + + // Test float sampler + test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed); + test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed); + test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed); + + // Get some normal samples using a previous approach + printf("\nGetting some samples from unit_normal\n"); + + clock_t begin_2 = clock(); + + for (int i = 0; i < NUM_SAMPLES; i++) { + float normal_sample = unit_normal(seed); + // printf("%f\n", normal_sample); + } + + clock_t end_2 = clock(); + float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC; + printf("Time spent: %f\n", time_spent_2); + + // Test box sampler + test_and_time_sampler_box("cdf_beta", cdf_beta, seed); + // Ok, this is slower than python!! + // Partly this is because I am using a more general algorithm, + // which applies to any cdf + // But I am also using really anal convergence conditions. + // This could be optimized. + + free(seed); + return 0; +} diff --git a/examples/04_sample_from_cdf/makefile b/examples/04_sample_from_cdf/makefile @@ -0,0 +1,57 @@ +# Interface: +# make +# make build +# make format +# make run + +# Compiler +CC=gcc # required for nested functions +# CC=tcc # <= faster compilation + +# Main file +SRC=example.c ../../squiggle.c +OUTPUT=./example + +## Dependencies +MATH=-lm +DEPENDENCIES=$(MATH) +# OPENMP=-fopenmp + +## Flags +DEBUG= #'-g' +STANDARD=-std=gnu99 ## allows for nested functions. +EXTENSIONS= #-fnested-functions +WARNINGS=-Wall +OPTIMIZED=-O3#-Ofast +CFLAGS=$(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED) + +## Formatter +STYLE_BLUEPRINT=webkit +FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT) + +## make build +build: $(SRC) + # gcc -std=gnu99 example.c -lm -o example + $(CC) $(CFLAGS) $(SRC) $(DEPENDENCIES) -o $(OUTPUT) + +format: $(SRC) + $(FORMATTER) $(SRC) + +run: $(SRC) $(OUTPUT) + ./$(OUTPUT) && echo + +time-linux: + @echo "Requires /bin/time, found on GNU/Linux systems" && echo + + @echo "Running 100x and taking avg time $(OUTPUT)" + @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo + +## Profiling + +profile-linux: + echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar" + echo "Must be run as sudo" + $(CC) $(SRC) $(MATH) -o $(OUTPUT) + sudo perf record $(OUTPUT) + sudo perf report + rm perf.data diff --git a/scratchpad/makefile b/scratchpad/makefile @@ -1,57 +0,0 @@ -# Interface: -# make -# make build -# make format -# make run - -# Compiler -CC=gcc # required for nested functions -# CC=tcc # <= faster compilation - -# Main file -SRC=scratchpad.c ../squiggle.c -OUTPUT=./scratchpad - -## Dependencies -MATH=-lm -DEPENDENCIES=$(MATH) -# OPENMP=-fopenmp - -## Flags -DEBUG= #'-g' -STANDARD=-std=gnu99 ## allows for nested functions. -EXTENSIONS= #-fnested-functions -WARNINGS=-Wall -OPTIMIZED=-O3#-Ofast -CFLAGS=$(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED) - -## Formatter -STYLE_BLUEPRINT=webkit -FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT) - -## make build -build: $(SRC) - # gcc -std=gnu99 scratchpad.c -lm -o scratchpad - $(CC) $(CFLAGS) $(SRC) $(DEPENDENCIES) -o $(OUTPUT) - -format: $(SRC) - $(FORMATTER) $(SRC) - -run: $(SRC) $(OUTPUT) - ./$(OUTPUT) && echo - -time-linux: - @echo "Requires /bin/time, found on GNU/Linux systems" && echo - - @echo "Running 100x and taking avg time $(OUTPUT)" - @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo - -## Profiling - -profile-linux: - echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar" - echo "Must be run as sudo" - $(CC) $(SRC) $(MATH) -o $(OUTPUT) - sudo perf record $(OUTPUT) - sudo perf report - rm perf.data diff --git a/scratchpad/scratchpad b/scratchpad/scratchpad Binary files differ. diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c @@ -1,251 +0,0 @@ -#include <math.h> // erf, sqrt -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include "../squiggle.h" - -#define NUM_SAMPLES 1000000 -#define STOP_BETA 1.0e-8 -#define TINY_BETA 1.0e-30 - -// Example cdf -float cdf_uniform_0_1(float x) -{ - if (x < 0) { - return 0; - } else if (x > 1) { - return 1; - } else { - return x; - } -} - -float cdf_squared_0_1(float x) -{ - if (x < 0) { - return 0; - } else if (x > 1) { - return 1; - } else { - return x * x; - } -} - -float cdf_normal_0_1(float x) -{ - float mean = 0; - float std = 1; - return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h -} - -struct box incbeta(float a, float b, float x) -{ - // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>, - // <https://codeplea.com/incomplete-beta-function-c> - // but modified to return a box struct and floats instead of doubles. - // [ ] to do: add attribution in README - // Original code under this license: - /* - * zlib License - * - * Regularized Incomplete Beta Function - * - * Copyright (c) 2016, 2017 Lewis Van Winkle - * http://CodePlea.com - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - if (x < 0.0 || x > 1.0) { - PROCESS_ERROR("x out of bounds [0, 1], in function incbeta"); - } - - /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/ - if (x > (a + 1.0) / (a + b + 2.0)) { - struct box symmetric_incbeta = incbeta(b, a, 1.0 - x); - if (symmetric_incbeta.empty) { - return symmetric_incbeta; // propagate error - } else { - struct box result = { - .empty = 0, - .content = 1 - symmetric_incbeta.content - }; - return result; - } - } - - /*Find the first part before the continued fraction.*/ - const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b); - const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a; - - /*Use Lentz's algorithm to evaluate the continued fraction.*/ - float f = 1.0, c = 1.0, d = 0.0; - - int i, m; - for (i = 0; i <= 200; ++i) { - m = i / 2; - - float numerator; - if (i == 0) { - numerator = 1.0; /*First numerator is 1.0.*/ - } else if (i % 2 == 0) { - numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/ - } else { - numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/ - } - - /*Do an iteration of Lentz's algorithm.*/ - d = 1.0 + numerator * d; - if (fabs(d) < TINY_BETA) - d = TINY_BETA; - d = 1.0 / d; - - c = 1.0 + numerator / c; - if (fabs(c) < TINY_BETA) - c = TINY_BETA; - - const float cd = c * d; - f *= cd; - - /*Check for stop.*/ - if (fabs(1.0 - cd) < STOP_BETA) { - struct box result = { - .empty = 0, - .content = front * (f - 1.0) - }; - return result; - } - } - - PROCESS_ERROR("More loops needed, did not converge, in function incbeta"); -} - -struct box cdf_beta(float x) -{ - if (x < 0) { - struct box result = { .empty = 0, .content = 0 }; - return result; - } else if (x > 1) { - struct box result = { .empty = 0, .content = 1 }; - return result; - } else { - float successes = 1, failures = (2023 - 1945); - return incbeta(successes, failures, x); - } -} - -// Some testers -void test_inverse_cdf_float(char* cdf_name, float cdf_float(float)) -{ - struct box result = inverse_cdf_float(cdf_float, 0.5); - if (result.empty) { - printf("Inverse for %s not calculated\n", cdf_name); - exit(1); - } else { - printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); - } -} -void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float)) -{ - struct box result = inverse_cdf_box(cdf_box, 0.5); - if (result.empty) { - printf("Inverse for %s not calculated\n", cdf_name); - exit(1); - } else { - printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); - } -} - -void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed) -{ - printf("\nGetting some samples from %s:\n", cdf_name); - clock_t begin = clock(); - for (int i = 0; i < NUM_SAMPLES; i++) { - struct box sample = sampler_float_cdf(cdf_float, seed); - if (sample.empty) { - printf("Error in sampler function for %s", cdf_name); - } else { - // printf("%f\n", sample.content); - } - } - clock_t end = clock(); - float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; - printf("Time spent: %f\n", time_spent); -} - -void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed) -{ - printf("\nGetting some samples from %s:\n", cdf_name); - clock_t begin = clock(); - for (int i = 0; i < NUM_SAMPLES; i++) { - struct box sample = sampler_box_cdf(cdf_box, seed); - if (sample.empty) { - printf("Error in sampler function for %s", cdf_name); - } else { - // printf("%f\n", sample.content); - } - } - clock_t end = clock(); - float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; - printf("Time spent: %f\n", time_spent); -} - -int main() -{ - // Test inverse cdf float - test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1); - test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1); - test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1); - - // Test inverse cdf box - test_inverse_cdf_box("cdf_beta", cdf_beta); - - // Testing samplers - // set randomness seed - uint32_t* seed = malloc(sizeof(uint32_t)); - *seed = 1000; // xorshift can't start with 0 - - // Test float sampler - test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed); - test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed); - test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed); - - // Get some normal samples using a previous approach - printf("\nGetting some samples from unit_normal\n"); - - clock_t begin_2 = clock(); - - for (int i = 0; i < NUM_SAMPLES; i++) { - float normal_sample = unit_normal(seed); - // printf("%f\n", normal_sample); - } - - clock_t end_2 = clock(); - float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC; - printf("Time spent: %f\n", time_spent_2); - - // Test box sampler - test_and_time_sampler_box("cdf_beta", cdf_beta, seed); - // Ok, this is slower than python!! - // Partly this is because I am using a more general algorithm, - // which applies to any cdf - // But I am also using really anal convergence conditions. - // This could be optimized. - - free(seed); - return 0; -}