squiggle.c

Self-contained Monte Carlo estimation in C99
Log | Files | Refs | README

commit 487de4a731f4af5726d61e6996fdaa12d8bc9e0b
parent e05baa6fee6c6f760297d9cc256bb880ac4ed3b1
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Sun, 16 Jul 2023 17:33:37 +0200

readd inverse cdf for raw floats. rework examples.

Diffstat:
Mscratchpad/scratchpad | 0
Mscratchpad/scratchpad.c | 204+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
2 files changed, 147 insertions(+), 57 deletions(-)

diff --git a/scratchpad/scratchpad b/scratchpad/scratchpad Binary files differ. diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c @@ -4,6 +4,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> +#include <sys/types.h> #include <time.h> #define EXIT_ON_ERROR 0 @@ -20,6 +21,7 @@ return error; \ } \ } while (0) +#define NUM_SAMPLES 10 struct box { int empty; @@ -41,7 +43,6 @@ float cdf_uniform_0_1(float x) float cdf_squared_0_1(float x) { - float result; if (x < 0) { return 0; } else if (x > 1) { @@ -174,7 +175,76 @@ struct box cdf_beta(float x) } // Inverse cdf at point -struct box inverse_cdf(struct box cdf_box(float), float p) +// Two versions of this function: +// - raw, dealing with cdfs that return floats +// - box, dealing with cdfs that return a box. + +// Inverse cdf +struct box inverse_cdf_float(float cdf(float), float p) +{ + // given a cdf: [-Inf, Inf] => [0,1] + // returns a box with either + // x such that cdf(x) = p + // or an error + // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error + + float low = -1.0; + float high = 1.0; + + // 1. Make sure that cdf(low) < p < cdf(high) + int interval_found = 0; + while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { + // ^ Using FLT_MIN and FLT_MAX is overkill + // but it's also the *correct* thing to do. + + int low_condition = (cdf(low) < p); + int high_condition = (p < cdf(high)); + if (low_condition && high_condition) { + interval_found = 1; + } else if (!low_condition) { + low = low * 2; + } else if (!high_condition) { + high = high * 2; + } + } + + if (!interval_found) { + PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf"); + } else { + + int convergence_condition = 0; + int count = 0; + while (!convergence_condition && (count < (INT_MAX / 2))) { + float mid = (high + low) / 2; + int mid_not_new = (mid == low) || (mid == high); + // float width = high - low; + // if ((width < 1e-8) || mid_not_new){ + if (mid_not_new) { + convergence_condition = 1; + } else { + float mid_sign = cdf(mid) - p; + if (mid_sign < 0) { + low = mid; + } else if (mid_sign > 0) { + high = mid; + } else if (mid_sign == 0) { + low = mid; + high = mid; + } + } + } + + if (convergence_condition) { + struct box result = {.empty = 0, .content = low}; + return result; + } else { + PROCESS_ERROR("Search process did not converge, in function inverse_cdf"); + } + + } +} + +struct box inverse_cdf_box(struct box cdf_box(float), float p) { // given a cdf: [-Inf, Inf] => Box([0,1]) // returns a box with either @@ -277,10 +347,16 @@ float rand_0_to_1(uint32_t* seed) } // Sampler based on inverse cdf and randomness function -struct box sampler(struct box cdf(float), uint32_t* seed) +struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed) +{ + float p = rand_0_to_1(seed); + struct box result = inverse_cdf_box(cdf, p); + return result; +} +struct box sampler_float_cdf(float cdf(float), uint32_t* seed) { float p = rand_0_to_1(seed); - struct box result = inverse_cdf(cdf, p); + struct box result = inverse_cdf_float(cdf, p); return result; } @@ -294,83 +370,97 @@ float sampler_normal_0_1(uint32_t* seed) return z; } -int main() -{ - - // Get the inverse cdf of a [0,1] uniform distribution at 0.5 - struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5); - char* name_1 = "cdf_uniform_0_1"; - if (result_1.empty) { - printf("Inverse for %s not calculated\n", name_1); +// Some testers +void test_inverse_cdf_float(char* cdf_name, float cdf_float(float)){ + struct box result = inverse_cdf_float(cdf_float, 0.5); + if (result.empty) { + printf("Inverse for %s not calculated\n", cdf_name); exit(1); } else { - printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content); + printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); } - // Get the inverse cdf of a [0,1] squared distribution at 0.5 - struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5); - char* name_2 = "cdf_squared_0_1"; - if (result_2.empty) { - printf("Inverse for %s not calculated\n", name_2); +} +void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float)){ + struct box result = inverse_cdf_box(cdf_box, 0.5); + if (result.empty) { + printf("Inverse for %s not calculated\n", cdf_name); exit(1); } else { - printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content); + printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); } - // Get the inverse of a normal(0,1) cdf distribution - struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5); - char* name_3 = "cdf_normal_0_1"; - if (result_3.empty) { - printf("Inverse for %s not calculated\n", name_3); - exit(1); - } else { - printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content); - } +} - // Use the sampler on a normal(0,1) - // set randomness seed - uint32_t* seed = malloc(sizeof(uint32_t)); - *seed = 1000; // xorshift can't start with 0 - int n = 100; +void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed){ + printf("\nGetting some samples from %s:\n", cdf_name); + clock_t begin = clock(); + for (int i = 0; i < NUM_SAMPLES; i++) { + struct box sample = sampler_float_cdf(cdf_float, seed); + if (sample.empty) { + printf("Error in sampler function for %s", cdf_name); + } else { + printf("%f\n", sample.content); + } + } + clock_t end = clock(); + float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; + printf("Time spent: %f\n", time_spent); +} - printf("\n\nGetting some samples from %s:\n", name_3); +void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed){ + printf("\nGetting some samples from %s:\n", cdf_name); clock_t begin = clock(); - for (int i = 0; i < n; i++) { - struct box sample = sampler(cdf_normal_0_1, seed); + for (int i = 0; i < NUM_SAMPLES; i++) { + struct box sample = sampler_box_cdf(cdf_box, seed); if (sample.empty) { - printf("Error in sampler function"); + printf("Error in sampler function for %s", cdf_name); } else { printf("%f\n", sample.content); } } clock_t end = clock(); float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; - printf("Time spent: %f", time_spent); + printf("Time spent: %f\n", time_spent); +} - // Get some normal samples using the previous method. +int main() +{ + // Test inverse cdf float + test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1); + test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1); + test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1); + + // Test inverse cdf box + test_inverse_cdf_box("cdf_beta", cdf_beta); + + // Testing samplers + // set randomness seed + uint32_t* seed = malloc(sizeof(uint32_t)); + *seed = 1000; // xorshift can't start with 0 + + // Test float sampler + test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed); + test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed); + test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed); + + // Get some normal samples using a previous approach + printf("\nGetting some samples from sampler_normal_0_1\n"); + clock_t begin_2 = clock(); - printf("\n\nGetting some samples from sampler_normal_0_1\n"); - for (int i = 0; i < n; i++) { + + for (int i = 0; i < NUM_SAMPLES; i++) { float normal_sample = sampler_normal_0_1(seed); printf("%f\n", normal_sample); } - clock_t end_2 = clock(); + + clock_t end_2 = clock(); float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC; - printf("Time spent: %f", time_spent_2); + printf("Time spent: %f\n", time_spent_2); - // Get some beta samples - clock_t begin_3 = clock(); - printf("\n\nGetting some samples from box sampler_dangerous_beta\n"); - for (int i = 0; i < n; i++) { - struct box sample = sampler(cdf_dangerous_beta, seed); - if (sample.empty) { - printf("Error in sampler function"); - } else { - printf("%f\n", sample.content); - } - } - clock_t end_3 = clock(); - float time_spent_3 = (float)(end_3 - begin_3) / CLOCKS_PER_SEC; - printf("Time spent: %f\n", time_spent_3); + // Test box sampler + test_and_time_sampler_box("cdf_beta", cdf_beta, seed); + + free(seed); return 0; }