commit ee9ed342877423bcce2040d181a52674c297cf56
parent 68e7730f24c33268a151dd197534f8c28ed376e0
Author: NunoSempere <nuno.sempere@protonmail.com>
Date: Sun, 16 Jul 2023 21:26:33 +0200
move some functions from scratchpad => squiggle.c, reorg
Diffstat:
5 files changed, 232 insertions(+), 233 deletions(-)
diff --git a/scratchpad/makefile b/scratchpad/makefile
@@ -9,7 +9,7 @@ CC=gcc # required for nested functions
# CC=tcc # <= faster compilation
# Main file
-SRC=scratchpad.c
+SRC=scratchpad.c ../squiggle.c
OUTPUT=./scratchpad
## Dependencies
diff --git a/scratchpad/scratchpad b/scratchpad/scratchpad
Binary files differ.
diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c
@@ -1,33 +1,13 @@
-#include <float.h> // FLT_MAX, FLT_MIN
-#include <limits.h> // INT_MAX
#include <math.h> // erf, sqrt
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-// #include <sys/types.h>
#include <time.h>
+#include "../squiggle.h"
-#define EXIT_ON_ERROR 0
-#define MAX_ERROR_LENGTH 500
-#define PROCESS_ERROR(...) \
- do { \
- if (EXIT_ON_ERROR) { \
- printf("@, in %s (%d)", __FILE__, __LINE__); \
- exit(1); \
- } else { \
- char error_msg[MAX_ERROR_LENGTH]; \
- snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
- struct box error = { .empty = 1, .error_msg = error_msg }; \
- return error; \
- } \
- } while (0)
#define NUM_SAMPLES 1000000
-
-struct box {
- int empty;
- float content;
- char* error_msg;
-};
+#define STOP_BETA 1.0e-8
+#define TINY_BETA 1.0e-30
// Example cdf
float cdf_uniform_0_1(float x)
@@ -59,16 +39,10 @@ float cdf_normal_0_1(float x)
return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
}
-// [x] to do: add beta.
-// [x] for the cdf, use this incomplete beta function implementation, based on continuous fractions:
-// <https://codeplea.com/incomplete-beta-function-c>
-// <https://github.com/codeplea/incbeta>
-
-#define STOP_BETA 1.0e-8
-#define TINY_BETA 1.0e-30
struct box incbeta(float a, float b, float x)
{
// Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
+ // <https://codeplea.com/incomplete-beta-function-c>
// but modified to return a box struct and floats instead of doubles.
// [ ] to do: add attribution in README
// Original code under this license:
@@ -174,200 +148,6 @@ struct box cdf_beta(float x)
}
}
-// Inverse cdf at point
-// Two versions of this function:
-// - raw, dealing with cdfs that return floats
-// - box, dealing with cdfs that return a box.
-
-// Inverse cdf
-struct box inverse_cdf_float(float cdf(float), float p)
-{
- // given a cdf: [-Inf, Inf] => [0,1]
- // returns a box with either
- // x such that cdf(x) = p
- // or an error
- // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
-
- float low = -1.0;
- float high = 1.0;
-
- // 1. Make sure that cdf(low) < p < cdf(high)
- int interval_found = 0;
- while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
- // ^ Using FLT_MIN and FLT_MAX is overkill
- // but it's also the *correct* thing to do.
-
- int low_condition = (cdf(low) < p);
- int high_condition = (p < cdf(high));
- if (low_condition && high_condition) {
- interval_found = 1;
- } else if (!low_condition) {
- low = low * 2;
- } else if (!high_condition) {
- high = high * 2;
- }
- }
-
- if (!interval_found) {
- PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
- } else {
-
- int convergence_condition = 0;
- int count = 0;
- while (!convergence_condition && (count < (INT_MAX / 2))) {
- float mid = (high + low) / 2;
- int mid_not_new = (mid == low) || (mid == high);
- // float width = high - low;
- // if ((width < 1e-8) || mid_not_new){
- if (mid_not_new) {
- convergence_condition = 1;
- } else {
- float mid_sign = cdf(mid) - p;
- if (mid_sign < 0) {
- low = mid;
- } else if (mid_sign > 0) {
- high = mid;
- } else if (mid_sign == 0) {
- low = mid;
- high = mid;
- }
- }
- }
-
- if (convergence_condition) {
- struct box result = { .empty = 0, .content = low };
- return result;
- } else {
- PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
- }
- }
-}
-
-struct box inverse_cdf_box(struct box cdf_box(float), float p)
-{
- // given a cdf: [-Inf, Inf] => Box([0,1])
- // returns a box with either
- // x such that cdf(x) = p
- // or an error
- // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
-
- float low = -1.0;
- float high = 1.0;
-
- // 1. Make sure that cdf(low) < p < cdf(high)
- int interval_found = 0;
- while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
- // ^ Using FLT_MIN and FLT_MAX is overkill
- // but it's also the *correct* thing to do.
- struct box cdf_low = cdf_box(low);
- if (cdf_low.empty) {
- PROCESS_ERROR(cdf_low.error_msg);
- }
-
- struct box cdf_high = cdf_box(high);
- if (cdf_high.empty) {
- PROCESS_ERROR(cdf_low.error_msg);
- }
-
- int low_condition = (cdf_low.content < p);
- int high_condition = (p < cdf_high.content);
- if (low_condition && high_condition) {
- interval_found = 1;
- } else if (!low_condition) {
- low = low * 2;
- } else if (!high_condition) {
- high = high * 2;
- }
- }
-
- if (!interval_found) {
- PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
- } else {
-
- int convergence_condition = 0;
- int count = 0;
- while (!convergence_condition && (count < (INT_MAX / 2))) {
- float mid = (high + low) / 2;
- int mid_not_new = (mid == low) || (mid == high);
- // float width = high - low;
- if (mid_not_new) {
- // if ((width < 1e-8) || mid_not_new){
- convergence_condition = 1;
- } else {
- struct box cdf_mid = cdf_box(mid);
- if (cdf_mid.empty) {
- PROCESS_ERROR(cdf_mid.error_msg);
- }
- float mid_sign = cdf_mid.content - p;
- if (mid_sign < 0) {
- low = mid;
- } else if (mid_sign > 0) {
- high = mid;
- } else if (mid_sign == 0) {
- low = mid;
- high = mid;
- }
- }
- }
-
- if (convergence_condition) {
- struct box result = { .empty = 0, .content = low };
- return result;
- } else {
- PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
- }
- }
-}
-
-// Some randomness functions for:
-// - Sampling from a cdf
-// - Benchmarking against a previous approach, which will be faster, but less general
-
-// Get random number between 0 and 1
-uint32_t xorshift32(uint32_t* seed)
-{
- // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
- // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
- // https://en.wikipedia.org/wiki/Xorshift
- // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
-
- uint32_t x = *seed;
- x ^= x << 13;
- x ^= x >> 17;
- x ^= x << 5;
- return *seed = x;
-}
-
-// Distribution & sampling functions
-float rand_0_to_1(uint32_t* seed)
-{
- return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
-}
-
-// Sampler based on inverse cdf and randomness function
-struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
-{
- float p = rand_0_to_1(seed);
- struct box result = inverse_cdf_box(cdf, p);
- return result;
-}
-struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
-{
- float p = rand_0_to_1(seed);
- struct box result = inverse_cdf_float(cdf, p);
- return result;
-}
-
-// Comparison point with raw normal sampler
-const float PI = 3.14159265358979323846;
-float sampler_normal_0_1(uint32_t* seed)
-{
- float u1 = rand_0_to_1(seed);
- float u2 = rand_0_to_1(seed);
- float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
- return z;
-}
-
// Some testers
void test_inverse_cdf_float(char* cdf_name, float cdf_float(float))
{
@@ -445,12 +225,12 @@ int main()
test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);
// Get some normal samples using a previous approach
- printf("\nGetting some samples from sampler_normal_0_1\n");
+ printf("\nGetting some samples from unit_normal\n");
clock_t begin_2 = clock();
for (int i = 0; i < NUM_SAMPLES; i++) {
- float normal_sample = sampler_normal_0_1(seed);
+ float normal_sample = unit_normal(seed);
// printf("%f\n", normal_sample);
}
@@ -460,11 +240,11 @@ int main()
// Test box sampler
test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
- // Ok, this is slower than python!!
- // Partly this is because I am using a more general algorithm,
- // which applies to any cdf
- // But I am also using really anal convergence conditions.
- // This could be optimized.
+ // Ok, this is slower than python!!
+ // Partly this is because I am using a more general algorithm,
+ // which applies to any cdf
+ // But I am also using really anal convergence conditions.
+ // This could be optimized.
free(seed);
return 0;
diff --git a/squiggle.c b/squiggle.c
@@ -1,6 +1,25 @@
#include <math.h>
#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <float.h>
+#include <limits.h>
+#include <time.h>
+// #include <sys/types.h>
+#define EXIT_ON_ERROR 0
+#define MAX_ERROR_LENGTH 500
+#define PROCESS_ERROR(...) \
+ do { \
+ if (EXIT_ON_ERROR) { \
+ printf("@, in %s (%d)", __FILE__, __LINE__); \
+ exit(1); \
+ } else { \
+ char error_msg[MAX_ERROR_LENGTH]; \
+ snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
+ struct box error = { .empty = 1, .error_msg = error_msg }; \
+ return error; \
+ } \
+ } while (0)
// PI constant
const float PI = M_PI; // 3.14159265358979323846;
@@ -112,3 +131,171 @@ float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint3
free(cumsummed_normalized_weights);
return result;
}
+
+// Sample from an arbitrary cdf
+struct box {
+ int empty;
+ float content;
+ char* error_msg;
+};
+
+// Inverse cdf at point
+// Two versions of this function:
+// - raw, dealing with cdfs that return floats
+// - input: cdf: float => float, p
+// - output: Box(number|error)
+// - box, dealing with cdfs that return a box.
+// - input: cdf: float => Box(number|error), p
+// - output: Box(number|error)
+struct box inverse_cdf_float(float cdf(float), float p)
+{
+ // given a cdf: [-Inf, Inf] => [0,1]
+ // returns a box with either
+ // x such that cdf(x) = p
+ // or an error
+ // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
+
+ float low = -1.0;
+ float high = 1.0;
+
+ // 1. Make sure that cdf(low) < p < cdf(high)
+ int interval_found = 0;
+ while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
+ // ^ Using FLT_MIN and FLT_MAX is overkill
+ // but it's also the *correct* thing to do.
+
+ int low_condition = (cdf(low) < p);
+ int high_condition = (p < cdf(high));
+ if (low_condition && high_condition) {
+ interval_found = 1;
+ } else if (!low_condition) {
+ low = low * 2;
+ } else if (!high_condition) {
+ high = high * 2;
+ }
+ }
+
+ if (!interval_found) {
+ PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
+ } else {
+
+ int convergence_condition = 0;
+ int count = 0;
+ while (!convergence_condition && (count < (INT_MAX / 2))) {
+ float mid = (high + low) / 2;
+ int mid_not_new = (mid == low) || (mid == high);
+ // float width = high - low;
+ // if ((width < 1e-8) || mid_not_new){
+ if (mid_not_new) {
+ convergence_condition = 1;
+ } else {
+ float mid_sign = cdf(mid) - p;
+ if (mid_sign < 0) {
+ low = mid;
+ } else if (mid_sign > 0) {
+ high = mid;
+ } else if (mid_sign == 0) {
+ low = mid;
+ high = mid;
+ }
+ }
+ }
+
+ if (convergence_condition) {
+ struct box result = { .empty = 0, .content = low };
+ return result;
+ } else {
+ PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
+ }
+ }
+}
+
+struct box inverse_cdf_box(struct box cdf_box(float), float p)
+{
+ // given a cdf: [-Inf, Inf] => Box([0,1])
+ // returns a box with either
+ // x such that cdf(x) = p
+ // or an error
+ // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
+
+ float low = -1.0;
+ float high = 1.0;
+
+ // 1. Make sure that cdf(low) < p < cdf(high)
+ int interval_found = 0;
+ while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
+ // ^ Using FLT_MIN and FLT_MAX is overkill
+ // but it's also the *correct* thing to do.
+ struct box cdf_low = cdf_box(low);
+ if (cdf_low.empty) {
+ PROCESS_ERROR(cdf_low.error_msg);
+ }
+
+ struct box cdf_high = cdf_box(high);
+ if (cdf_high.empty) {
+ PROCESS_ERROR(cdf_low.error_msg);
+ }
+
+ int low_condition = (cdf_low.content < p);
+ int high_condition = (p < cdf_high.content);
+ if (low_condition && high_condition) {
+ interval_found = 1;
+ } else if (!low_condition) {
+ low = low * 2;
+ } else if (!high_condition) {
+ high = high * 2;
+ }
+ }
+
+ if (!interval_found) {
+ PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
+ } else {
+
+ int convergence_condition = 0;
+ int count = 0;
+ while (!convergence_condition && (count < (INT_MAX / 2))) {
+ float mid = (high + low) / 2;
+ int mid_not_new = (mid == low) || (mid == high);
+ // float width = high - low;
+ if (mid_not_new) {
+ // if ((width < 1e-8) || mid_not_new){
+ convergence_condition = 1;
+ } else {
+ struct box cdf_mid = cdf_box(mid);
+ if (cdf_mid.empty) {
+ PROCESS_ERROR(cdf_mid.error_msg);
+ }
+ float mid_sign = cdf_mid.content - p;
+ if (mid_sign < 0) {
+ low = mid;
+ } else if (mid_sign > 0) {
+ high = mid;
+ } else if (mid_sign == 0) {
+ low = mid;
+ high = mid;
+ }
+ }
+ }
+
+ if (convergence_condition) {
+ struct box result = { .empty = 0, .content = low };
+ return result;
+ } else {
+ PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
+ }
+ }
+}
+
+// Sampler based on inverse cdf and randomness function
+struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
+{
+ float p = rand_0_to_1(seed);
+ struct box result = inverse_cdf_box(cdf, p);
+ return result;
+}
+struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
+{
+ float p = rand_0_to_1(seed);
+ struct box result = inverse_cdf_float(cdf, p);
+ return result;
+}
diff --git a/squiggle.h b/squiggle.h
@@ -4,13 +4,29 @@
// uint32_t header
#include <stdint.h>
+// Macros
+#define EXIT_ON_ERROR 0
+#define MAX_ERROR_LENGTH 500
+#define PROCESS_ERROR(...) \
+ do { \
+ if (EXIT_ON_ERROR) { \
+ printf("@, in %s (%d)", __FILE__, __LINE__); \
+ exit(1); \
+ } else { \
+ char error_msg[MAX_ERROR_LENGTH]; \
+ snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
+ struct box error = { .empty = 1, .error_msg = error_msg }; \
+ return error; \
+ } \
+ } while (0)
+
// Pseudo Random number generator
uint32_t xorshift32(uint32_t* seed);
// Distribution & sampling functions
float rand_0_to_1(uint32_t* seed);
float rand_float(float max, uint32_t* seed);
-float ur_normal(uint32_t* seed);
+float unit_normal(uint32_t* seed);
float random_uniform(float from, float to, uint32_t* seed);
float random_normal(float mean, float sigma, uint32_t* seed);
float random_lognormal(float logmean, float logsigma, uint32_t* seed);
@@ -23,4 +39,20 @@ void array_cumsum(float* array_to_sum, float* array_cumsummed, int length);
// Mixture function
float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed);
+// Box
+struct box {
+ int empty;
+ float content;
+ char* error_msg;
+};
+
+// Inverse cdf
+struct box inverse_cdf_float(float cdf(float), float p);
+struct box inverse_cdf_box(struct box cdf_box(float), float p);
+
+// Samplers from cdf
+struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed);
+struct box sampler_float_cdf(float cdf(float), uint32_t* seed);
+
#endif
+