reorder scratchpad stuff - squiggle.c - Self-contained Monte Carlo estimation in C99

commit e05baa6fee6c6f760297d9cc256bb880ac4ed3b1
parent 78e1838569da7d4ded5c680475b85644e9317ed8
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Sun, 16 Jul 2023 17:10:36 +0200

reorder scratchpad stuff

Diffstat:
M scratchpad/scratchpad.c  | 318 ++++++++++++++++++++++++++++---------------------------------------------------

1 file changed, 112 insertions(+), 206 deletions(-)
diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c
@@ -41,6 +41,7 @@ float cdf_uniform_0_1(float x)
 
 float cdf_squared_0_1(float x)
 {
+	  float result;
     if (x < 0) {
         return 0;
     } else if (x > 1) {
@@ -57,73 +58,123 @@ float cdf_normal_0_1(float x)
     return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
 }
 
-// Inverse cdf
-struct box inverse_cdf(float cdf(float), float p)
-{
-    // given a cdf: [-Inf, Inf] => [0,1]
-    // returns a box with either
-    // x such that cdf(x) = p
-    // or an error
-    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
-
-    float low = -1.0;
-    float high = 1.0;
+// [x] to do: add beta.
+// [x] for the cdf, use this incomplete beta function implementation, based on continuous fractions:
+// <https://codeplea.com/incomplete-beta-function-c>
+// <https://github.com/codeplea/incbeta>
 
-    // 1. Make sure that cdf(low) < p < cdf(high)
-    int interval_found = 0;
-    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
-        // ^ Using FLT_MIN and FLT_MAX is overkill
-        // but it's also the *correct* thing to do.
+#define STOP_BETA 1.0e-8
+#define TINY_BETA 1.0e-30
+struct box incbeta(float a, float b, float x)
+{
+    // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
+    // but modified to return a box struct and floats instead of doubles.
+    // [ ] to do: add attribution in README
+    // Original code under this license:
+    /*
+		 * zlib License
+		 *
+		 * Regularized Incomplete Beta Function
+		 *
+		 * Copyright (c) 2016, 2017 Lewis Van Winkle
+		 * http://CodePlea.com
+		 *
+		 * This software is provided 'as-is', without any express or implied
+		 * warranty. In no event will the authors be held liable for any damages
+		 * arising from the use of this software.
+		 *
+		 * Permission is granted to anyone to use this software for any purpose,
+		 * including commercial applications, and to alter it and redistribute it
+		 * freely, subject to the following restrictions:
+		 *
+		 * 1. The origin of this software must not be misrepresented; you must not
+		 *    claim that you wrote the original software. If you use this software
+		 *    in a product, an acknowledgement in the product documentation would be
+		 *    appreciated but is not required.
+		 * 2. Altered source versions must be plainly marked as such, and must not be
+		 *    misrepresented as being the original software.
+		 * 3. This notice may not be removed or altered from any source distribution.
+		 */
+    if (x < 0.0 || x > 1.0) {
+        PROCESS_ERROR("x out of bounds [0, 1], in function incbeta");
+    }
 
-        int low_condition = (cdf(low) < p);
-        int high_condition = (p < cdf(high));
-        if (low_condition && high_condition) {
-            interval_found = 1;
-        } else if (!low_condition) {
-            low = low * 2;
-        } else if (!high_condition) {
-            high = high * 2;
+    /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
+    if (x > (a + 1.0) / (a + b + 2.0)) {
+        struct box symmetric_incbeta = incbeta(b, a, 1.0 - x);
+        if (symmetric_incbeta.empty) {
+            return symmetric_incbeta; // propagate error
+        } else {
+						struct box result = {
+							.empty = 0, 
+							.content = 1 - symmetric_incbeta.content
+						};
+            return result;
         }
     }
 
-    if (!interval_found) {
-        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
-    } else {
+    /*Find the first part before the continued fraction.*/
+    const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b);
+    const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a;
 
-        int convergence_condition = 0;
-        int count = 0;
-        while (!convergence_condition && (count < (INT_MAX / 2))) {
-            float mid = (high + low) / 2;
-            int mid_not_new = (mid == low) || (mid == high);
-            // float width = high - low;
-						// if ((width < 1e-8) || mid_not_new){
-            if (mid_not_new) {
-                convergence_condition = 1;
-            } else {
-                float mid_sign = cdf(mid) - p;
-                if (mid_sign < 0) {
-                    low = mid;
-                } else if (mid_sign > 0) {
-                    high = mid;
-                } else if (mid_sign == 0) {
-                    low = mid;
-                    high = mid;
-                }
-            }
-        }
+    /*Use Lentz's algorithm to evaluate the continued fraction.*/
+    float f = 1.0, c = 1.0, d = 0.0;
 
-        if (convergence_condition) {
-						struct box result = {.empty = 0, .content = low};
-						return result;
+    int i, m;
+    for (i = 0; i <= 200; ++i) {
+        m = i / 2;
+
+        float numerator;
+        if (i == 0) {
+            numerator = 1.0; /*First numerator is 1.0.*/
+        } else if (i % 2 == 0) {
+            numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/
         } else {
-            PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
+            numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/
         }
 
+        /*Do an iteration of Lentz's algorithm.*/
+        d = 1.0 + numerator * d;
+        if (fabs(d) < TINY_BETA)
+            d = TINY_BETA;
+        d = 1.0 / d;
+
+        c = 1.0 + numerator / c;
+        if (fabs(c) < TINY_BETA)
+            c = TINY_BETA;
+
+        const float cd = c * d;
+        f *= cd;
+
+        /*Check for stop.*/
+        if (fabs(1.0 - cd) < STOP_BETA) {
+						struct box result = {
+							.empty = 0, 
+							.content = front * (f - 1.0)
+						};
+            return result;
+        }
     }
+
+    PROCESS_ERROR("More loops needed, did not converge, in function incbeta");
 }
 
-// Inverse cdf at point, but this time taking a struct box.
-struct box inverse_cdf_box(struct box cdf_box(float), float p)
+struct box cdf_beta(float x)
+{
+    if (x < 0) {
+        struct box result = { .empty = 0, .content = 0 };
+        return result;
+    } else if (x > 1) {
+        struct box result = { .empty = 0, .content = 1 };
+        return result;
+    } else {
+        float successes = 1, failures = (2023 - 1945);
+        return incbeta(successes, failures, x);
+    }
+}
+
+// Inverse cdf at point
+struct box inverse_cdf(struct box cdf_box(float), float p)
 {
     // given a cdf: [-Inf, Inf] => Box([0,1])
     // returns a box with either
@@ -200,6 +251,10 @@ struct box inverse_cdf_box(struct box cdf_box(float), float p)
     }
 }
 
+// Some randomness functions for:
+//   - Sampling from a cdf
+//   - Benchmarking against a previous approach, which will be faster, but less general
+
 // Get random number between 0 and 1
 uint32_t xorshift32(uint32_t* seed)
 {
@@ -221,15 +276,15 @@ float rand_0_to_1(uint32_t* seed)
     return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
 }
 
-// Sampler based on inverse cdf
-struct box sampler(float cdf(float), uint32_t* seed)
+// Sampler based on inverse cdf and randomness function
+struct box sampler(struct box cdf(float), uint32_t* seed)
 {
     float p = rand_0_to_1(seed);
     struct box result = inverse_cdf(cdf, p);
     return result;
 }
 
-// For comparison, raw sampler
+// Comparison point with raw normal sampler
 const float PI = 3.14159265358979323846;
 float sampler_normal_0_1(uint32_t* seed)
 {
@@ -239,155 +294,6 @@ float sampler_normal_0_1(uint32_t* seed)
     return z;
 }
 
-// to do: add beta.
-// for the cdf, use this incomplete beta function implementation, based on continuous fractions:
-// <https://codeplea.com/incomplete-beta-function-c>
-// <https://github.com/codeplea/incbeta>
-
-#define STOP 1.0e-8
-#define TINY 1.0e-30
-
-struct box incbeta(float a, float b, float x)
-{
-    // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
-    // but modified to return a box struct and floats instead of doubles.
-    // [x] to do: add attribution in README
-    // Original code under this license:
-    /*
-		 * zlib License
-		 *
-		 * Regularized Incomplete Beta Function
-		 *
-		 * Copyright (c) 2016, 2017 Lewis Van Winkle
-		 * http://CodePlea.com
-		 *
-		 * This software is provided 'as-is', without any express or implied
-		 * warranty. In no event will the authors be held liable for any damages
-		 * arising from the use of this software.
-		 *
-		 * Permission is granted to anyone to use this software for any purpose,
-		 * including commercial applications, and to alter it and redistribute it
-		 * freely, subject to the following restrictions:
-		 *
-		 * 1. The origin of this software must not be misrepresented; you must not
-		 *    claim that you wrote the original software. If you use this software
-		 *    in a product, an acknowledgement in the product documentation would be
-		 *    appreciated but is not required.
-		 * 2. Altered source versions must be plainly marked as such, and must not be
-		 *    misrepresented as being the original software.
-		 * 3. This notice may not be removed or altered from any source distribution.
-		 */
-    if (x < 0.0 || x > 1.0) {
-        PROCESS_ERROR("x out of bounds [0, 1], in function incbeta");
-    }
-
-    /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
-    if (x > (a + 1.0) / (a + b + 2.0)) {
-        struct box symmetric_incbeta = incbeta(b, a, 1.0 - x);
-        if (symmetric_incbeta.empty) {
-            return symmetric_incbeta; // propagate error
-        } else {
-						struct box result = {
-							.empty = 0, 
-							.content = 1 - symmetric_incbeta.content
-						};
-            return result;
-        }
-    }
-
-    /*Find the first part before the continued fraction.*/
-    const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b);
-    const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a;
-
-    /*Use Lentz's algorithm to evaluate the continued fraction.*/
-    float f = 1.0, c = 1.0, d = 0.0;
-
-    int i, m;
-    for (i = 0; i <= 200; ++i) {
-        m = i / 2;
-
-        float numerator;
-        if (i == 0) {
-            numerator = 1.0; /*First numerator is 1.0.*/
-        } else if (i % 2 == 0) {
-            numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/
-        } else {
-            numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/
-        }
-
-        /*Do an iteration of Lentz's algorithm.*/
-        d = 1.0 + numerator * d;
-        if (fabs(d) < TINY)
-            d = TINY;
-        d = 1.0 / d;
-
-        c = 1.0 + numerator / c;
-        if (fabs(c) < TINY)
-            c = TINY;
-
-        const float cd = c * d;
-        f *= cd;
-
-        /*Check for stop.*/
-        if (fabs(1.0 - cd) < STOP) {
-						struct box result = {
-							.empty = 0, 
-							.content = front * (f - 1.0)
-						};
-            return result;
-        }
-    }
-
-    PROCESS_ERROR("More loops needed, did not converge, in function incbeta");
-}
-
-struct box cdf_beta(float x)
-{
-    if (x < 0) {
-        struct box result = { .empty = 0, .content = 0 };
-        return result;
-    } else if (x > 1) {
-        struct box result = { .empty = 0, .content = 1 };
-        return result;
-    } else {
-        float successes = 1, failures = (2023 - 1945);
-        return incbeta(successes, failures, x);
-    }
-}
-
-float cdf_dangerous_beta(float x)
-{
-    // So the thing is, this works
-    // But it will propagate through the code
-    // So it doesn't feel like a great architectural choice;
-    // I prefer my choice of setting a variable which will determine whether to exit on failure or not.
-    // Ok, so the proper thing to do would be to refactor inverse_cdf
-    // but, I could also use a GOTO? <https://stackoverflow.com/questions/245742/examples-of-good-gotos-in-c-or-c>
-    // Ok, alternatives are:
-    // - Refactor inverse_cdf to take a box, take the small complexity + penalty. Add a helper
-    // - Duplicate the code, have a refactored inverse_cdf as well as a normal cdf
-    // - Do something hacky
-    //   a. dangerous beta, which exits
-    //   b. clever & hacky go-to statements
-    //     i. They actually look fun to implement
-    //     ii. But they would be hard for others to use.
-    if (x < 0) {
-        return 0;
-    } else if (x > 1) {
-        return 1;
-    } else {
-        float successes = 100, failures = 100;
-        struct box result = incbeta(successes, failures, x);
-        if (result.empty) {
-            printf("%s\n", result.error_msg);
-            exit(1);
-            return 1;
-        } else {
-            return result.content;
-        }
-    }
-}
-
 int main()
 {

	squiggle.c Self-contained Monte Carlo estimation in C99
	Log \| Files \| Refs \| README