readme tweaks; add 90% histogram function - squiggle.c - Self-contained Monte Carlo estimation in C99

commit c676a22ba8243998edaa9e323962f717f87f4d00
parent e62a840625ac55c1916aaca3b49a16bb9cf870aa
Author: NunoSempere <nuno.sempere@protonmail.com>
Date:   Wed, 31 Jan 2024 15:15:56 +0100

readme tweaks; add 90% histogram function

Diffstat:
M README.md  | 2 ++
M squiggle_more.c  | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-

2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
@@ -401,6 +401,8 @@ Overall, I'd describe the error handling capabilities of this library as pretty 
 
 ### To do
 
+- [ ] Come up with a better headline example; the Fermi paradox paper is too complicated
+- [ ] Post on suckless subreddit
 - [ ] Drive in a few more real-life applications
   - [ ] US election modelling?
 - [ ] Look into using size_t instead of int for sample numbers
diff --git a/squiggle_more.c b/squiggle_more.c
@@ -215,8 +215,8 @@ void array_print_stats(double xs[], int n){
 
 
 void array_print_histogram(double* xs, int n_samples, int n_bins) {
+    // Interface inspired by <https://github.com/red-data-tools/YouPlot>
     // Generated with the help of an llm; there might be subtle off-by-one errors
-    // interface inspired by <https://github.com/red-data-tools/YouPlot>
     if (n_bins <= 1) {
         fprintf(stderr, "Number of bins must be greater than 1.\n");
         return;
@@ -305,6 +305,90 @@ void array_print_histogram(double* xs, int n_samples, int n_bins) {
     free(bins);
 }
 
+void array_print_90_ci_histogram(double* xs, int n){
+    // Print to stdout a text histogram of the samples of xs that lie within
+    // their 90% confidence interval, as computed by array_get_90_ci.
+    //   xs: array of samples
+    //   n: number of samples in xs; must be higher than 10
+    // On invalid input, print a message to stderr and return without output.
+    //
+    // Code duplicated from array_print_histogram
+    // I'll consider simplifying it at some future point
+    // Possible ideas:
+    // - having only one function that takes any confidence interval?
+    // - having a utility function that is called by both functions?
+
+    // The signature has no bin-count parameter, so the count is fixed here
+    enum { N_BINS = 20 };
+    const int MAX_WIDTH = 50; // Adjust this to your terminal width
+
+    // Validate before calling array_get_90_ci
+    if (n <= 10) {
+        fprintf(stderr, "Number of samples must be higher than 10.\n");
+        return;
+    }
+
+    ci ci_90 = array_get_90_ci(xs, n);
+    double min_value = ci_90.low, max_value = ci_90.high;
+
+    // Avoid division by zero for a single unique value
+    if (min_value == max_value) {
+        max_value++;
+    }
+
+    // Calculate bin width
+    double bin_width = (max_value - min_value) / N_BINS;
+
+    // Fill the bins with sample counts; samples outside the interval are skipped.
+    // Both endpoints are kept, matching the "[" and "]" printed below.
+    int bins[N_BINS] = {0};
+    for (int i = 0; i < n; i++) {
+        if ((xs[i] >= min_value) && (xs[i] <= max_value)) {
+            int bin_index = (int)((xs[i] - min_value) / bin_width);
+            if (bin_index >= N_BINS) {
+                bin_index = N_BINS - 1; // Last bin includes max_value
+            }
+            bins[bin_index]++;
+        }
+    }
+
+    // Calculate the scaling factor based on the maximum bin count
+    int max_bin_count = 0;
+    for (int i = 0; i < N_BINS; i++) {
+        if (bins[i] > max_bin_count) {
+            max_bin_count = bins[i];
+        }
+    }
+    double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;
+
+    // Show more decimals for narrow bins, capped at 10; does not vary per bin
+    int decimal_places = 1;
+    if ((0 < bin_width) && (bin_width < 1)) {
+        int magnitude = (int) floor(log10(bin_width));
+        decimal_places = -magnitude;
+        decimal_places = decimal_places > 10 ? 10 : decimal_places;
+    }
+
+    // Print the histogram
+    for (int i = 0; i < N_BINS; i++) {
+        double bin_start = min_value + i * bin_width;
+        double bin_end = bin_start + bin_width;
+
+        printf("  [%*.*f, %*.*f", 4+decimal_places, decimal_places, bin_start, 4+decimal_places, decimal_places, bin_end);
+        char interval_delimiter = ')';
+        if (i == (N_BINS-1)) {
+            interval_delimiter = ']'; // last bucket is inclusive
+        }
+        printf("%c: ", interval_delimiter);
+
+        int marks = (int)(bins[i] * scale);
+        for (int j = 0; j < marks; j++) {
+            printf("█");
+        }
+        printf(" %d\n", bins[i]);
+    }
+}
+
 // Replicate some of the above functions over samplers
 // However, in the future I'll delete this
 // There should be a clear boundary between working with samplers and working with an array of samples

	squiggle.c Self-contained Monte Carlo estimation in C99
	Log \| Files \| Refs \| README

M	README.md	\|	2	++
M	squiggle_more.c	\|	86	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-