squiggle_more.c (15606B)
1 #include "squiggle.h" 2 #include <float.h> 3 #include <limits.h> 4 #include <math.h> 5 #include <omp.h> 6 #include <stdint.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> // memcpy 10 11 /* Cache optimizations */ 12 #define CACHE_LINE_SIZE 64 13 // getconf LEVEL1_DCACHE_LINESIZE 14 // <https://stackoverflow.com/questions/794632/programmatically-get-the-cache-line-size> 15 typedef struct seed_cache_box_t { 16 uint64_t seed; 17 char padding[CACHE_LINE_SIZE - sizeof(uint64_t)]; 18 // Cache line size is 64 *bytes*, uint64_t is 64 *bits* (8 bytes). Different units! 19 } seed_cache_box; 20 // This avoids "false sharing", i.e., different threads competing for the same cache line 21 // Dealing with this shaves 4ms from a 12ms process, or a third of runtime 22 // <http://www.nic.uoregon.edu/~khuck/ts/acumem-report/manual_html/ch06s07.html> 23 24 /* Parallel sampler */ 25 void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples) 26 { 27 28 // Terms of the division: 29 // a = b * quotient + reminder 30 // a = b * (a/b) + (a%b) 31 // dividend: a 32 // divisor: b 33 // quotient = a/b 34 // reminder = a%b 35 // "divisor's multiple" := b*(a/b) 36 37 // now, we have n_samples and n_threads 38 // to make our life easy, each thread will have a number of samples of: a/b (quotient) 39 // and we'll compute the remainder of samples separately 40 // to possibly do by Jorge: improve so that the remainder is included in the threads 41 42 int quotient = n_samples / n_threads; 43 int divisor_multiple = quotient * n_threads; 44 45 // uint64_t** seeds = malloc((size_t)n_threads * sizeof(uint64_t*)); 46 seed_cache_box* cache_box = (seed_cache_box*)malloc(sizeof(seed_cache_box) * (size_t)n_threads); 47 // seed_cache_box cache_box[n_threads]; // we could use the C stack. On normal linux machines, it's 8MB ($ ulimit -s). However, it doesn't quite feel right. 48 srand(1); 49 for (int i = 0; i < n_threads; i++) { 50 // Constraints: 51 // - xorshift can't start with 0 52 // - the seeds should be reasonably separated and not correlated 53 cache_box[i].seed = (uint64_t)rand() * (UINT64_MAX / RAND_MAX); 54 55 // Other initializations tried: 56 // *seeds[i] = 1 + i; 57 // *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads); 58 // *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i; 59 } 60 61 int i; 62 #pragma omp parallel private(i) 63 { 64 #pragma omp for 65 for (i = 0; i < n_threads; i++) { 66 // It's possible I don't need the for, and could instead call omp 67 // in some different way and get the thread number with omp_get_thread_num() 68 int lower_bound_inclusive = i * quotient; 69 int upper_bound_not_inclusive = ((i + 1) * quotient); // note the < in the for loop below, 70 71 for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) { 72 results[j] = sampler(&(cache_box[i].seed)); 73 /* 74 t starts at 0 and ends at T 75 at t=0, 76 thread i accesses: results[i*quotient +0], 77 thread i+1 acccesses: results[(i+1)*quotient +0] 78 at t=T 79 thread i accesses: results[(i+1)*quotient -1] 80 thread i+1 acccesses: results[(i+2)*quotient -1] 81 The results[j] that are directly adjacent are 82 results[(i+1)*quotient -1] (accessed by thread i at time T) 83 results[(i+1)*quotient +0] (accessed by thread i+1 at time 0) 84 and these are themselves adjacent to 85 results[(i+1)*quotient -2] (accessed by thread i at time T-1) 86 results[(i+1)*quotient +1] (accessed by thread i+1 at time 2) 87 If T is large enough, which it is, two threads won't access the same 88 cache line at the same time. 89 Pictorially: 90 at t=0 ....i.........I......... 91 at t=T .............i.........I 92 and the two never overlap 93 Note that results[j] is a double, a double has 8 bytes (64 bits) 94 8 doubles fill a cache line of 64 bytes. 95 So we specifically won't get problems as long as n_samples/n_threads > 8 96 n_threads is normally 16, so n_samples > 128 97 Note also that this is only a problem in terms of speed, if n_samples<128 98 the results are still computed, it'll just be slower 99 */ 100 } 101 } 102 } 103 for (int j = divisor_multiple; j < n_samples; j++) { 104 results[j] = sampler(&(cache_box[0].seed)); 105 // we can just reuse a seed, 106 // this isn't problematic because we;ve now stopped doing multithreading 107 } 108 109 free(cache_box); 110 } 111 112 /* Get confidence intervals, given a sampler */ 113 114 typedef struct ci_t { 115 double low; 116 double high; 117 } ci; 118 119 inline static void swp(int i, int j, double xs[]) 120 { 121 double tmp = xs[i]; 122 xs[i] = xs[j]; 123 xs[j] = tmp; 124 } 125 126 static int partition(int low, int high, double xs[], int length) 127 { 128 if (low > high || high >= length) { 129 printf("Invariant violated for function partition in %s (%d)", __FILE__, __LINE__); 130 exit(1); 131 } 132 // Note: the scratchpad/ folder in commit 578bfa27 has printfs sprinkled throughout 133 int pivot = low + (int)floor((high - low) / 2); 134 double pivot_value = xs[pivot]; 135 swp(pivot, high, xs); 136 int gt = low; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */ 137 for (int i = low; i < high; i++) { 138 if (xs[i] < pivot_value) { 139 swp(gt, i, xs); 140 gt++; 141 } 142 } 143 swp(high, gt, xs); 144 return gt; 145 } 146 147 static double quickselect(int k, double xs[], int n) 148 { 149 // https://en.wikipedia.org/wiki/Quickselect 150 151 double* ys = malloc((size_t)n * sizeof(double)); 152 memcpy(ys, xs, (size_t)n * sizeof(double)); 153 // ^: don't rearrange item order in the original array 154 155 int low = 0; 156 int high = n - 1; 157 for (;;) { 158 if (low == high) { 159 double result = ys[low]; 160 free(ys); 161 return result; 162 } 163 int pivot = partition(low, high, ys, n); 164 if (pivot == k) { 165 double result = ys[pivot]; 166 free(ys); 167 return result; 168 } else if (k < pivot) { 169 high = pivot - 1; 170 } else { 171 low = pivot + 1; 172 } 173 } 174 } 175 176 ci array_get_ci(ci interval, double* xs, int n) 177 { 178 179 int low_k = (int)floor(interval.low * n); 180 int high_k = (int)ceil(interval.high * n); 181 ci result = { 182 .low = quickselect(low_k, xs, n), 183 .high = quickselect(high_k, xs, n), 184 }; 185 return result; 186 } 187 ci array_get_90_ci(double xs[], int n) 188 { 189 return array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n); 190 } 191 192 double array_get_median(double xs[], int n) 193 { 194 int median_k = (int)floor(0.5 * n); 195 return quickselect(median_k, xs, n); 196 } 197 198 /* array print: potentially useful for debugging */ 199 void array_print(double xs[], int n) 200 { 201 printf("["); 202 for (int i = 0; i < n - 1; i++) { 203 printf("%f, ", xs[i]); 204 } 205 printf("%f", xs[n - 1]); 206 printf("]\n"); 207 } 208 209 void array_print_stats(double xs[], int n) 210 { 211 ci ci_90 = array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n); 212 ci ci_80 = array_get_ci((ci) { .low = 0.1, .high = 0.9 }, xs, n); 213 ci ci_50 = array_get_ci((ci) { .low = 0.25, .high = 0.75 }, xs, n); 214 double median = array_get_median(xs, n); 215 double mean = array_mean(xs, n); 216 double std = array_std(xs, n); 217 printf("| Statistic | Value |\n" 218 "| --- | --- |\n" 219 "| Mean | %lf |\n" 220 "| Median | %lf |\n" 221 "| Std | %lf |\n" 222 "| 90%% confidence interval | %lf to %lf |\n" 223 "| 80%% confidence interval | %lf to %lf |\n" 224 "| 50%% confidence interval | %lf to %lf |\n", 225 mean, median, std, ci_90.low, ci_90.high, ci_80.low, ci_80.high, ci_50.low, ci_50.high); 226 } 227 228 void array_print_histogram(double* xs, int n_samples, int n_bins) 229 { 230 // Interface inspired by <https://github.com/red-data-tools/YouPlot> 231 if (n_bins <= 1) { 232 fprintf(stderr, "Number of bins must be greater than 1.\n"); 233 return; 234 } else if (n_samples <= 1) { 235 fprintf(stderr, "Number of samples must be higher than 1.\n"); 236 return; 237 } 238 239 int* bins = (int*)calloc((size_t)n_bins, sizeof(int)); 240 if (bins == NULL) { 241 fprintf(stderr, "Memory allocation for bins failed.\n"); 242 return; 243 } 244 245 // Find the minimum and maximum values from the samples 246 double min_value = xs[0], max_value = xs[0]; 247 for (int i = 0; i < n_samples; i++) { 248 if (xs[i] < min_value) { 249 min_value = xs[i]; 250 } 251 if (xs[i] > max_value) { 252 max_value = xs[i]; 253 } 254 } 255 256 // Avoid division by zero for a single unique value 257 if (min_value == max_value) { 258 max_value++; 259 } 260 261 // Calculate bin width 262 double bin_width = (max_value - min_value) / n_bins; 263 264 // Fill the bins with sample counts 265 for (int i = 0; i < n_samples; i++) { 266 int bin_index = (int)((xs[i] - min_value) / bin_width); 267 if (bin_index == n_bins) { 268 bin_index--; // Last bin includes max_value 269 } 270 bins[bin_index]++; 271 } 272 273 // Calculate the scaling factor based on the maximum bin count 274 int max_bin_count = 0; 275 for (int i = 0; i < n_bins; i++) { 276 if (bins[i] > max_bin_count) { 277 max_bin_count = bins[i]; 278 } 279 } 280 const int MAX_WIDTH = 50; // Adjust this to your terminal width 281 double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0; 282 283 // Print the histogram 284 for (int i = 0; i < n_bins; i++) { 285 double bin_start = min_value + i * bin_width; 286 double bin_end = bin_start + bin_width; 287 288 int decimalPlaces = 1; 289 if ((0 < bin_width) && (bin_width < 1)) { 290 int magnitude = (int)floor(log10(bin_width)); 291 decimalPlaces = -magnitude; 292 decimalPlaces = decimalPlaces > 10 ? 10 : decimalPlaces; 293 } 294 printf("[%*.*f, %*.*f", 4 + decimalPlaces, decimalPlaces, bin_start, 4 + decimalPlaces, decimalPlaces, bin_end); 295 char interval_delimiter = ')'; 296 if (i == (n_bins - 1)) { 297 interval_delimiter = ']'; // last bucket is inclusive 298 } 299 printf("%c: ", interval_delimiter); 300 301 int marks = (int)(bins[i] * scale); 302 for (int j = 0; j < marks; j++) { 303 printf("█"); 304 } 305 printf(" %d\n", bins[i]); 306 } 307 308 // Free the allocated memory for bins 309 free(bins); 310 } 311 312 void array_print_90_ci_histogram(double* xs, int n_samples, int n_bins) 313 { 314 // Code duplicated from previous function 315 // I'll consider simplifying it at some future point 316 // Possible ideas: 317 // - having only one function that takes any confidence interval? 318 // - having a utility function that is called by both functions? 319 ci ci_90 = array_get_90_ci(xs, n_samples); 320 321 if (n_bins <= 1) { 322 fprintf(stderr, "Number of bins must be greater than 1.\n"); 323 return; 324 } else if (n_samples <= 10) { 325 fprintf(stderr, "Number of samples must be higher than 10.\n"); 326 return; 327 } 328 329 int* bins = (int*)calloc((size_t)n_bins, sizeof(int)); 330 if (bins == NULL) { 331 fprintf(stderr, "Memory allocation for bins failed.\n"); 332 return; 333 } 334 335 double min_value = ci_90.low, max_value = ci_90.high; 336 337 // Avoid division by zero for a single unique value 338 if (min_value == max_value) { 339 max_value++; 340 } 341 double bin_width = (max_value - min_value) / n_bins; 342 343 // Fill the bins with sample counts 344 int below_min = 0, above_max = 0; 345 for (int i = 0; i < n_samples; i++) { 346 if (xs[i] < min_value) { 347 below_min++; 348 } else if (xs[i] > max_value) { 349 above_max++; 350 } else { 351 int bin_index = (int)((xs[i] - min_value) / bin_width); 352 if (bin_index == n_bins) { 353 bin_index--; // Last bin includes max_value 354 } 355 bins[bin_index]++; 356 } 357 } 358 359 // Calculate the scaling factor based on the maximum bin count 360 int max_bin_count = 0; 361 for (int i = 0; i < n_bins; i++) { 362 if (bins[i] > max_bin_count) { 363 max_bin_count = bins[i]; 364 } 365 } 366 const int MAX_WIDTH = 40; // Adjust this to your terminal width 367 double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0; 368 369 // Print the histogram 370 int decimalPlaces = 1; 371 if ((0 < bin_width) && (bin_width < 1)) { 372 int magnitude = (int)floor(log10(bin_width)); 373 decimalPlaces = -magnitude; 374 decimalPlaces = decimalPlaces > 10 ? 10 : decimalPlaces; 375 } 376 printf("(%*s, %*.*f): ", 6 + decimalPlaces, "-∞", 4 + decimalPlaces, decimalPlaces, min_value); 377 int marks_below_min = (int)(below_min * scale); 378 for (int j = 0; j < marks_below_min; j++) { 379 printf("█"); 380 } 381 printf(" %d\n", below_min); 382 for (int i = 0; i < n_bins; i++) { 383 double bin_start = min_value + i * bin_width; 384 double bin_end = bin_start + bin_width; 385 386 printf("[%*.*f, %*.*f", 4 + decimalPlaces, decimalPlaces, bin_start, 4 + decimalPlaces, decimalPlaces, bin_end); 387 char interval_delimiter = ')'; 388 if (i == (n_bins - 1)) { 389 interval_delimiter = ']'; // last bucket is inclusive 390 } 391 printf("%c: ", interval_delimiter); 392 393 int marks = (int)(bins[i] * scale); 394 for (int j = 0; j < marks; j++) { 395 printf("█"); 396 } 397 printf(" %d\n", bins[i]); 398 } 399 printf("(%*.*f, %*s): ", 4 + decimalPlaces, decimalPlaces, max_value, 6 + decimalPlaces, "+∞"); 400 int marks_above_max = (int)(above_max * scale); 401 for (int j = 0; j < marks_above_max; j++) { 402 printf("█"); 403 } 404 printf(" %d\n", above_max); 405 406 // Free the allocated memory for bins 407 free(bins); 408 } 409 410 /* Algebra manipulations */ 411 412 #define NORMAL90CONFIDENCE 1.6448536269514727 413 414 typedef struct normal_params_t { 415 double mean; 416 double std; 417 } normal_params; 418 419 normal_params algebra_sum_normals(normal_params a, normal_params b) 420 { 421 normal_params result = { 422 .mean = a.mean + b.mean, 423 .std = sqrt((a.std * a.std) + (b.std * b.std)), 424 }; 425 return result; 426 } 427 428 typedef struct lognormal_params_t { 429 double logmean; 430 double logstd; 431 } lognormal_params; 432 433 lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b) 434 { 435 lognormal_params result = { 436 .logmean = a.logmean + b.logmean, 437 .logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)), 438 }; 439 return result; 440 } 441 442 lognormal_params convert_ci_to_lognormal_params(ci x) 443 { 444 double loghigh = log(x.high); 445 double loglow = log(x.low); 446 double logmean = (loghigh + loglow) / 2.0; 447 double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE); 448 lognormal_params result = { .logmean = logmean, .logstd = logstd }; 449 return result; 450 } 451 452 ci convert_lognormal_params_to_ci(lognormal_params y) 453 { 454 double h = y.logstd * NORMAL90CONFIDENCE; 455 double loghigh = y.logmean + h; 456 double loglow = y.logmean - h; 457 ci result = { .low = exp(loglow), .high = exp(loghigh) }; 458 return result; 459 }