samples.c (3970B)
1 #include <gsl/gsl_randist.h> 2 #include <gsl/gsl_rng.h> 3 #include <math.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <time.h> 7 8 #define N 1000000 9 /* 10 * For very high values of N, you will want to increase the maximum stack trace, otherwise you will suffer a segmentation fault 11 * In Ubuntu/bash you can do this with $ ulimit -Ss 256000 ## ~256Mbs 12 * And confirm it with $ ulimit -a 13 */ 14 15 /* Helpers */ 16 void print(double* ys) 17 { 18 for (int i = 0; i < N; i++) { 19 printf("%f\n", ys[i]); 20 } 21 printf("\n"); 22 } 23 24 void fill(double* ys, float f) 25 { 26 for (int i = 0; i < N; i++) { 27 ys[i] = f; 28 } 29 } 30 31 double sum(double* ps, int n) 32 { 33 double result = 0; 34 for (int i = 0; i < n; i++) { 35 result += ps[i]; 36 } 37 return (result); 38 } 39 40 void cumsum(double* ps, double* rs, int n) 41 { 42 double counter = 0; 43 for (int i = 0; i < n; i++) { 44 counter += ps[i]; 45 rs[i] = counter; 46 } 47 } 48 49 /* Distributions*/ 50 void normal(gsl_rng* r, double* ys, double mean, double std) 51 { 52 for (int i = 0; i < N; i++) { 53 ys[i] = mean + gsl_ran_gaussian(r, std); 54 } 55 } 56 57 void lognormal(gsl_rng* r, double* ys, double zeta, double sigma) 58 { 59 for (int i = 0; i < N; i++) { 60 ys[i] = gsl_ran_lognormal(r, zeta, sigma); 61 } 62 } 63 64 void to(gsl_rng* r, double* ys, double low, double high) 65 { 66 double normal95confidencePoint = 1.6448536269514722; 67 double log_low = log(low); 68 double log_high = log(high); 69 double zeta = (log_low + log_high) / 2; 70 double sigma = (log_high - log_low) / (2.0 * normal95confidencePoint); 71 lognormal(r, ys, zeta, sigma); 72 } 73 74 /* Mixture of distributions */ 75 void mixture(gsl_rng* r, double* dists[], double* weights, int n, double* results) 76 { 77 /* Get cummulative, normalized weights */ 78 double sum_weights = sum(weights, n); 79 double normalized_weights[n]; 80 for (int i = 0; i < n; i++) { 81 normalized_weights[i] = weights[i] / sum_weights; 82 } 83 double cummulative_weights[n]; 84 cumsum(normalized_weights, cummulative_weights, n); 85 86 /* Get N samples, drawn from the different distributions in proportion to their weights. */ 87 for (int i = 0; i < N; i++) { 88 double p_1 = gsl_rng_uniform(r); 89 double p_2 = gsl_rng_uniform(r); 90 91 int index_found = 0; 92 int index_counter = 0; 93 while ((index_found == 0) && (index_counter < n)) { 94 if (p_1 < cummulative_weights[index_counter]) { 95 index_found = 1; 96 } else { 97 index_counter++; 98 } 99 } 100 if (index_found == 0) { 101 printf("\nThis shouldn't be able to happen"); 102 // gsl_rng_free (r); 103 // abort(); // this shouldn't have happened. 104 105 } else { 106 int sample_index = (int)floor(p_2 * N); 107 results[i] = dists[index_counter][sample_index]; 108 } 109 } 110 } 111 112 /* Main */ 113 int main(void) 114 { 115 // Start clock 116 clock_t start, end; 117 start = clock(); 118 119 /* Initialize GNU Statistical Library (GSL) stuff */ 120 const gsl_rng_type* T; 121 gsl_rng* r; 122 // gsl_rng_env_setup(); 123 T = gsl_rng_default; 124 r = gsl_rng_alloc(T); 125 126 /* Toy example */ 127 /* Declare variables in play */ 128 double p_a, p_b, p_c; 129 double dist_none[N], dist_one[N], dist_few[N], dist_many[N], dist_mixture[N]; 130 131 /* Initialize variables */ 132 p_a = 0.8; 133 p_b = 0.5; 134 p_c = p_a * p_b; 135 136 fill(dist_none, 0); 137 fill(dist_one, 1); 138 to(r, dist_few, 1, 3); 139 to(r, dist_many, 2, 10); 140 141 /* Generate mixture */ 142 int n = 4; 143 double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; 144 double* dists[] = { dist_none, dist_one, dist_few, dist_many }; 145 146 mixture(r, dists, weights, n, dist_mixture); 147 printf("%f\n", sum(dist_mixture, N) / N); 148 149 /* Clean up GSL */ 150 gsl_rng_free(r); 151 152 // End clock 153 end = clock(); 154 printf("Total time (ms): %f\n", ((double)(end - start)) / CLOCKS_PER_SEC * 1000); 155 /* Return success*/ 156 return EXIT_SUCCESS; 157 }