time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

samples.c (3970B)


      1 #include <gsl/gsl_randist.h>
      2 #include <gsl/gsl_rng.h>
      3 #include <math.h>
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <time.h>
      7 
      8 #define N 1000000
      9 /* 
     10  * For very high values of N, you will want to increase the maximum stack trace, otherwise you will suffer a segmentation fault
     11  * In Ubuntu/bash you can do this with $ ulimit -Ss 256000 ## ~256Mbs
     12  * And confirm it with $ ulimit -a
     13 */
     14 
     15 /* Helpers */
     16 void print(double* ys)
     17 {
     18     for (int i = 0; i < N; i++) {
     19         printf("%f\n", ys[i]);
     20     }
     21     printf("\n");
     22 }
     23 
     24 void fill(double* ys, float f)
     25 {
     26     for (int i = 0; i < N; i++) {
     27         ys[i] = f;
     28     }
     29 }
     30 
     31 double sum(double* ps, int n)
     32 {
     33     double result = 0;
     34     for (int i = 0; i < n; i++) {
     35         result += ps[i];
     36     }
     37     return (result);
     38 }
     39 
     40 void cumsum(double* ps, double* rs, int n)
     41 {
     42     double counter = 0;
     43     for (int i = 0; i < n; i++) {
     44         counter += ps[i];
     45         rs[i] = counter;
     46     }
     47 }
     48 
     49 /* Distributions*/
     50 void normal(gsl_rng* r, double* ys, double mean, double std)
     51 {
     52     for (int i = 0; i < N; i++) {
     53         ys[i] = mean + gsl_ran_gaussian(r, std);
     54     }
     55 }
     56 
     57 void lognormal(gsl_rng* r, double* ys, double zeta, double sigma)
     58 {
     59     for (int i = 0; i < N; i++) {
     60         ys[i] = gsl_ran_lognormal(r, zeta, sigma);
     61     }
     62 }
     63 
     64 void to(gsl_rng* r, double* ys, double low, double high)
     65 {
     66     double normal95confidencePoint = 1.6448536269514722;
     67     double log_low = log(low);
     68     double log_high = log(high);
     69     double zeta = (log_low + log_high) / 2;
     70     double sigma = (log_high - log_low) / (2.0 * normal95confidencePoint);
     71     lognormal(r, ys, zeta, sigma);
     72 }
     73 
     74 /* Mixture of distributions */
     75 void mixture(gsl_rng* r, double* dists[], double* weights, int n, double* results)
     76 {
     77     /* Get cummulative, normalized weights */
     78     double sum_weights = sum(weights, n);
     79     double normalized_weights[n];
     80     for (int i = 0; i < n; i++) {
     81         normalized_weights[i] = weights[i] / sum_weights;
     82     }
     83     double cummulative_weights[n];
     84     cumsum(normalized_weights, cummulative_weights, n);
     85 
     86     /* Get N samples, drawn from the different distributions in proportion to their weights. */
     87     for (int i = 0; i < N; i++) {
     88         double p_1 = gsl_rng_uniform(r);
     89         double p_2 = gsl_rng_uniform(r);
     90 
     91         int index_found = 0;
     92         int index_counter = 0;
     93         while ((index_found == 0) && (index_counter < n)) {
     94             if (p_1 < cummulative_weights[index_counter]) {
     95                 index_found = 1;
     96             } else {
     97                 index_counter++;
     98             }
     99         }
    100         if (index_found == 0) {
    101             printf("\nThis shouldn't be able to happen");
    102             // gsl_rng_free (r);
    103             // abort(); // this shouldn't have happened.
    104 
    105         } else {
    106             int sample_index = (int)floor(p_2 * N);
    107             results[i] = dists[index_counter][sample_index];
    108         }
    109     }
    110 }
    111 
    112 /* Main */
    113 int main(void)
    114 {
    115 		// Start clock
    116     clock_t start, end;
    117     start = clock();
    118 
    119     /* Initialize GNU Statistical Library (GSL) stuff */
    120     const gsl_rng_type* T;
    121     gsl_rng* r;
    122     // gsl_rng_env_setup();
    123     T = gsl_rng_default;
    124     r = gsl_rng_alloc(T);
    125 
    126     /* Toy example */
    127     /* Declare variables in play */
    128     double p_a, p_b, p_c;
    129     double dist_none[N], dist_one[N], dist_few[N], dist_many[N], dist_mixture[N];
    130 
    131     /* Initialize variables */
    132     p_a = 0.8;
    133     p_b = 0.5;
    134     p_c = p_a * p_b;
    135 
    136     fill(dist_none, 0);
    137     fill(dist_one, 1);
    138     to(r, dist_few, 1, 3);
    139     to(r, dist_many, 2, 10);
    140 
    141     /* Generate mixture */
    142     int n = 4;
    143     double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    144     double* dists[] = { dist_none, dist_one, dist_few, dist_many };
    145 
    146     mixture(r, dists, weights, n, dist_mixture);
    147     printf("%f\n", sum(dist_mixture, N) / N);
    148 
    149     /* Clean up GSL */
    150     gsl_rng_free(r);
    151 		
    152 		// End clock
    153     end = clock();
    154     printf("Total time (ms): %f\n", ((double)(end - start)) / CLOCKS_PER_SEC * 1000);
    155     /* Return success*/
    156     return EXIT_SUCCESS;
    157 }