samples.c (6284B)
1 #include <math.h> 2 #include <omp.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 7 const float PI = 3.14159265358979323846; 8 9 #define N_SAMPLES (1024 * 1000) 10 11 //Array helpers 12 void array_print(float* array, int length) 13 { 14 for (int i = 0; i < length; i++) { 15 printf("item[%d] = %f\n", i, array[i]); 16 } 17 printf("\n"); 18 } 19 20 float array_sum(float* array, int length) 21 { 22 float output = 0.0; 23 for (int i = 0; i < length; i++) { 24 output += array[i]; 25 } 26 return output; 27 } 28 29 void array_cumsum(float* array_to_sum, float* array_cumsummed, int length) 30 { 31 array_cumsummed[0] = array_to_sum[0]; 32 for (int i = 1; i < length; i++) { 33 array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i]; 34 } 35 } 36 37 // Pseudo Random number generator 38 39 uint32_t xorshift32(uint32_t* seed) 40 { 41 // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RN_SAMPLESGs" 42 // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works> 43 // https://en.wikipedia.org/wiki/Xorshift 44 // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/> 45 46 uint32_t x = *seed; 47 x ^= x << 13; 48 x ^= x >> 17; 49 x ^= x << 5; 50 return *seed = x; 51 } 52 53 // Distribution & sampling functions 54 55 float rand_0_to_1(uint32_t* seed) 56 { 57 return ((float)xorshift32(seed)) / ((float)UINT32_MAX); 58 /* 59 uint32_t x = *seed; 60 x ^= x << 13; 61 x ^= x >> 17; 62 x ^= x << 5; 63 return ((float)(*seed = x))/((float) UIN_SAMPLEST32_MAX); 64 */ 65 // previously: 66 // ((float)rand_r(seed) / (float)RAN_SAMPLESD_MAX) 67 // and before that: rand, but it wasn't thread-safe. 68 // See: <https://stackoverflow.com/questions/43151361/how-to-create-thread-safe-random-number-generator-in-c-using-rand-r> for why to use rand_r: 69 // rand() is not thread-safe, as it relies on (shared) hidden seed. 70 } 71 72 float rand_float(float max, uint32_t* seed) 73 { 74 return rand_0_to_1(seed) * max; 75 } 76 77 float ur_normal(uint32_t* seed) 78 { 79 float u1 = rand_0_to_1(seed); 80 float u2 = rand_0_to_1(seed); 81 float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); 82 return z; 83 } 84 85 float random_uniform(float from, float to, uint32_t* seed) 86 { 87 return rand_0_to_1(seed) * (to - from) + from; 88 } 89 90 float random_normal(float mean, float sigma, uint32_t* seed) 91 { 92 return (mean + sigma * ur_normal(seed)); 93 } 94 95 float random_lognormal(float logmean, float logsigma, uint32_t* seed) 96 { 97 return expf(random_normal(logmean, logsigma, seed)); 98 } 99 100 float random_to(float low, float high, uint32_t* seed) 101 { 102 const float N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE = 1.6448536269514722; 103 float loglow = logf(low); 104 float loghigh = logf(high); 105 float logmean = (loglow + loghigh) / 2; 106 float logsigma = (loghigh - loglow) / (2.0 * N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE); 107 return random_lognormal(logmean, logsigma, seed); 108 } 109 110 // Mixture function 111 112 float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed) 113 { 114 115 // You can see a slightly simpler version of this function in the git history 116 // or in alt/C-02-better-algorithm-one-thread/ 117 float sum_weights = array_sum(weights, n_dists); 118 float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); 119 cumsummed_normalized_weights[0] = weights[0] / sum_weights; 120 for (int i = 1; i < n_dists; i++) { 121 cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights; 122 } 123 124 //create var holders 125 float p1, result; 126 int sample_index, i, own_length; 127 p1 = random_uniform(0, 1, seed); 128 for (int i = 0; i < n_dists; i++) { 129 if (p1 < cumsummed_normalized_weights[i]) { 130 result = samplers[i](seed); 131 break; 132 } 133 } 134 free(cumsummed_normalized_weights); 135 return result; 136 } 137 138 // Parallization function 139 void paralellize(float (*sampler)(uint32_t* seed), float* results, int n_threads, int n_samples){ 140 if((N_SAMPLES % n_threads) != 0){ 141 fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n"); 142 exit(1); 143 } 144 // int n_samples_per_thread = N_SAMPLES / n_thread; 145 uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*)); 146 for (uint32_t i = 0; i < n_threads; i++) { 147 seeds[i] = malloc(sizeof(uint32_t)); 148 *seeds[i] = i + 1; // xorshift can't start with 0 149 } 150 151 int i; 152 #pragma omp parallel private(i) 153 { 154 #pragma omp for 155 for (i = 0; i < n_threads; i++) { 156 int lower_bound = i * (n_samples / n_threads); 157 int upper_bound = ((i+1) * (n_samples / n_threads)) - 1; 158 // printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound); 159 for (int j = lower_bound; j < upper_bound; j++) { 160 results[j] = sampler(seeds[i]); 161 } 162 } 163 } 164 165 for (uint32_t i = 0; i < n_threads; i++) { 166 free(seeds[i]); 167 } 168 free(seeds); 169 } 170 171 // Functions used for the BOTEC. 172 // Their type has to be the same, as we will be passing them around. 173 174 float sample_0(uint32_t* seed) 175 { 176 return 0; 177 } 178 179 float sample_1(uint32_t* seed) 180 { 181 return 1; 182 } 183 184 float sample_few(uint32_t* seed) 185 { 186 return random_to(1, 3, seed); 187 } 188 189 float sample_many(uint32_t* seed) 190 { 191 return random_to(2, 10, seed); 192 } 193 194 float sample_mixture(uint32_t* seed){ 195 float p_a, p_b, p_c; 196 197 // Initialize variables 198 p_a = 0.8; 199 p_b = 0.5; 200 p_c = p_a * p_b; 201 202 // Generate mixture 203 int n_dists = 4; 204 float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; 205 float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many }; 206 207 return mixture(samplers, weights, n_dists, seed); 208 } 209 210 int main() 211 { 212 int n_threads = omp_get_max_threads(); 213 float* split_array_results = malloc(N_SAMPLES * sizeof(float)); 214 215 paralellize(sample_mixture, split_array_results, n_threads, N_SAMPLES); 216 printf("Sum(split_array_results, N_SAMPLES)/N_SAMPLES = %f\n", array_sum(split_array_results, N_SAMPLES) / N_SAMPLES); 217 218 free(split_array_results); 219 return 0; 220 }