samples.c (7293B)
1 #include <math.h> 2 #include <omp.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 7 const float PI = 3.14159265358979323846; 8 9 #define N 1000000 10 11 //Array helpers 12 13 void array_print(float* array, int length) 14 { 15 for (int i = 0; i < length; i++) { 16 printf("item[%d] = %f\n", i, array[i]); 17 } 18 printf("\n"); 19 } 20 21 float array_sum(float* array, int length) 22 { 23 float output = 0.0; 24 for (int i = 0; i < length; i++) { 25 output += array[i]; 26 } 27 return output; 28 } 29 30 void array_cumsum(float* array_to_sum, float* array_cumsummed, int length) 31 { 32 array_cumsummed[0] = array_to_sum[0]; 33 for (int i = 1; i < length; i++) { 34 array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i]; 35 } 36 } 37 38 // Split array helpers 39 int split_array_get_length(int index, int total_length, int n_threads) 40 { 41 return (total_length % n_threads > index ? total_length / n_threads + 1 : total_length / n_threads); 42 } 43 44 void split_array_allocate(float** meta_array, int length, int divide_into) 45 { 46 int split_array_length; 47 48 for (int i = 0; i < divide_into; i++) { 49 split_array_length = split_array_get_length(i, length, divide_into); 50 meta_array[i] = malloc(split_array_length * sizeof(float)); 51 } 52 } 53 54 void split_array_free(float** meta_array, int divided_into) 55 { 56 for (int i = 0; i < divided_into; i++) { 57 free(meta_array[i]); 58 } 59 free(meta_array); 60 } 61 62 float split_array_sum(float** meta_array, int length, int divided_into) 63 { 64 int i; 65 float output = 0; 66 67 #pragma omp parallel for reduction(+ \ 68 : output) 69 for (int i = 0; i < divided_into; i++) { 70 float own_partial_sum = 0; 71 int split_array_length = split_array_get_length(i, length, divided_into); 72 for (int j = 0; j < split_array_length; j++) { 73 own_partial_sum += meta_array[i][j]; 74 } 75 output += own_partial_sum; 76 } 77 return output; 78 } 79 80 // Pseudo Random number generator 81 82 uint32_t xorshift32(uint32_t* seed) 83 { 84 // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" 85 // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works> 86 // https://en.wikipedia.org/wiki/Xorshift 87 // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/> 88 89 uint32_t x = *seed; 90 x ^= x << 13; 91 x ^= x >> 17; 92 x ^= x << 5; 93 return *seed = x; 94 } 95 96 // Distribution & sampling functions 97 98 float rand_0_to_1(uint32_t* seed) 99 { 100 return ((float)xorshift32(seed)) / ((float)UINT32_MAX); 101 /* 102 uint32_t x = *seed; 103 x ^= x << 13; 104 x ^= x >> 17; 105 x ^= x << 5; 106 return ((float)(*seed = x))/((float) UINT32_MAX); 107 */ 108 // previously: 109 // ((float)rand_r(seed) / (float)RAND_MAX) 110 // and before that: rand, but it wasn't thread-safe. 111 // See: <https://stackoverflow.com/questions/43151361/how-to-create-thread-safe-random-number-generator-in-c-using-rand-r> for why to use rand_r: 112 // rand() is not thread-safe, as it relies on (shared) hidden seed. 113 } 114 115 float rand_float(float max, uint32_t* seed) 116 { 117 return rand_0_to_1(seed) * max; 118 } 119 120 float ur_normal(uint32_t* seed) 121 { 122 float u1 = rand_0_to_1(seed); 123 float u2 = rand_0_to_1(seed); 124 float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); 125 return z; 126 } 127 128 float random_uniform(float from, float to, uint32_t* seed) 129 { 130 return rand_0_to_1(seed) * (to - from) + from; 131 } 132 133 float random_normal(float mean, float sigma, uint32_t* seed) 134 { 135 return (mean + sigma * ur_normal(seed)); 136 } 137 138 float random_lognormal(float logmean, float logsigma, uint32_t* seed) 139 { 140 return expf(random_normal(logmean, logsigma, seed)); 141 } 142 143 float random_to(float low, float high, uint32_t* seed) 144 { 145 const float NORMAL95CONFIDENCE = 1.6448536269514722; 146 float loglow = logf(low); 147 float loghigh = logf(high); 148 float logmean = (loglow + loghigh) / 2; 149 float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE); 150 return random_lognormal(logmean, logsigma, seed); 151 } 152 153 // Mixture function 154 155 float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed) 156 { 157 158 // You can see a slightly simpler version of this function in the git history 159 // or in alt/C-02-better-algorithm-one-thread/ 160 float sum_weights = array_sum(weights, n_dists); 161 float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); 162 cumsummed_normalized_weights[0] = weights[0] / sum_weights; 163 for (int i = 1; i < n_dists; i++) { 164 cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights; 165 } 166 167 //create var holders 168 float p1, result; 169 int sample_index, i, own_length; 170 p1 = random_uniform(0, 1, seed); 171 for (int i = 0; i < n_dists; i++) { 172 if (p1 < cumsummed_normalized_weights[i]) { 173 result = samplers[i](seed); 174 break; 175 } 176 } 177 free(cumsummed_normalized_weights); 178 return result; 179 } 180 181 // Parallization function 182 void paralellize(float (*sampler)(uint32_t* seed), float** results, int n_threads){ 183 184 int sample_index, i, split_array_length; 185 uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*)); 186 for (uint32_t i = 0; i < n_threads; i++) { 187 seeds[i] = malloc(sizeof(uint32_t)); 188 *seeds[i] = i + 1; // xorshift can't start with 0 189 } 190 191 #pragma omp parallel private(i, sample_index, split_array_length) 192 { 193 #pragma omp for 194 for (i = 0; i < n_threads; i++) { 195 split_array_length = split_array_get_length(i, N, n_threads); 196 for (int j = 0; j < split_array_length; j++) { 197 results[i][j] = sampler(seeds[i]); 198 } 199 } 200 } 201 202 for (uint32_t i = 0; i < n_threads; i++) { 203 free(seeds[i]); 204 } 205 free(seeds); 206 } 207 208 // Functions used for the BOTEC. 209 // Their type has to be the same, as we will be passing them around. 210 211 float sample_0(uint32_t* seed) 212 { 213 return 0; 214 } 215 216 float sample_1(uint32_t* seed) 217 { 218 return 1; 219 } 220 221 float sample_few(uint32_t* seed) 222 { 223 return random_to(1, 3, seed); 224 } 225 226 float sample_many(uint32_t* seed) 227 { 228 return random_to(2, 10, seed); 229 } 230 231 float sample_mixture(uint32_t* seed){ 232 float p_a, p_b, p_c; 233 234 // Initialize variables 235 p_a = 0.8; 236 p_b = 0.5; 237 p_c = p_a * p_b; 238 239 // Generate mixture 240 int n_dists = 4; 241 float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; 242 float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many }; 243 244 return mixture(samplers, weights, n_dists, seed); 245 } 246 247 int main() 248 { 249 int n_threads = omp_get_max_threads(); 250 // printf("Max threads: %d\n", n_threads); 251 // omp_set_num_threads(n_threads); 252 float** split_array_results = malloc(n_threads * sizeof(float*)); 253 split_array_allocate(split_array_results, N, n_threads); 254 255 paralellize(sample_mixture, split_array_results, n_threads); 256 printf("Sum(split_array_results, N)/N = %f\n", split_array_sum(split_array_results, N, n_threads) / N); 257 258 split_array_free(split_array_results, n_threads); 259 return 0; 260 }