samples.c (7216B)
1 #include <math.h> 2 #include <omp.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 7 const float PI = 3.14159265358979323846; 8 9 #define N 1000000 10 11 //Array helpers 12 13 void array_print(float* array, int length) 14 { 15 for (int i = 0; i < length; i++) { 16 printf("item[%d] = %f\n", i, array[i]); 17 } 18 printf("\n"); 19 } 20 21 float array_sum(float* array, int length) 22 { 23 float output = 0.0; 24 for (int i = 0; i < length; i++) { 25 output += array[i]; 26 } 27 return output; 28 } 29 30 void array_cumsum(float* array_to_sum, float* array_cumsummed, int length) 31 { 32 array_cumsummed[0] = array_to_sum[0]; 33 for (int i = 1; i < length; i++) { 34 array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i]; 35 } 36 } 37 38 // Split array helpers 39 int split_array_get_length(int index, int total_length, int n_threads) 40 { 41 return (total_length % n_threads > index ? total_length / n_threads + 1 : total_length / n_threads); 42 } 43 44 void split_array_allocate(float** meta_array, int length, int divide_into) 45 { 46 int split_array_length; 47 48 for (int i = 0; i < divide_into; i++) { 49 split_array_length = split_array_get_length(i, length, divide_into); 50 meta_array[i] = malloc(split_array_length * sizeof(float)); 51 } 52 } 53 54 void split_array_free(float** meta_array, int divided_into) 55 { 56 for (int i = 0; i < divided_into; i++) { 57 free(meta_array[i]); 58 } 59 free(meta_array); 60 } 61 62 float split_array_sum(float** meta_array, int length, int divided_into) 63 { 64 int i; 65 float output = 0; 66 67 #pragma omp parallel for reduction(+ \ 68 : output) 69 for (int i = 0; i < divided_into; i++) { 70 float own_partial_sum = 0; 71 int split_array_length = split_array_get_length(i, length, divided_into); 72 for (int j = 0; j < split_array_length; j++) { 73 own_partial_sum += meta_array[i][j]; 74 } 75 output += own_partial_sum; 76 } 77 return output; 78 } 79 80 // Pseudo Random number generator 81 82 uint32_t xorshift32(uint32_t* seed) 83 { 84 // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" 85 // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works> 86 // https://en.wikipedia.org/wiki/Xorshift 87 // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/> 88 89 uint32_t x = *seed; 90 x ^= x << 13; 91 x ^= x >> 17; 92 x ^= x << 5; 93 return *seed = x; 94 } 95 96 // Distribution & sampling functions 97 98 float rand_0_to_1(uint32_t* seed) 99 { 100 return ((float)xorshift32(seed)) / ((float)UINT32_MAX); 101 /* 102 uint32_t x = *seed; 103 x ^= x << 13; 104 x ^= x >> 17; 105 x ^= x << 5; 106 return ((float)(*seed = x))/((float) UINT32_MAX); 107 */ 108 // previously: 109 // ((float)rand_r(seed) / (float)RAND_MAX) 110 // and before that: rand, but it wasn't thread-safe. 111 // See: <https://stackoverflow.com/questions/43151361/how-to-create-thread-safe-random-number-generator-in-c-using-rand-r> for why to use rand_r: 112 // rand() is not thread-safe, as it relies on (shared) hidden seed. 113 } 114 115 float rand_float(float max, uint32_t* seed) 116 { 117 return rand_0_to_1(seed) * max; 118 } 119 120 float ur_normal(uint32_t* seed) 121 { 122 float u1 = rand_0_to_1(seed); 123 float u2 = rand_0_to_1(seed); 124 float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); 125 return z; 126 } 127 128 float random_uniform(float from, float to, uint32_t* seed) 129 { 130 return rand_0_to_1(seed) * (to - from) + from; 131 } 132 133 float random_normal(float mean, float sigma, uint32_t* seed) 134 { 135 return (mean + sigma * ur_normal(seed)); 136 } 137 138 float random_lognormal(float logmean, float logsigma, uint32_t* seed) 139 { 140 return expf(random_normal(logmean, logsigma, seed)); 141 } 142 143 float random_to(float low, float high, uint32_t* seed) 144 { 145 const float NORMAL95CONFIDENCE = 1.6448536269514722; 146 float loglow = logf(low); 147 float loghigh = logf(high); 148 float logmean = (loglow + loghigh) / 2; 149 float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE); 150 return random_lognormal(logmean, logsigma, seed); 151 } 152 153 // Mixture function 154 void mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, float** results, int n_threads) 155 { 156 // You can see a simpler version of this function in the git history 157 // or in alt/C-02-better-algorithm-one-thread/ 158 float sum_weights = array_sum(weights, n_dists); 159 float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); 160 cumsummed_normalized_weights[0] = weights[0] / sum_weights; 161 for (int i = 1; i < n_dists; i++) { 162 cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights; 163 } 164 165 //create var holders 166 float p1; 167 int sample_index, i, split_array_length; 168 169 // uint32_t* seeds[n_threads]; 170 uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*)); 171 for (uint32_t i = 0; i < n_threads; i++) { 172 seeds[i] = malloc(sizeof(uint32_t)); 173 *seeds[i] = i + 1; // xorshift can't start with 0 174 } 175 176 #pragma omp parallel private(i, p1, sample_index, split_array_length) 177 { 178 #pragma omp for 179 for (i = 0; i < n_threads; i++) { 180 split_array_length = split_array_get_length(i, N, n_threads); 181 for (int j = 0; j < split_array_length; j++) { 182 p1 = random_uniform(0, 1, seeds[i]); 183 for (int k = 0; k < n_dists; k++) { 184 if (p1 < cumsummed_normalized_weights[k]) { 185 results[i][j] = samplers[k](seeds[i]); 186 break; 187 } 188 } 189 } 190 } 191 } 192 // free(normalized_weights); 193 // free(cummulative_weights); 194 free(cumsummed_normalized_weights); 195 for (uint32_t i = 0; i < n_threads; i++) { 196 free(seeds[i]); 197 } 198 free(seeds); 199 } 200 201 // Functions used for the BOTEC. 202 // Their type has to be the same, as we will be passing them around. 203 204 float sample_0(uint32_t* seed) 205 { 206 return 0; 207 } 208 209 float sample_1(uint32_t* seed) 210 { 211 return 1; 212 } 213 214 float sample_few(uint32_t* seed) 215 { 216 return random_to(1, 3, seed); 217 } 218 219 float sample_many(uint32_t* seed) 220 { 221 return random_to(2, 10, seed); 222 } 223 224 int main() 225 { 226 227 // Toy example 228 // Declare variables in play 229 float p_a, p_b, p_c; 230 int n_threads = omp_get_max_threads(); 231 // printf("Max threads: %d\n", n_threads); 232 // omp_set_num_threads(n_threads); 233 float** dist_mixture = malloc(n_threads * sizeof(float*)); 234 split_array_allocate(dist_mixture, N, n_threads); 235 236 // Initialize variables 237 p_a = 0.8; 238 p_b = 0.5; 239 p_c = p_a * p_b; 240 241 // Generate mixture 242 int n_dists = 4; 243 float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; 244 float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many }; 245 246 mixture(samplers, weights, n_dists, dist_mixture, n_threads); 247 printf("Sum(dist_mixture, N)/N = %f\n", split_array_sum(dist_mixture, N, n_threads) / N); 248 // array_print(dist_mixture[0], N); 249 split_array_free(dist_mixture, n_threads); 250 251 return 0; 252 }