samples.c (8096B)
1 #include <math.h> 2 #include <omp.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 7 const float PI = 3.14159265358979323846; 8 9 #define N_SAMPLES (1024 * 1000 ) 10 11 //Array helpers 12 13 void array_print(float* array, int length) 14 { 15 for (int i = 0; i < length; i++) { 16 printf("item[%d] = %f\n", i, array[i]); 17 } 18 printf("\n"); 19 } 20 21 float array_sum(float* array, int length) 22 { 23 float output = 0.0; 24 for (int i = 0; i < length; i++) { 25 output += array[i]; 26 } 27 return output; 28 } 29 30 void array_cumsum(float* array_to_sum, float* array_cumsummed, int length) 31 { 32 array_cumsummed[0] = array_to_sum[0]; 33 for (int i = 1; i < length; i++) { 34 array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i]; 35 } 36 } 37 38 // Split array helpers 39 int split_array_get_length(int index, int total_length, int n_threads) 40 { 41 return (total_length % n_threads > index ? total_length / n_threads + 1 : total_length / n_threads); 42 } 43 44 void split_array_allocate(float** meta_array, int length, int divide_into) 45 { 46 int split_array_length; 47 48 for (int i = 0; i < divide_into; i++) { 49 split_array_length = split_array_get_length(i, length, divide_into); 50 meta_array[i] = malloc(split_array_length * sizeof(float)); 51 } 52 } 53 54 void split_array_free(float** meta_array, int divided_into) 55 { 56 for (int i = 0; i < divided_into; i++) { 57 free(meta_array[i]); 58 } 59 free(meta_array); 60 } 61 62 float split_array_sum(float** meta_array, int length, int divided_into) 63 { 64 int i; 65 float output = 0; 66 67 #pragma omp parallel for reduction(+ \ 68 : output) 69 for (int i = 0; i < divided_into; i++) { 70 float own_partial_sum = 0; 71 int split_array_length = split_array_get_length(i, length, divided_into); 72 for (int j = 0; j < split_array_length; j++) { 73 own_partial_sum += meta_array[i][j]; 74 } 75 output += own_partial_sum; 76 } 77 return output; 78 } 79 80 // Pseudo Random number generator 81 82 uint32_t xorshift32(uint32_t* seed) 83 { 84 // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RN_SAMPLESGs" 85 // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works> 86 // https://en.wikipedia.org/wiki/Xorshift 87 // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/> 88 89 uint32_t x = *seed; 90 x ^= x << 13; 91 x ^= x >> 17; 92 x ^= x << 5; 93 return *seed = x; 94 } 95 96 // Distribution & sampling functions 97 98 float rand_0_to_1(uint32_t* seed) 99 { 100 return ((float)xorshift32(seed)) / ((float)UINT32_MAX); 101 /* 102 uint32_t x = *seed; 103 x ^= x << 13; 104 x ^= x >> 17; 105 x ^= x << 5; 106 return ((float)(*seed = x))/((float) UIN_SAMPLEST32_MAX); 107 */ 108 // previously: 109 // ((float)rand_r(seed) / (float)RAN_SAMPLESD_MAX) 110 // and before that: rand, but it wasn't thread-safe. 111 // See: <https://stackoverflow.com/questions/43151361/how-to-create-thread-safe-random-number-generator-in-c-using-rand-r> for why to use rand_r: 112 // rand() is not thread-safe, as it relies on (shared) hidden seed. 113 } 114 115 float rand_float(float max, uint32_t* seed) 116 { 117 return rand_0_to_1(seed) * max; 118 } 119 120 float ur_normal(uint32_t* seed) 121 { 122 float u1 = rand_0_to_1(seed); 123 float u2 = rand_0_to_1(seed); 124 float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); 125 return z; 126 } 127 128 float random_uniform(float from, float to, uint32_t* seed) 129 { 130 return rand_0_to_1(seed) * (to - from) + from; 131 } 132 133 float random_normal(float mean, float sigma, uint32_t* seed) 134 { 135 return (mean + sigma * ur_normal(seed)); 136 } 137 138 float random_lognormal(float logmean, float logsigma, uint32_t* seed) 139 { 140 return expf(random_normal(logmean, logsigma, seed)); 141 } 142 143 float random_to(float low, float high, uint32_t* seed) 144 { 145 const float N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE = 1.6448536269514722; 146 float loglow = logf(low); 147 float loghigh = logf(high); 148 float logmean = (loglow + loghigh) / 2; 149 float logsigma = (loghigh - loglow) / (2.0 * N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE); 150 return random_lognormal(logmean, logsigma, seed); 151 } 152 153 // Mixture function 154 155 float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed) 156 { 157 158 // You can see a slightly simpler version of this function in the git history 159 // or in alt/C-02-better-algorithm-one-thread/ 160 float sum_weights = array_sum(weights, n_dists); 161 float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); 162 cumsummed_normalized_weights[0] = weights[0] / sum_weights; 163 for (int i = 1; i < n_dists; i++) { 164 cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights; 165 } 166 167 //create var holders 168 float p1, result; 169 int sample_index, i, own_length; 170 p1 = random_uniform(0, 1, seed); 171 for (int i = 0; i < n_dists; i++) { 172 if (p1 < cumsummed_normalized_weights[i]) { 173 result = samplers[i](seed); 174 break; 175 } 176 } 177 free(cumsummed_normalized_weights); 178 return result; 179 } 180 181 // Parallization function 182 void paralellize(float (*sampler)(uint32_t* seed), float* results, int n_threads, int n_samples){ 183 184 if((N_SAMPLES % n_threads) != 0){ 185 fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n"); 186 exit(1); 187 } 188 int n_samples_per_thread = N_SAMPLES / n_threads; 189 190 float** split_results = malloc(n_threads * sizeof(float*)); 191 for(int i=0; i<n_threads; i++){ 192 split_results[i] = malloc(n_samples_per_thread * sizeof(float)); 193 } 194 195 uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*)); 196 for (uint32_t i = 0; i < n_threads; i++) { 197 seeds[i] = malloc(sizeof(uint32_t)); 198 *seeds[i] = i + 1; // xorshift can't start with 0 199 } 200 201 int i; 202 #pragma omp parallel private(i) 203 { 204 #pragma omp for 205 for (i = 0; i < n_threads; i++) { 206 // split_array_length = split_array_get_length(i, N_SAMPLES, n_threads); 207 for (int j = 0; j < n_samples_per_thread; j++) { 208 split_results[i][j] = sampler(seeds[i]); 209 } 210 } 211 } 212 213 for(int i=0; i<n_threads; i++){ 214 int lower_bound = i * (n_samples / n_threads); 215 int upper_bound = ((i+1) * (n_samples / n_threads)) - 1; 216 // printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound); 217 for(int j=lower_bound; j<upper_bound; j++){ 218 results[j] = split_results[i][j-lower_bound]; 219 } 220 } 221 222 for (uint32_t i = 0; i < n_threads; i++) { 223 free(seeds[i]); 224 } 225 free(seeds); 226 227 for(int i=0; i<n_threads; i++){ 228 free(split_results[i]); 229 } 230 free(split_results); 231 } 232 233 // Functions used for the BOTEC. 234 // Their type has to be the same, as we will be passing them around. 235 236 float sample_0(uint32_t* seed) 237 { 238 return 0; 239 } 240 241 float sample_1(uint32_t* seed) 242 { 243 return 1; 244 } 245 246 float sample_few(uint32_t* seed) 247 { 248 return random_to(1, 3, seed); 249 } 250 251 float sample_many(uint32_t* seed) 252 { 253 return random_to(2, 10, seed); 254 } 255 256 float sample_mixture(uint32_t* seed){ 257 float p_a, p_b, p_c; 258 259 // Initialize variables 260 p_a = 0.8; 261 p_b = 0.5; 262 p_c = p_a * p_b; 263 264 // Generate mixture 265 int n_dists = 4; 266 float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; 267 float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many }; 268 269 return mixture(samplers, weights, n_dists, seed); 270 } 271 272 int main() 273 { 274 int n_threads = omp_get_max_threads(); 275 float* split_array_results = malloc(N_SAMPLES * sizeof(float)); 276 277 paralellize(sample_mixture, split_array_results, n_threads, N_SAMPLES); 278 printf("Sum(split_array_results, N_SAMPLES)/N_SAMPLES = %f\n", 279 array_sum(split_array_results, N_SAMPLES) / N_SAMPLES); 280 281 free(split_array_results); 282 return 0; 283 }