commit b497b5b399b0bab278ac450b8d4a1fc55c79e16d
parent 3bb2804ccf2db29dfe59ba077fe731d9697762af
Author: NunoSempere <nuno.sempere@protonmail.com>
Date: Sat, 13 Jan 2024 00:50:51 +0100
remove loop unrolling again
Diffstat:
1 file changed, 0 insertions(+), 17 deletions(-)
diff --git a/squiggle_more.c b/squiggle_more.c
@@ -75,23 +75,6 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
// b) trying to unroll loops actually makes the code slower
// c) 8 results[j] are 8 doubles, which fit a cache line. If n_samples/n_threads
}
- // Failed loop unrolling
- /*
- for (int j = lower_bound_inclusive; j < 4*(upper_bound_not_inclusive/4); j++) {
- results[j+0] = sampler(&(cache_box[i].seed));
- results[j+1] = sampler(&(cache_box[i].seed));
- results[j+2] = sampler(&(cache_box[i].seed));
- results[j+3] = sampler(&(cache_box[i].seed));
- // In principle, these results[j] could also result in two threads competing for the same cache line.
- // In practice, though,
- // a) this would happen infrequently
- // b) trying to unroll loops actually makes the code slower
- // c) 8 results[j] are 8 doubles, which fit a cache line. If n_samples/n_threads
- }
- for (int j = 4*(upper_bound_not_inclusive/4); j < upper_bound_not_inclusive; j++) {
- results[j] = sampler(&(cache_box[i].seed));
- }
- */
}
}
for (int j = divisor_multiple; j < n_samples; j++) {