commit 58cfe378e51797098a5f4f3082e4893b3550a7cf
parent d2290216250c5ff773edb3ccec299171171db77d
Author: NunoSempere <nuno.sempere@protonmail.com>
Date: Sat, 3 Jun 2023 01:42:48 -0600
perf tweaks
Diffstat:
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/C/README.md b/C/README.md
@@ -10,6 +10,7 @@ This repository contains a few implementations of a simple botec (back-of-the-en
- [ ] Add Windows/Powershell time-measuring commands
- [ ] Add CUDA?
+- [x] Added `perf` profiling results (see `perf.txt`). `rand_r` accounts for the largest share of samples (about 24%), but I'm hesitant to switch to lower-quality random numbers
- [x] Update repository with correct timing
- [x] Use better profiling approach to capture timing with 1M samples.
- [x] See if program can be reworked so as to use multithreading effectively, e.g., so that you see speed gains proportional to the number of threads used
diff --git a/C/makefile b/C/makefile
@@ -82,7 +82,7 @@ time-linux-simple:
## Profiling
profile-linux:
- echo "Requires perf, which depends on the kernel, and might be in linux-tools package or similar"
+ echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
echo "Must be run as sudo"
$(CC) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
# ./$(OUTPUT)
diff --git a/C/perf.txt b/C/perf.txt
@@ -0,0 +1,25 @@
+Overhead Command Shared Object Symbol
+ 23.94% samples libc-2.31.so [.] rand_r
+ 18.14% samples libgomp.so.1.0.0 [.] 0x000000000001d132
+ 15.43% samples libgomp.so.1.0.0 [.] 0x000000000001d2ea
+ 12.16% samples samples [.] mixture._omp_fn.0
+ 4.36% samples libm-2.31.so [.] __sin_fma
+ 3.49% samples libm-2.31.so [.] __ieee754_log_fma
+ 3.34% samples samples [.] random_to
+ 3.13% samples samples [.] random_uniform
+ 2.77% samples samples [.] split_array_sum._omp_fn.0
+ 2.01% samples samples [.] rand_float
+ 1.65% samples libm-2.31.so [.] __logf_fma
+ 0.88% samples libgomp.so.1.0.0 [.] 0x000000000001d2f5
+ 0.86% samples samples [.] ur_normal
+ 0.75% samples libm-2.31.so [.] __expf_fma
+ 0.70% samples libgomp.so.1.0.0 [.] 0x000000000001d13d
+ 0.69% samples libgomp.so.1.0.0 [.] 0x000000000001d139
+ 0.57% samples libgomp.so.1.0.0 [.] 0x000000000001d2f1
+ 0.57% samples samples [.] sample_1
+ 0.55% samples samples [.] random_lognormal
+ 0.50% samples [kernel.kallsyms] [k] asm_exc_page_fault
+ 0.49% samples [kernel.kallsyms] [k] clear_page_rep
+ 0.47% samples samples [.] random_normal
+ 0.38% samples [kernel.kallsyms] [k] default_send_IPI_single_phys
+