commit fee06aec65a5fc6061cdb254ec29264d7b647177
parent baa8a532a26a46e5cfa487d611be93a8d4f217af
Author: NunoSempere <nuno.sempere@protonmail.com>
Date: Sat, 22 Jul 2023 21:40:35 +0200
add gamma distribution & documentation.
Diffstat:
5 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
@@ -10,8 +10,10 @@ A self-contained C99 library that provides a subset of [Squiggle](https://www.sq
- Because it will last long
- Because it can fit in my head
- Because if you can implement something in C, you can implement it anywhere else
-- Because it can be made faster if need be, e.g., with a multi-threading library like OpenMP, or by adding more algorithmic complexity
-- **Because there are few abstractions between it and the bare metal: C -> assembly = CPU instructions**, leading to fewer errors beyond the programmer's control.
+- Because it can be made faster if need be
+ - e.g., with a multi-threading library like OpenMP, or by adding more algorithmic complexity
+ - or more simply, by inlining the sampling functions (adding an `inline` directive before their function declaration)
+- **Because there are few abstractions between it and machine code** (C => assembly => machine code with gcc, or C => machine code, with tcc), leading to fewer errors beyond the programmer's control.
## Getting started
@@ -79,8 +81,13 @@ Behaviour on error can be toggled by the `EXIT_ON_ERROR` variable. This library
- [ ] Have some more complicated & realistic example
- [ ] Add summarization functions: 90% ci (or all c.i.?)
+- [ ] Systematize references
- [ ] Publish online
- [ ] Add efficient sampling from a beta distribution
+ - https://dl.acm.org/doi/10.1145/358407.358414
+ - https://link.springer.com/article/10.1007/bf02293108
+ - https://stats.stackexchange.com/questions/502146/how-does-numpy-generate-samples-from-a-beta-distribution
+ - https://github.com/numpy/numpy/blob/5cae51e794d69dd553104099305e9f92db237c53/numpy/random/src/distributions/distributions.c
- [ ] Support all distribution functions in <https://www.squiggle-language.com/docs/Api/Dist>
- [ ] Support all distribution functions in <https://www.squiggle-language.com/docs/Api/Dist>, and do so efficiently
@@ -107,3 +114,5 @@ Behaviour on error can be toggled by the `EXIT_ON_ERROR` variable. This library
- [x] Explain individual examples
- [x] Rename functions to something more self-explanatory, e.g,. `sample_unit_normal`.
- [x] Add summarization functions: mean, std
+- [x] Add sampling from a gamma distribution
+ - https://dl.acm.org/doi/pdf/10.1145/358407.358414
diff --git a/examples/05_sample_from_cdf_beta/example b/examples/05_sample_from_cdf_beta/example
Binary files differ.
diff --git a/examples/05_sample_from_cdf_beta/makefile b/examples/05_sample_from_cdf_beta/makefile
@@ -18,12 +18,13 @@ DEPENDENCIES=$(MATH)
# OPENMP=-fopenmp
## Flags
+VERBOSE=#-v
DEBUG= #'-g'
STANDARD=-std=c99 ## gnu99 allows for nested functions.
EXTENSIONS= #-fnested-functions
WARNINGS=-Wall
OPTIMIZED=-O3#-Ofast
-CFLAGS=$(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED)
+CFLAGS=$(VERBOSE) $(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED)
## Formatter
STYLE_BLUEPRINT=webkit
diff --git a/references/marsaglia-gamma.pdf b/references/marsaglia-gamma.pdf
Binary files differ.
diff --git a/squiggle.c b/squiggle.c
@@ -73,6 +73,42 @@ float sample_to(float low, float high, uint32_t* seed)
return sample_lognormal(logmean, logsigma, seed);
}
+float sample_gamma(float alpha, uint32_t* seed){
+
+ // A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
+ // https://dl.acm.org/doi/pdf/10.1145/358407.358414
+ // see also the references/ folder
+ if(alpha >=1){
+ float d, c, x, v, u;
+ d = alpha - 1.0/3.0;
+ c = 1.0/sqrt(9.0 * d);
+ while(1){
+
+ do {
+ x = sample_unit_normal(seed);
+ v = 1.0 + c * x;
+ } while(v <= 0.0);
+
+ v = pow(v, 3);
+ u = sample_unit_uniform(seed);
+ if( u < 1.0 - 0.0331 * pow(x, 4)){ // Condition 1
+ // the 0.0331 doesn't inspire much confidence
+ // however, this isn't the whole story
+ // by knowing that Condition 1 implies condition 2
+ // we realize that this is just a way of making the algorithm faster
+ // i.e., of not using the logarithms
+ return d*v;
+ }
+ if(log(u) < 0.5*pow(x,2) + d*(1.0 - v + log(v))){ // Condition 2
+ return d*v;
+ }
+ }
+ }else{
+ return sample_gamma(1 + alpha, seed) * pow(sample_unit_uniform(seed), 1/alpha);
+ // see note in p. 371 of https://dl.acm.org/doi/pdf/10.1145/358407.358414
+ }
+}
+
// Array helpers
float array_sum(float* array, int length)
{