time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

README.md (7201B)


      1 <!--
      2 
      3 @license Apache-2.0
      4 
      5 Copyright (c) 2018 The Stdlib Authors.
      6 
      7 Licensed under the Apache License, Version 2.0 (the "License");
      8 you may not use this file except in compliance with the License.
      9 You may obtain a copy of the License at
     10 
     11    http://www.apache.org/licenses/LICENSE-2.0
     12 
     13 Unless required by applicable law or agreed to in writing, software
     14 distributed under the License is distributed on an "AS IS" BASIS,
     15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16 See the License for the specific language governing permissions and
     17 limitations under the License.
     18 
     19 -->
     20 
     21 # Chi-square goodness-of-fit test
     22 
     23 > Perform a chi-square goodness-of-fit test.
     24 
     25 <section class="usage">
     26 
     27 ## Usage
     28 
     29 ```javascript
     30 var chi2gof = require( '@stdlib/stats/chi2gof' );
     31 ```
     32 
     33 #### chi2gof( x, y\[, ...args]\[, opts] )
     34 
     35 Computes a chi-square goodness-of-fit test for the **null hypothesis** that the values of `x` come from the discrete probability distribution specified by `y`.
     36 
     37 ```javascript
     38 // Observed counts:
     39 var x = [ 30, 20, 23, 27 ];
     40 
     41 // Expected counts:
     42 var y = [ 25, 25, 25, 25 ];
     43 
     44 var res = chi2gof( x, y );
     45 var o = res.toJSON();
     46 /* returns
     47     {
     48         'rejected': false,
     49         'alpha': 0.05,
     50         'pValue': ~0.5087,
     51         'df': 3,
     52         'statistic': ~2.32,
     53         ...
     54     }
     55 */
     56 ```
     57 
     58 The second argument can be an array-like object (or 1-dimensional [`ndarray`][@stdlib/ndarray/array]) of expected frequencies, an array-like object (or 1-dimensional [`ndarray`][@stdlib/ndarray/array]) of population probabilities summing to one, or a discrete probability distribution name to test against.
     59 
     60 ```javascript
     61 // Observed counts:
     62 var x = [ 89, 37, 30, 28, 2 ];
     63 
     64 // Expected probabilities:
     65 var y = [ 0.40, 0.20, 0.20, 0.15, 0.05 ];
     66 
     67 var res = chi2gof( x, y );
     68 var o = res.toJSON();
     69 /* returns
     70     {
     71         'rejected': true,
     72         'alpha': 0.05,
     73         'pValue': ~0.0406,
     74         'df': 4,
     75         'statistic': ~9.9901,
     76         ...
     77     }
     78 */
     79 ```
     80 
     81 When specifying a discrete probability distribution name, distribution parameters **must** be provided as additional arguments.
     82 
     83 ```javascript
     84 var Int32Array = require( '@stdlib/array/int32' );
     85 var discreteUniform = require( '@stdlib/random/base/discrete-uniform' );
     86 
     87 var res;
     88 var x;
     89 var v;
     90 var i;
     91 
     92 // Simulate observed counts...
     93 x = new Int32Array( 100 );
     94 for ( i = 0; i < x.length; i++ ) {
     95     v = discreteUniform( 0, 99 );
     96     x[ v ] += 1;
     97 }
     98 
     99 res = chi2gof( x, 'discrete-uniform', 0, 99 );
    100 // returns {...}
    101 ```
    102 
    103 The function accepts the following `options`:
    104 
    105 -   **alpha**: significance level of the hypothesis test. Must be on the interval `[0,1]`. Default: `0.05`.
    106 -   **ddof**: "delta degrees of freedom" adjustment. Must be a nonnegative integer. Default: `0`.
    107 -   **simulate**: `boolean` indicating whether to calculate p-values by Monte Carlo simulation. Default: `false`.
    108 -   **iterations**: number of Monte Carlo iterations. Default: `500`.
    109 
    110 By default, the test is performed at a significance level of `0.05`. To adjust the significance level, set the `alpha` option.
    111 
    112 ```javascript
    113 var x = [ 89, 37, 30, 28, 2 ];
    114 var p = [ 0.40, 0.20, 0.20, 0.15, 0.05 ];
    115 
    116 var res = chi2gof( x, p );
    117 
    118 var table = res.toString();
    119 /* e.g., returns
    120 
    121     Chi-square goodness-of-fit test
    122 
    123     Null hypothesis: population probabilities are equal to those in p
    124 
    125         pValue: 0.0406
    126         statistic: 9.9901
    127         degrees of freedom: 4
    128 
    129     Test Decision: Reject null in favor of alternative at 5% significance level
    130 
    131 */
    132 
    133 res = chi2gof( x, p, {
    134     'alpha': 0.01
    135 });
    136 
    137 table = res.toString();
    138 /* e.g., returns
    139 
    140     Chi-square goodness-of-fit test
    141 
    142     Null hypothesis: population probabilities are equal to those in p
    143 
    144         pValue: 0.0406
    145         statistic: 9.9901
    146         degrees of freedom: 4
    147 
    148     Test Decision: Fail to reject null in favor of alternative at 1% significance level
    149 
    150 */
    151 ```
    152 
    153 By default, the p-value is computed using a chi-square distribution with `k-1` degrees of freedom, where `k` is the length of `x`. If provided distribution arguments are estimated (e.g., via maximum likelihood estimation), the degrees of freedom **should** be corrected. Set the `ddof` option to use `k-1-n` degrees of freedom, where `n` is the degrees of freedom adjustment.
    154 
    155 ```javascript
    156 var x = [ 89, 37, 30, 28, 2 ];
    157 var p = [ 0.40, 0.20, 0.20, 0.15, 0.05 ];
    158 
    159 var res = chi2gof( x, p, {
    160     'ddof': 1
    161 });
    162 
    163 var o = res.toJSON();
    164 // returns { 'pValue': ~0.0186, 'statistic': ~9.9901, 'df': 3, ... }
    165 ```
    166 
    167 Instead of relying on the chi-square approximation to calculate the p-value, one can use Monte Carlo simulation. When the `simulate` option is `true`, the simulation is performed by re-sampling from the discrete probability distribution specified by `y`.
    168 
    169 ```javascript
    170 var x = [ 89, 37, 30, 28, 2 ];
    171 var p = [ 0.40, 0.20, 0.20, 0.15, 0.05 ];
    172 
    173 var res = chi2gof( x, p, {
    174     'simulate': true,
    175     'iterations': 1000 // explicitly set the number of Monte Carlo simulations
    176 });
    177 // returns {...}
    178 ```
    179 
    180 The function returns a results `object` having the following properties:
    181 
    182 -   **alpha**: significance level.
    183 -   **rejected**: `boolean` indicating the test decision.
    184 -   **pValue**: test p-value.
    185 -   **statistic**: test statistic.
    186 -   **df**: degrees of freedom.
    187 -   **method**: test name.
    188 -   **toString**: serializes results as formatted test output.
    189 -   **toJSON**: serializes results as a JSON object.
    190 
    191 To print formatted test output, invoke the `toString` method. The method accepts the following options:
    192 
    193 -   **digits**: number of displayed decimal digits. Default: `4`.
    194 -   **decision**: `boolean` indicating whether to show the test decision. Default: `true`.
    195 
    196 ```javascript
    197 var x = [ 89, 37, 30, 28, 2 ];
    198 var p = [ 0.40, 0.20, 0.20, 0.15, 0.05 ];
    199 
    200 var res = chi2gof( x, p );
    201 
    202 var table = res.toString({
    203     'decision': false
    204 });
    205 /* e.g., returns
    206 
    207     Chi-square goodness-of-fit test
    208 
    209     Null hypothesis: population probabilities are equal to those in p
    210 
    211         pValue: 0.0406
    212         statistic: 9.9901
    213         degrees of freedom: 4
    214 
    215 */
    216 ```
    217 
    218 </section>
    219 
    220 <!-- /.usage -->
    221 
    222 <section class="notes">
    223 
    224 ## Notes
    225 
    226 -   The chi-square approximation may be incorrect if the observed or expected frequencies in each category are too small. Common practice is to require frequencies **greater than** five.
    227 
    228 </section>
    229 
    230 <!-- /.notes -->
    231 
    232 <section class="examples">
    233 
    234 ## Examples
    235 
    236 <!-- eslint no-undef: "error" -->
    237 
    238 ```javascript
    239 var poisson = require( '@stdlib/random/base/poisson' );
    240 var Int32Array = require( '@stdlib/array/int32' );
    241 var chi2gof = require( '@stdlib/stats/chi2gof' );
    242 
    243 var N = 400;
    244 var lambda = 3.0;
    245 var rpois = poisson.factory( lambda );
    246 
    247 // Draw samples from a Poisson distribution:
    248 var x = [];
    249 var i;
    250 for ( i = 0; i < N; i++ ) {
    251     x.push( rpois() );
    252 }
    253 
    254 // Generate a frequency table:
    255 var freqs = new Int32Array( N );
    256 for ( i = 0; i < N; i++ ) {
    257     freqs[ x[ i ] ] += 1;
    258 }
    259 
    260 // Assess whether the simulated values come from a Poisson distribution:
    261 var out = chi2gof( freqs, 'poisson', lambda );
    262 // returns {...}
    263 
    264 console.log( out.toString() );
    265 ```
    266 
    267 </section>
    268 
    269 <!-- /.examples -->
    270 
    271 <section class="links">
    272 
    273 [@stdlib/ndarray/array]: https://www.npmjs.com/package/@stdlib/ndarray-array
    274 
    275 </section>
    276 
    277 <!-- /.links -->