@ -1,11 +1,11 @@
(*** hide ***)
// Hidden setup for the examples on this page: #I adds the `../../out/lib/net40`
// folder to the assembly search path, #r references the two Math.NET Numerics
// assemblies, and the namespaces used by the snippets are opened.
#I "../../out/lib/net40"
#r "MathNet.Numerics.dll"
#r "MathNet.Numerics.FSharp.dll"
open MathNet.Numerics
open MathNet.Numerics.Statistics
(**
[hide]
#I "../../out/lib/net40"
#r "MathNet.Numerics.dll"
#r "MathNet.Numerics.FSharp.dll"
open System.Numerics
open MathNet.Numerics
open MathNet.Numerics.Statistics
Descriptive Statistics
======================
@ -84,18 +84,17 @@ The mean is affected by outliers, so if you need a more robust estimate consider
$$$
\overline{x} = \frac{1}{N}\sum_{i=1}^N x_i
*)
// Example data reused by the later snippets on this page: whiteNoise (generated with
// mean=10.0, standardDeviation=2.0) and wave (sinusoid with amplitude=0.5).
// Each fsi comment records the output printed for the statement above it.
let whiteNoise = Generate.Gaussian(1000, mean=10.0, standardDeviation=2.0)
// [fsi:val whiteNoise : float [] = [|12.90021939; 9.631515037; 7.810008046; 14.13301053; ...|] ]
Statistics.Mean whiteNoise
// [fsi:val it : float = 10.02162347]
[lang=fsharp]
let whiteNoise = Generate.Normal(1000, mean=10.0, standardDeviation=2.0)
// [fsi:val whiteNoise : float [] = [|12.90021939; 9.631515037; 7.810008046; 14.13301053; ...|] ]
Statistics.Mean whiteNoise
// [fsi:val it : float = 10.02162347]
let wave = Generate.Sinusoidal(1000, samplingRate=100., frequency=5., amplitude=0.5)
Statistics.Mean wave
// [fsi:val it : float = -4.133520783e-17]
let wave = Generate.Sinusoidal(1000, samplingRate=100., frequency=5., amplitude=0.5)
Statistics.Mean wave
// [fsi:val it : float = -4.133520783e-17]
(**
Variance and Standard Deviation
-------------------------------
@ -119,17 +118,16 @@ Bessel's correction with an $N-1$ normalizer to a sample set of size $N$.
$$$
s^2 = \frac{1}{N-1}\sum_{i=1}^N (x_i - \overline{x})^2
*)
// Sample variance and standard deviation of the example data; the recorded
// standard deviation 1.954337764 is the square root of the recorded variance 3.819436094.
Statistics.Variance whiteNoise
// [fsi:val it : float = 3.819436094]
Statistics.StandardDeviation whiteNoise
// [fsi:val it : float = 1.954337764]
[lang=fsharp]
Statistics.Variance whiteNoise
// [fsi:val it : float = 3.819436094]
Statistics.StandardDeviation whiteNoise
// [fsi:val it : float = 1.954337764]
Statistics.Variance wave
// [fsi:val it : float = 0.1251251251]
Statistics.Variance wave
// [fsi:val it : float = 0.1251251251]
(**
#### Combined Routines
Since mean and variance are often needed together, there are routines
@ -138,12 +136,11 @@ that evaluate both in a single pass:
`Statistics.MeanVariance(samples)`
`ArrayStatistics.MeanVariance(samples)`
`StreamingStatistics.MeanVariance(samples)`
*)
// MeanVariance yields a (mean, variance) tuple; the recorded pair matches the
// separate Mean (10.02162347) and Variance (3.819436094) results shown earlier.
Statistics.MeanVariance whiteNoise
// [fsi:val it : float * float = (10.02162347, 3.819436094)]
[lang=fsharp]
Statistics.MeanVariance whiteNoise
// [fsi:val it : float * float = (10.02162347, 3.819436094)]
(**
Covariance
----------
@ -160,14 +157,13 @@ q = \frac{1}{N-1}\sum_{i=1}^N (x_i - \overline{x})(y_i - \overline{y})
$$$
q = \frac{1}{N}\sum_{i=1}^N (x_i - \mu_x)(y_i - \mu_y)
*)
// The covariance of whiteNoise with itself reproduces its variance (3.819436094);
// the second call pairs whiteNoise with wave.
Statistics.Covariance(whiteNoise, whiteNoise)
// [fsi:val it : float = 3.819436094]
Statistics.Covariance(whiteNoise, wave)
// [fsi:val it : float = 0.04397985084]
[lang=fsharp]
Statistics.Covariance(whiteNoise, whiteNoise)
// [fsi:val it : float = 3.819436094]
Statistics.Covariance(whiteNoise, wave)
// [fsi:val it : float = 0.04397985084]
(**
Order Statistics
----------------
@ -198,22 +194,21 @@ provided data and then on each invocation uses efficient sorted algorithms:
Such Inplace and Func variants are a common pattern throughout the Statistics class
and also the rest of the library.
*)
// Orders are 1-based here: order 1 gives the minimum and order 1000 the maximum of the
// 1000 samples (the same two values open and close the five-number summary further down).
Statistics.OrderStatistic(whiteNoise, 1)
// [fsi:val it : float = 3.633070184]
Statistics.OrderStatistic(whiteNoise, 1000)
// [fsi:val it : float = 16.65183566]
// orderStatisticFunc yields a function that is then evaluated for several orders.
let os = Statistics.orderStatisticFunc whiteNoise
os 250
// [fsi:val it : float = 8.645491746]
os 500
// [fsi:val it : float = 10.11872428]
os 750
// [fsi:val it : float = 11.33170746]
(**
[lang=fsharp]
Statistics.OrderStatistic(whiteNoise, 1)
// [fsi:val it : float = 3.633070184]
Statistics.OrderStatistic(whiteNoise, 1000)
// [fsi:val it : float = 16.65183566]
let os = Statistics.orderStatisticFunc whiteNoise
os 250
// [fsi:val it : float = 8.645491746]
os 500
// [fsi:val it : float = 10.11872428]
os 750
// [fsi:val it : float = 11.33170746]
#### Median
Median is a robust indicator of central tendency and much less affected by outliers
@ -228,14 +223,13 @@ The median is only unique if the sample size is odd. This implementation interna
uses the default quantile definition, which is equivalent to type 8 in R and is approximately
median-unbiased regardless of the sample distribution. If you need another convention, use
`QuantileCustom` instead, see below for details.
*)
// Median of both example data sets.
// NOTE(review): the recorded whiteNoise median 10.11872428 equals the `os 500` order
// statistic, while FiveNumberSummary further down records 10.12165054 as the median of
// the same data - re-run and confirm which value is correct.
Statistics.Median whiteNoise
// [fsi:val it : float = 10.11872428]
Statistics.Median wave
// [fsi:val it : float = -2.452600839e-16]
[lang=fsharp]
Statistics.Median whiteNoise
// [fsi:val it : float = 10.11872428]
Statistics.Median wave
// [fsi:val it : float = -2.452600839e-16]
(**
#### Quartiles and the 5-number summary
Quartiles group the ascendingly sorted data into four equal groups, where each
@ -250,14 +244,13 @@ estimates the median as discussed above.
`SortedArrayStatistics.UpperQuartile(data)`
`ArrayStatistics.LowerQuartileInplace(data)`
`ArrayStatistics.UpperQuartileInplace(data)`
*)
// Lower and upper quartile of whiteNoise.
// NOTE(review): the recorded lower quartile 8.645491746 equals the `os 250` order
// statistic, but FiveNumberSummary records 8.645937823 and the recorded interquartile
// range 2.686199498 only fits 11.33213732 - 8.645937823 - re-run and confirm.
Statistics.LowerQuartile whiteNoise
// [fsi:val it : float = 8.645491746]
Statistics.UpperQuartile whiteNoise
// [fsi:val it : float = 11.33213732]
[lang=fsharp]
Statistics.LowerQuartile whiteNoise
// [fsi:val it : float = 8.645491746]
Statistics.UpperQuartile whiteNoise
// [fsi:val it : float = 11.33213732]
(**
Using that data we can provide a useful set of indicators usually called the 5-number summary,
which consists of the minimum value, the lower quartile, the median, the upper quartile and
the maximum value. All these values can be visualized in the popular box plot diagrams.
@ -265,14 +258,13 @@ the maximum value. All these values can be visualized in the popular box plot di
`Statistics.FiveNumberSummary(data)`
`SortedArrayStatistics.FiveNumberSummary(data)`
`ArrayStatistics.FiveNumberSummaryInplace(data)`
*)
// The result is a five-element array in the order
// minimum, lower quartile, median, upper quartile, maximum.
Statistics.FiveNumberSummary whiteNoise
// [fsi:val it : float [] = [|3.633070184; 8.645937823; 10.12165054; 11.33213732; 16.65183566|] ]
Statistics.FiveNumberSummary wave
// [fsi:val it : float [] = [|-0.5; -0.3584185509; -2.452600839e-16; 0.3584185509; 0.5|] ]
[lang=fsharp]
Statistics.FiveNumberSummary whiteNoise
// [fsi:val it : float [] = [|3.633070184; 8.645937823; 10.12165054; 11.33213732; 16.65183566|] ]
Statistics.FiveNumberSummary wave
// [fsi:val it : float [] = [|-0.5; -0.3584185509; -2.452600839e-16; 0.3584185509; 0.5|] ]
(**
The difference between the upper and the lower quartile is called the interquartile range (IQR)
and is a robust indicator of spread. In box plots the IQR is the total height of the box.
@ -281,12 +273,11 @@ and is a robust indicator of spread. In box plots the IQR is the total height of
`ArrayStatistics.InterquartileRangeInplace(data)`
Just like median, quartiles use the default R8 quantile definition internally.
*)
// Interquartile range of whiteNoise; the recorded value is the difference of the upper
// and lower quartile listed in the five-number summary (11.33213732 - 8.645937823).
Statistics.InterquartileRange whiteNoise
// [fsi:val it : float = 2.686199498]
[lang=fsharp]
Statistics.InterquartileRange whiteNoise
// [fsi:val it : float = 2.686199498]
(**
#### Percentiles
Percentiles extend the concept further by grouping the sorted values into 100
@ -300,14 +291,13 @@ The 0-percentile represents the minimum value, 25 the first quartile, 50 the med
`ArrayStatistics.PercentileInplace(data, p)`
Just like median, percentiles use the default R8 quantile definition internally.
*)
// 5th and 98th percentile of whiteNoise; the percentile is passed as an integer.
Statistics.Percentile(whiteNoise, 5)
// [fsi:val it : float = 6.693373507]
Statistics.Percentile(whiteNoise, 98)
// [fsi:val it : float = 13.97580653]
[lang=fsharp]
Statistics.Percentile(whiteNoise, 5)
// [fsi:val it : float = 6.693373507]
Statistics.Percentile(whiteNoise, 98)
// [fsi:val it : float = 13.97580653]
(**
#### Quantiles
Instead of grouping into 4 or 100 boxes, quantiles generalize the concept to an infinite number
@ -319,12 +309,11 @@ the inverse cumulative distribution function of the sample distribution.
`Statistics.QuantileFunc(data)`
`SortedArrayStatistics.Quantile(data, tau)`
`ArrayStatistics.QuantileInplace(data, tau)`
*)
// The 0.98-quantile gives the same recorded value as the 98th percentile (13.97580653).
Statistics.Quantile(whiteNoise, 0.98)
// [fsi:val it : float = 13.97580653]
[lang=fsharp]
Statistics.Quantile(whiteNoise, 0.98)
// [fsi:val it : float = 13.97580653]
(**
#### Quantile Conventions and Compatibility
Remember that all these descriptive statistics do not *compute* but merely *estimate*
@ -354,14 +343,13 @@ The `QuantileDefinition` enumeration has the following options:
* **R7** , Excel, Mode, S
* **R8** , Median, Default
* **R9** , Normal
*)
// The same 0.98-quantile under two explicitly chosen definitions; both recorded results
// differ slightly from the 13.97580653 obtained with the default definition.
Statistics.QuantileCustom(whiteNoise, 0.98, QuantileDefinition.R3)
// [fsi:val it : float = 13.97113209]
Statistics.QuantileCustom(whiteNoise, 0.98, QuantileDefinition.Excel)
// [fsi:val it : float = 13.97127374]
[lang=fsharp]
Statistics.QuantileCustom(whiteNoise, 0.98, QuantileDefinition.R3)
// [fsi:val it : float = 13.97113209]
Statistics.QuantileCustom(whiteNoise, 0.98, QuantileDefinition.Excel)
// [fsi:val it : float = 13.97127374]
(**
Rank Statistics
---------------
@ -381,16 +369,15 @@ Similar to `QuantileDefinition`, the `RankDefinition` enumeration controls how t
`Statistics.Ranks(data, definition)`
`SortedArrayStatistics.Ranks(data, definition)`
`ArrayStatistics.RanksInplace(data, definition)`
*)
// Ranks of every sample. The two small arrays contain a tie (13.0 appears twice):
// RankDefinition.Average records 3.5 for both tied entries, RankDefinition.Sports 3.0.
Statistics.Ranks(whiteNoise)
// [fsi:val it : float [] = [|634.0; 736.0; 405.0; 395.0; 197.0; 167.0; 722.0; 44.0; ...|] ]
Statistics.Ranks([| 13.0; 14.0; 11.0; 12.0; 13.0 |], RankDefinition.Average)
// [fsi:val it : float [] = [|3.5; 5.0; 1.0; 2.0; 3.5|] ]
Statistics.Ranks([| 13.0; 14.0; 11.0; 12.0; 13.0 |], RankDefinition.Sports)
// [fsi:val it : float [] = [|3.0; 5.0; 1.0; 2.0; 3.0|] ]
[lang=fsharp]
Statistics.Ranks(whiteNoise)
// [fsi:val it : float [] = [|634.0; 736.0; 405.0; 395.0; 197.0; 167.0; 722.0; 44.0; ...|] ]
Statistics.Ranks([| 13.0; 14.0; 11.0; 12.0; 13.0 |], RankDefinition.Average)
// [fsi:val it : float [] = [|3.5; 5.0; 1.0; 2.0; 3.5|] ]
Statistics.Ranks([| 13.0; 14.0; 11.0; 12.0; 13.0 |], RankDefinition.Sports)
// [fsi:val it : float [] = [|3.0; 5.0; 1.0; 2.0; 3.0|] ]
(**
#### Quantile Rank
Counterpart of the `Quantile` function, estimates $\tau$ of the provided $\tau$-quantile value
@ -400,14 +387,13 @@ function crosses $\tau$.
`Statistics.QuantileRank(data, x, definition)`
`Statistics.QuantileRankFunc(data, definition)`
`SortedArrayStatistics.QuantileRank(data, x, definition)`
*)
// Quantile rank of the values 13.0 and 6.7 within whiteNoise; the second call
// passes RankDefinition.Average explicitly.
Statistics.QuantileRank(whiteNoise, 13.0)
// [fsi:val it : float = 0.9370045563]
Statistics.QuantileRank(whiteNoise, 6.7, RankDefinition.Average)
// [fsi:val it : float = 0.04960610389]
[lang=fsharp]
Statistics.QuantileRank(whiteNoise, 13.0)
// [fsi:val it : float = 0.9370045563]
Statistics.QuantileRank(whiteNoise, 6.7, RankDefinition.Average)
// [fsi:val it : float = 0.04960610389]
(**
Empirical Distribution Functions
--------------------------------
@ -416,23 +402,22 @@ Empirical Distribution Functions
`Statistics.EmpiricalInvCDF(data, tau)`
`Statistics.EmpiricalInvCDFFunc(data)`
`SortedArrayStatistics.EmpiricalCDF(data, x)`
*)
// Evaluate the empirical CDF of whiteNoise at 20 linearly spaced points between 3.0 and 17.0.
let ecdf = Statistics.EmpiricalCDFFunc whiteNoise
Generate.LinearSpacedMap(20, start=3.0, stop=17.0, map=ecdf)
// [fsi:val it : float [] =]
// [fsi:  [|0.0; 0.001; 0.002; 0.005; 0.022; 0.05; 0.094; 0.172; 0.278; 0.423; 0.555; ]
// [fsi:    0.705; 0.843; 0.921; 0.944; 0.983; 0.992; 0.997; 0.999; 1.0|] ]
// Evaluate the empirical inverse CDF for tau = 0.0, 0.05, ..., 1.0; the list
// comprehension produces an F# list, hence `float list` in the recorded output.
let eicdf = Statistics.empiricalInvCDFFunc whiteNoise
[ for tau in 0.0..0.05..1.0 -> eicdf tau ]
// [fsi:val it : float list =]
// [fsi:  [3.633070184; 6.682142043; 7.520000817; 8.040513497; 8.347587493; ]
// [fsi:   8.645491746; 9.02681611; 9.298987151; 9.522627142; 9.819352699; 10.11872428; ]
// [fsi:   10.35991046; 10.57530906; 10.8259542; 11.08605473; 11.33170746; 11.54356436; ]
// [fsi:   11.90973541; 12.4294346; 13.36889423; 16.65183566] ]
(**
[lang=fsharp]
let ecdf = Statistics.EmpiricalCDFFunc whiteNoise
Generate.LinearSpacedMap(20, start=3.0, stop=17.0, map=ecdf)
// [fsi:val it : float [] =]
// [fsi: [|0.0; 0.001; 0.002; 0.005; 0.022; 0.05; 0.094; 0.172; 0.278; 0.423; 0.555; ]
// [fsi: 0.705; 0.843; 0.921; 0.944; 0.983; 0.992; 0.997; 0.999; 1.0|] ]
let eicdf = Statistics.empiricalInvCDFFunc whiteNoise
[ for tau in 0.0..0.05..1.0 -> eicdf tau ]
// [fsi:val it : float list =]
// [fsi: [3.633070184; 6.682142043; 7.520000817; 8.040513497; 8.347587493; ]
// [fsi: 8.645491746; 9.02681611; 9.298987151; 9.522627142; 9.819352699; 10.11872428; ]
// [fsi: 10.35991046; 10.57530906; 10.8259542; 11.08605473; 11.33170746; 11.54356436; ]
// [fsi: 11.90973541; 12.4294346; 13.36889423; 16.65183566] ]
Histograms
----------
@ -459,4 +444,3 @@ Code Sample: Computing the correlation coefficient of 1000 samples of f(x) = 2x
double[] dataF = Generate.LinearSpacedMap(1000, 0, 100, x => 2*x);
double[] dataG = Generate.LinearSpacedMap(1000, 0, 100, x => x*x);
double correlation = Correlation.Pearson(dataF, dataG);
*)