//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2010 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
namespace MathNet.Numerics.Statistics
{
using System.Collections.Generic;
/// <summary>
/// Computes the basic statistics of a data set. The class meets the
/// NIST standard of accuracy for mean, variance, and standard deviation
/// (the only statistics they provide exact values for) and exceeds them
/// in increased accuracy mode.
/// </summary>
/// <remarks>
/// The input sequence is iterated by this class and is additionally passed to the
/// separate <c>Mean</c>, <c>Median</c>, <c>Maximum</c> and <c>Minimum</c> computations,
/// so callers should pass a sequence that can be enumerated repeatedly
/// (for example an array or a list).
/// </remarks>
public class DescriptiveStatistics
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
    /// using the standard (double precision) accuracy mode.
    /// </summary>
    /// <param name="data">The sample data.</param>
    public DescriptiveStatistics(IEnumerable<double> data)
        : this(data, false)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
    /// using the standard (double precision) accuracy mode.
    /// </summary>
    /// <param name="data">The sample data; <c>null</c> entries are skipped when accumulating the moments.</param>
    public DescriptiveStatistics(IEnumerable<double?> data)
        : this(data, false)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
    /// </summary>
    /// <param name="data">The sample data.</param>
    /// <param name="increasedAccuracy">
    /// If set to <c>true</c>, increased accuracy mode is used.
    /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
    /// </param>
    /// <remarks>
    /// Don't use increased accuracy for data sets containing large values (in absolute value).
    /// This may cause the calculations to overflow.
    /// </remarks>
    public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy)
    {
        if (increasedAccuracy)
        {
            ComputeHA(data);
        }
        else
        {
            Compute(data);
        }

        Median = data.Median();
        Maximum = data.Maximum();
        Minimum = data.Minimum();
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
    /// </summary>
    /// <param name="data">The sample data; <c>null</c> entries are skipped when accumulating the moments.</param>
    /// <param name="increasedAccuracy">
    /// If set to <c>true</c>, increased accuracy mode is used.
    /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
    /// </param>
    /// <remarks>
    /// Don't use increased accuracy for data sets containing large values (in absolute value).
    /// This may cause the calculations to overflow.
    /// </remarks>
    public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy)
    {
        if (increasedAccuracy)
        {
            ComputeHA(data);
        }
        else
        {
            Compute(data);
        }

        Median = data.Median();
        Maximum = data.Maximum();
        Minimum = data.Minimum();
    }

    /// <summary>
    /// Gets the size of the sample.
    /// </summary>
    /// <value>The number of values that were accumulated (for nullable input, the number of non-null values).</value>
    public int Count { get; private set; }

    /// <summary>
    /// Gets the sample mean.
    /// </summary>
    /// <value>The sample mean.</value>
    public double Mean { get; private set; }

    /// <summary>
    /// Gets the sample variance.
    /// </summary>
    /// <value>The sample variance, normalized by <c>Count - 1</c>.</value>
    /// <remarks>Stays at zero if no values were accumulated.</remarks>
    public double Variance { get; private set; }

    /// <summary>
    /// Gets the sample standard deviation.
    /// </summary>
    /// <value>The square root of <see cref="Variance"/>.</value>
    public double StandardDeviation { get; private set; }

    /// <summary>
    /// Gets the sample skewness.
    /// </summary>
    /// <value>The sample skewness.</value>
    /// <remarks>Returns zero if <see cref="Count"/> is less than three or if the variance is zero.</remarks>
    public double Skewness { get; private set; }

    /// <summary>
    /// Gets the sample median.
    /// </summary>
    /// <value>The sample median.</value>
    public double Median { get; private set; }

    /// <summary>
    /// Gets the sample kurtosis.
    /// </summary>
    /// <value>The sample kurtosis.</value>
    /// <remarks>Returns zero if <see cref="Count"/> is less than four or if the variance is zero.</remarks>
    public double Kurtosis { get; private set; }

    /// <summary>
    /// Gets the maximum sample value.
    /// </summary>
    /// <value>The maximum sample value.</value>
    public double Maximum { get; private set; }

    /// <summary>
    /// Gets the minimum sample value.
    /// </summary>
    /// <value>The minimum sample value.</value>
    public double Minimum { get; private set; }

    /// <summary>
    /// Computes descriptive statistics from a stream of data values.
    /// </summary>
    /// <param name="data">A sequence of datapoints.</param>
    private void Compute(IEnumerable<double> data)
    {
        Mean = data.Mean();
        AccumulateMoments(data, Mean);
    }

    /// <summary>
    /// Computes descriptive statistics from a stream of nullable data values.
    /// </summary>
    /// <param name="data">A sequence of datapoints; <c>null</c> entries are ignored.</param>
    private void Compute(IEnumerable<double?> data)
    {
        Mean = data.Mean();
        AccumulateMoments(SkipMissing(data), Mean);
    }

    /// <summary>
    /// Computes descriptive statistics from a stream of data values using high accuracy.
    /// </summary>
    /// <param name="data">A sequence of datapoints.</param>
    private void ComputeHA(IEnumerable<double> data)
    {
        Mean = data.Mean();
        AccumulateMomentsHA(data, (decimal)Mean);
    }

    /// <summary>
    /// Computes descriptive statistics from a stream of nullable data values using high accuracy.
    /// </summary>
    /// <param name="data">A sequence of datapoints; <c>null</c> entries are ignored.</param>
    private void ComputeHA(IEnumerable<double?> data)
    {
        Mean = data.Mean();
        AccumulateMomentsHA(SkipMissing(data), (decimal)Mean);
    }

    /// <summary>
    /// Yields the values of the non-null entries of a nullable sequence, in order.
    /// </summary>
    /// <param name="data">A sequence of nullable datapoints.</param>
    /// <returns>The values of all entries that have a value.</returns>
    private static IEnumerable<double> SkipMissing(IEnumerable<double?> data)
    {
        foreach (var item in data)
        {
            if (item.HasValue)
            {
                yield return item.Value;
            }
        }
    }

    /// <summary>
    /// Accumulates the sums of the second, third and fourth powers of the deviations
    /// from <paramref name="mean"/> in double precision, then sets <see cref="Count"/>
    /// and the moment based properties.
    /// </summary>
    /// <param name="values">The values to accumulate.</param>
    /// <param name="mean">The previously computed mean of <paramref name="values"/>.</param>
    private void AccumulateMoments(IEnumerable<double> values, double mean)
    {
        double correction = 0;
        double sumOfSquares = 0;
        double sumOfCubes = 0;
        double sumOfFourthPowers = 0;
        int n = 0;

        foreach (var value in values)
        {
            double diff = value - mean;
            double power = diff * diff;
            correction += diff;
            sumOfSquares += power;
            power *= diff;
            sumOfCubes += power;
            power *= diff;
            sumOfFourthPowers += power;
            n++;
        }

        Count = n;

        // With no values there is nothing to normalize; the properties keep their zero defaults.
        if (n > 0)
        {
            // The sum of the deviations compensates for rounding error in the mean.
            SetMoments(n, sumOfSquares - (correction * correction / n), sumOfCubes, sumOfFourthPowers);
        }
    }

    /// <summary>
    /// Accumulates the sums of the second, third and fourth powers of the deviations
    /// from <paramref name="mean"/> using <see cref="decimal"/> arithmetic, then sets
    /// <see cref="Count"/> and the moment based properties.
    /// </summary>
    /// <param name="values">The values to accumulate; each one is converted to <see cref="decimal"/>.</param>
    /// <param name="mean">The previously computed mean of <paramref name="values"/>.</param>
    private void AccumulateMomentsHA(IEnumerable<double> values, decimal mean)
    {
        decimal correction = 0;
        decimal sumOfSquares = 0;
        decimal sumOfCubes = 0;
        decimal sumOfFourthPowers = 0;
        int n = 0;

        foreach (var value in values)
        {
            decimal diff = (decimal)value - mean;
            decimal power = diff * diff;
            correction += diff;
            sumOfSquares += power;
            power *= diff;
            sumOfCubes += power;
            power *= diff;
            sumOfFourthPowers += power;
            n++;
        }

        Count = n;

        // The guard also keeps the decimal division below from throwing DivideByZeroException.
        if (n > 0)
        {
            // Apply the correction term while still in decimal, and only then narrow to double.
            double correctedSumOfSquares = (double)(sumOfSquares - (correction * correction / n));
            SetMoments(n, correctedSumOfSquares, (double)sumOfCubes, (double)sumOfFourthPowers);
        }
    }

    /// <summary>
    /// Sets <see cref="Variance"/>, <see cref="StandardDeviation"/>, <see cref="Skewness"/>
    /// and <see cref="Kurtosis"/> from the accumulated power sums.
    /// </summary>
    /// <param name="n">The number of accumulated values; must be greater than zero.</param>
    /// <param name="correctedSumOfSquares">The corrected sum of squared deviations from the mean.</param>
    /// <param name="sumOfCubes">The sum of cubed deviations from the mean.</param>
    /// <param name="sumOfFourthPowers">The sum of the fourth powers of the deviations from the mean.</param>
    private void SetMoments(int n, double correctedSumOfSquares, double sumOfCubes, double sumOfFourthPowers)
    {
        // All normalization factors are evaluated in double. As int expressions the products
        // (n - 1) * (n - 2) * (n - 3) and (n - 1) * (n - 2) silently wrap around once the
        // sample has about 1,300 respectively about 46,000 values.
        double count = n;

        Variance = correctedSumOfSquares / (count - 1);
        StandardDeviation = System.Math.Sqrt(Variance);

        // A zero variance would lead to a division by zero below; skewness and kurtosis stay zero.
        if (Variance == 0)
        {
            return;
        }

        if (n > 2)
        {
            Skewness = count / ((count - 1) * (count - 2)) * (sumOfCubes / (Variance * StandardDeviation));
        }

        if (n > 3)
        {
            Kurtosis = ((count * (count + 1))
                        / ((count - 1) * (count - 2) * (count - 3))
                        * (sumOfFourthPowers / (Variance * Variance)))
                       - ((3.0 * (count - 1) * (count - 1)) / ((count - 2) * (count - 3)));
        }
    }
}
}