//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2010 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

namespace MathNet.Numerics.Statistics
{
    using System;
    using System.Collections.Generic;

    /// <summary>
    /// Computes the basic statistics of data set. The class meets the
    /// NIST standard of accuracy for mean, variance, and standard deviation
    /// (the only statistics they provide exact values for) and exceeds them
    /// in increased accuracy mode.
    /// </summary>
    /// <remarks>
    /// All statistics are computed once, in the constructor. Statistics that are
    /// not defined for the given sample size keep their default value of zero.
    /// </remarks>
    public class DescriptiveStatistics
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
        /// using the standard (double precision) accumulation.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="data"/> is <c>null</c>.</exception>
        public DescriptiveStatistics(IEnumerable<double> data)
            : this(data, false)
        {
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
        /// using the standard (double precision) accumulation. Entries without a value are skipped.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="data"/> is <c>null</c>.</exception>
        public DescriptiveStatistics(IEnumerable<double?> data)
            : this(data, false)
        {
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <param name="increasedAccuracy">
        /// If set to <c>true</c>, increased accuracy mode used.
        /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
        /// </param>
        /// <remarks>
        /// Don't use increased accuracy for data sets containing large values (in absolute value).
        /// This may cause the calculations to overflow.
        /// Note that <paramref name="data"/> is enumerated more than once (moments, then median).
        /// </remarks>
        /// <exception cref="ArgumentNullException">If <paramref name="data"/> is <c>null</c>.</exception>
        public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            if (increasedAccuracy)
            {
                ComputeHA(data);
            }
            else
            {
                Compute(data);
            }

            Median = data.Median();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
        /// Entries without a value are skipped when accumulating the moments.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <param name="increasedAccuracy">
        /// If set to <c>true</c>, increased accuracy mode used.
        /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
        /// </param>
        /// <remarks>
        /// Don't use increased accuracy for data sets containing large values (in absolute value).
        /// This may cause the calculations to overflow.
        /// Note that <paramref name="data"/> is enumerated more than once (moments, then median).
        /// </remarks>
        /// <exception cref="ArgumentNullException">If <paramref name="data"/> is <c>null</c>.</exception>
        public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            if (increasedAccuracy)
            {
                ComputeHA(data);
            }
            else
            {
                Compute(data);
            }

            Median = data.Median();
        }

        /// <summary>
        /// Gets the size of the sample.
        /// </summary>
        /// <value>The size of the sample.</value>
        public int Count { get; private set; }

        /// <summary>
        /// Gets the sample mean.
        /// </summary>
        /// <value>The sample mean.</value>
        public double Mean { get; private set; }

        /// <summary>
        /// Gets the sample variance.
        /// </summary>
        /// <value>The sample variance.</value>
        /// <remarks>Returns zero if <see cref="Count"/> is less than two.</remarks>
        public double Variance { get; private set; }

        /// <summary>
        /// Gets the sample standard deviation.
        /// </summary>
        /// <value>The sample standard deviation.</value>
        /// <remarks>Returns zero if <see cref="Count"/> is less than two.</remarks>
        public double StandardDeviation { get; private set; }

        /// <summary>
        /// Gets the sample skewness.
        /// </summary>
        /// <value>The sample skewness.</value>
        /// <remarks>Returns zero if <see cref="Count"/> is less than three or if <see cref="Variance"/> is zero.</remarks>
        public double Skewness { get; private set; }

        /// <summary>
        /// Gets the sample median.
        /// </summary>
        /// <value>The sample median.</value>
        public double Median { get; private set; }

        /// <summary>
        /// Gets the sample kurtosis.
        /// </summary>
        /// <value>The sample kurtosis.</value>
        /// <remarks>Returns zero if <see cref="Count"/> is less than four or if <see cref="Variance"/> is zero.</remarks>
        public double Kurtosis { get; private set; }

        /// <summary>
        /// Gets the maximum sample value.
        /// </summary>
        /// <value>The maximum sample value.</value>
        /// <remarks>Returns zero if the sample is empty.</remarks>
        public double Maximum { get; private set; }

        /// <summary>
        /// Gets the minimum sample value.
        /// </summary>
        /// <value>The minimum sample value.</value>
        /// <remarks>Returns zero if the sample is empty.</remarks>
        public double Minimum { get; private set; }

        /// <summary>
        /// Yields the values of all entries that have a value, in their original order.
        /// </summary>
        /// <param name="data">A sequence of nullable datapoints.</param>
        /// <returns>The non-null datapoints.</returns>
        private static IEnumerable<double> NonNullValues(IEnumerable<double?> data)
        {
            foreach (var xi in data)
            {
                if (xi.HasValue)
                {
                    yield return xi.Value;
                }
            }
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of data values.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        private void Compute(IEnumerable<double> data)
        {
            // Running central-moment sums; they are normalized in SetStatistics.
            double mean = 0;
            double variance = 0;
            double skewness = 0;
            double kurtosis = 0;
            double minimum = Double.PositiveInfinity;
            double maximum = Double.NegativeInfinity;
            int n = 0;

            foreach (var xi in data)
            {
                double delta = xi - mean;
                double scaleDelta = delta / ++n;
                double scaleDeltaSQR = scaleDelta * scaleDelta;
                double tmpDelta = delta * (n - 1);

                mean += scaleDelta;

                // Order matters: each update below reads the not-yet-updated lower-order sums.
                // The polynomial in n is evaluated in double so it cannot wrap for n > 46340.
                kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * ((double)n * n - 3.0 * n + 3)
                            + 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
                skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
                variance += tmpDelta * scaleDelta;

                if (minimum > xi)
                {
                    minimum = xi;
                }

                if (maximum < xi)
                {
                    maximum = xi;
                }
            }

            SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of nullable data values.
        /// Entries without a value are ignored and do not contribute to the count.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        private void Compute(IEnumerable<double?> data)
        {
            Compute(NonNullValues(data));
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of data values using high accuracy.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        /// <remarks>
        /// Each datapoint is converted to <see cref="decimal"/>; values outside the decimal range
        /// (including NaN and infinities) make that conversion throw <see cref="OverflowException"/>.
        /// </remarks>
        private void ComputeHA(IEnumerable<double> data)
        {
            // Running central-moment sums; they are normalized in SetStatistics.
            decimal mean = 0;
            decimal variance = 0;
            decimal skewness = 0;
            decimal kurtosis = 0;
            decimal minimum = Decimal.MaxValue;
            decimal maximum = Decimal.MinValue;
            int n = 0;

            foreach (decimal xi in data)
            {
                decimal delta = xi - mean;
                decimal scaleDelta = delta / ++n;
                decimal scaleDeltaSQR = scaleDelta * scaleDelta;
                decimal tmpDelta = delta * (n - 1);

                mean += scaleDelta;

                // Order matters: each update below reads the not-yet-updated lower-order sums.
                // The polynomial in n is evaluated in decimal so it cannot wrap for n > 46340.
                kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * ((decimal)n * n - 3m * n + 3)
                            + 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
                skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
                variance += tmpDelta * scaleDelta;

                if (minimum > xi)
                {
                    minimum = xi;
                }

                if (maximum < xi)
                {
                    maximum = xi;
                }
            }

            SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of nullable data values using high accuracy.
        /// Entries without a value are ignored and do not contribute to the count.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        private void ComputeHA(IEnumerable<double?> data)
        {
            ComputeHA(NonNullValues(data));
        }

        /// <summary>
        /// Internal use. Method use for setting the statistics.
        /// Converts the accumulated moment sums into the published statistics.
        /// </summary>
        /// <param name="mean">For setting Mean.</param>
        /// <param name="variance">For setting Variance.</param>
        /// <param name="skewness">For setting Skewness.</param>
        /// <param name="kurtosis">For setting Kurtosis.</param>
        /// <param name="minimum">For setting Minimum.</param>
        /// <param name="maximum">For setting Maximum.</param>
        /// <param name="n">For setting Count.</param>
        private void SetStatistics(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, int n)
        {
            Mean = mean;
            Count = n;

            if (n <= 0)
            {
                // Empty sample: the min/max sentinels are meaningless, leave everything else at zero.
                return;
            }

            Minimum = minimum;
            Maximum = maximum;

            if (n > 1)
            {
                Variance = variance / (n - 1);
                StandardDeviation = Math.Sqrt(Variance);
            }

            if (Variance != 0)
            {
                // The sample-size correction factors are evaluated in double: as int products,
                // (n-1)(n-2)(n-3) silently wraps from n of roughly 1293 and (n-1)(n-2) from roughly 46342.
                double size = n;

                if (n > 2)
                {
                    Skewness = size / ((size - 1) * (size - 2)) * (skewness / (Variance * StandardDeviation));
                }

                if (n > 3)
                {
                    Kurtosis = ((size * (size + 1)) / ((size - 1) * (size - 2) * (size - 3)) * (kurtosis / (Variance * Variance)))
                               - ((3.0 * (size - 1) * (size - 1)) / ((size - 2) * (size - 3)));
                }
            }
        }
    }
}