//
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2010 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

namespace MathNet.Numerics.Statistics
{
    using System.Collections.Generic;

    /// <summary>
    /// Computes the basic statistics of a data set. The class meets the
    /// NIST standard of accuracy for mean, variance, and standard deviation
    /// (the only statistics they provide exact values for) and exceeds them
    /// in increased accuracy mode.
    /// </summary>
    public class DescriptiveStatistics
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
        /// using standard (double precision) accuracy.
        /// </summary>
        /// <param name="data">The sample data.</param>
        public DescriptiveStatistics(IEnumerable<double> data)
            : this(data, false)
        {
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class
        /// using standard (double precision) accuracy.
        /// </summary>
        /// <param name="data">The sample data.</param>
        public DescriptiveStatistics(IEnumerable<double?> data)
            : this(data, false)
        {
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <param name="increasedAccuracy">
        /// If set to <c>true</c>, increased accuracy mode used.
        /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
        /// </param>
        /// <remarks>
        /// Don't use increased accuracy for data sets containing large values (in absolute value).
        /// This may cause the calculations to overflow.
        /// The sequence is enumerated several times (mean, moments, median, maximum, minimum).
        /// </remarks>
        public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy)
        {
            if (increasedAccuracy)
            {
                ComputeHA(data);
            }
            else
            {
                Compute(data);
            }

            Median = data.Median();
            Maximum = data.Maximum();
            Minimum = data.Minimum();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
        /// </summary>
        /// <param name="data">The sample data.</param>
        /// <param name="increasedAccuracy">
        /// If set to <c>true</c>, increased accuracy mode used.
        /// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
        /// </param>
        /// <remarks>
        /// Don't use increased accuracy for data sets containing large values (in absolute value).
        /// This may cause the calculations to overflow.
        /// The sequence is enumerated several times (mean, moments, median, maximum, minimum).
        /// </remarks>
        public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy)
        {
            if (increasedAccuracy)
            {
                ComputeHA(data);
            }
            else
            {
                Compute(data);
            }

            Median = data.Median();
            Maximum = data.Maximum();
            Minimum = data.Minimum();
        }

        /// <summary>
        /// Gets the size of the sample.
        /// </summary>
        /// <value>The size of the sample.</value>
        public int Count { get; private set; }

        /// <summary>
        /// Gets the sample mean.
        /// </summary>
        /// <value>The sample mean.</value>
        public double Mean { get; private set; }

        /// <summary>
        /// Gets the sample variance.
        /// </summary>
        /// <value>The sample variance.</value>
        public double Variance { get; private set; }

        /// <summary>
        /// Gets the sample standard deviation.
        /// </summary>
        /// <value>The sample standard deviation.</value>
        public double StandardDeviation { get; private set; }

        /// <summary>
        /// Gets the sample skewness.
        /// </summary>
        /// <value>The sample skewness.</value>
        /// <remarks>Returns zero if <see cref="Count"/> is less than three or the variance is zero.</remarks>
        public double Skewness { get; private set; }

        /// <summary>
        /// Gets the sample median.
        /// </summary>
        /// <value>The sample median.</value>
        public double Median { get; private set; }

        /// <summary>
        /// Gets the sample kurtosis.
        /// </summary>
        /// <value>The sample kurtosis.</value>
        /// <remarks>Returns zero if <see cref="Count"/> is less than four or the variance is zero.</remarks>
        public double Kurtosis { get; private set; }

        /// <summary>
        /// Gets the maximum sample value.
        /// </summary>
        /// <value>The maximum sample value.</value>
        public double Maximum { get; private set; }

        /// <summary>
        /// Gets the minimum sample value.
        /// </summary>
        /// <value>The minimum sample value.</value>
        public double Minimum { get; private set; }

        /// <summary>
        /// Computes descriptive statistics from a stream of data values.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        private void Compute(IEnumerable<double> data)
        {
            Mean = data.Mean();

            double sumOfSquares = 0;
            double sumOfDeviations = 0;
            double sumOfCubes = 0;
            double sumOfFourthPowers = 0;
            int n = 0;

            foreach (var xi in data)
            {
                double diff = xi - Mean;
                double power = diff * diff;

                sumOfDeviations += diff;
                sumOfSquares += power;
                power *= diff;
                sumOfCubes += power;
                power *= diff;
                sumOfFourthPowers += power;
                n++;
            }

            Count = n;

            // Subtracting (sum of deviations)^2 / n compensates for rounding error in the mean.
            // For n == 0 or n == 1 this evaluates to NaN in double arithmetic.
            Variance = (sumOfSquares - (sumOfDeviations * sumOfDeviations / n)) / (n - 1);
            StandardDeviation = System.Math.Sqrt(Variance);

            SetShapeStatistics(n, sumOfCubes, sumOfFourthPowers);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of nullable data values.
        /// Entries without a value are skipped and do not contribute to <see cref="Count"/>.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        private void Compute(IEnumerable<double?> data)
        {
            Mean = data.Mean();

            double sumOfSquares = 0;
            double sumOfDeviations = 0;
            double sumOfCubes = 0;
            double sumOfFourthPowers = 0;
            int n = 0;

            foreach (var xi in data)
            {
                if (!xi.HasValue)
                {
                    continue;
                }

                double diff = xi.Value - Mean;
                double power = diff * diff;

                sumOfDeviations += diff;
                sumOfSquares += power;
                power *= diff;
                sumOfCubes += power;
                power *= diff;
                sumOfFourthPowers += power;
                n++;
            }

            Count = n;

            // With no values present, variance and the derived statistics keep their default of zero.
            if (n == 0)
            {
                return;
            }

            Variance = (sumOfSquares - (sumOfDeviations * sumOfDeviations / n)) / (n - 1);
            StandardDeviation = System.Math.Sqrt(Variance);

            SetShapeStatistics(n, sumOfCubes, sumOfFourthPowers);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of data values using high accuracy.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        private void ComputeHA(IEnumerable<double> data)
        {
            Mean = data.Mean();
            decimal mean = (decimal)Mean;

            decimal sumOfSquares = 0;
            decimal sumOfDeviations = 0;
            decimal sumOfCubes = 0;
            decimal sumOfFourthPowers = 0;
            int n = 0;

            // The foreach performs an explicit double -> decimal conversion on every element.
            foreach (decimal xi in data)
            {
                decimal diff = xi - mean;
                decimal power = diff * diff;

                sumOfDeviations += diff;
                sumOfSquares += power;
                power *= diff;
                sumOfCubes += power;
                power *= diff;
                sumOfFourthPowers += power;
                n++;
            }

            Count = n;

            if (n == 0)
            {
                // Unlike double, decimal division by zero throws. Report the same
                // NaN values the standard accuracy path produces for an empty sample.
                Variance = double.NaN;
                StandardDeviation = double.NaN;
                return;
            }

            Variance = (double)(sumOfSquares - (sumOfDeviations * sumOfDeviations / n)) / (n - 1);
            StandardDeviation = System.Math.Sqrt(Variance);

            SetShapeStatistics(n, (double)sumOfCubes, (double)sumOfFourthPowers);
        }

        /// <summary>
        /// Computes descriptive statistics from a stream of nullable data values using high accuracy.
        /// Entries without a value are skipped and do not contribute to <see cref="Count"/>.
        /// </summary>
        /// <param name="data">A sequence of datapoints.</param>
        private void ComputeHA(IEnumerable<double?> data)
        {
            Mean = data.Mean();
            decimal mean = (decimal)Mean;

            decimal sumOfSquares = 0;
            decimal sumOfDeviations = 0;
            decimal sumOfCubes = 0;
            decimal sumOfFourthPowers = 0;
            int n = 0;

            // The foreach performs an explicit double? -> decimal? conversion on every element.
            foreach (decimal? xi in data)
            {
                if (!xi.HasValue)
                {
                    continue;
                }

                decimal diff = xi.Value - mean;
                decimal power = diff * diff;

                sumOfDeviations += diff;
                sumOfSquares += power;
                power *= diff;
                sumOfCubes += power;
                power *= diff;
                sumOfFourthPowers += power;
                n++;
            }

            Count = n;

            // With no values present, variance and the derived statistics keep their default of zero.
            if (n == 0)
            {
                return;
            }

            Variance = (double)(sumOfSquares - (sumOfDeviations * sumOfDeviations / n)) / (n - 1);
            StandardDeviation = System.Math.Sqrt(Variance);

            SetShapeStatistics(n, (double)sumOfCubes, (double)sumOfFourthPowers);
        }

        /// <summary>
        /// Sets <see cref="Skewness"/> and <see cref="Kurtosis"/> from the accumulated
        /// third and fourth powers of the deviations from the mean.
        /// <see cref="Variance"/> and <see cref="StandardDeviation"/> must already be set.
        /// </summary>
        /// <param name="n">The number of values that were accumulated.</param>
        /// <param name="sumOfCubes">Sum of the cubed deviations from the mean.</param>
        /// <param name="sumOfFourthPowers">Sum of the deviations from the mean raised to the fourth power.</param>
        private void SetShapeStatistics(int n, double sumOfCubes, double sumOfFourthPowers)
        {
            // Exact comparison is intentional: it only guards the divisions below against a zero variance.
            if (Variance == 0)
            {
                return;
            }

            // All sample-size factors are evaluated in double precision. In int arithmetic
            // (n - 1) * (n - 2) * (n - 3) silently wraps around for samples of roughly
            // 1300 values and more, which corrupted the results.
            double size = n;

            if (n > 2)
            {
                Skewness = size / ((size - 1) * (size - 2)) * (sumOfCubes / (Variance * StandardDeviation));
            }

            if (n > 3)
            {
                Kurtosis = ((size * (size + 1)) / ((size - 1) * (size - 2) * (size - 3)) * (sumOfFourthPowers / (Variance * Variance)))
                           - ((3.0 * (size - 1) * (size - 1)) / ((size - 2) * (size - 3)));
            }
        }
    }
}