From 6861de3057e1c022fa4ed2fb02fff440522360ea Mon Sep 17 00:00:00 2001 From: Christoph Ruegg Date: Thu, 21 Mar 2013 11:55:56 +0100 Subject: [PATCH] Statistics: introduce unsorted ArrayStatistics --- src/Numerics/Numerics.csproj | 1 + src/Numerics/Statistics/ArrayStatistics.cs | 96 +++++++++++++++++++ src/Portable/Portable.csproj | 3 + .../StatisticsTests/StatisticsTests.cs | 46 ++++++--- 4 files changed, 132 insertions(+), 14 deletions(-) create mode 100644 src/Numerics/Statistics/ArrayStatistics.cs diff --git a/src/Numerics/Numerics.csproj b/src/Numerics/Numerics.csproj index 262f8e2a..1afc0fec 100644 --- a/src/Numerics/Numerics.csproj +++ b/src/Numerics/Numerics.csproj @@ -111,6 +111,7 @@ + diff --git a/src/Numerics/Statistics/ArrayStatistics.cs b/src/Numerics/Statistics/ArrayStatistics.cs new file mode 100644 index 00000000..f9743e61 --- /dev/null +++ b/src/Numerics/Statistics/ArrayStatistics.cs @@ -0,0 +1,96 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// http://mathnetnumerics.codeplex.com +// +// Copyright (c) 2009-2013 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace MathNet.Numerics.Statistics +{ + public static class ArrayStatistics + { + // TODO: Benchmark various options to find out the best approach (-> branch prediction) + // TODO: consider leveraging MKL + + /// + /// Returns the smallest value from the unsorted data array. + /// Returns NaN if data is empty or any entry is NaN. + /// + /// Sample array, no sorting is assumed. + public static double Minimum(double[] data) + { + if (data == null || data.Length == 0) return double.NaN; + + var min = double.PositiveInfinity; + for (int i = 0; i < data.Length; i++) + { + if (data[i] < min || double.IsNaN(data[i])) + { + min = data[i]; + } + } + return min; + } + + /// + /// Returns the largest value from the unsorted data array. + /// Returns NaN if data is empty or any entry is NaN. + /// + /// Sample array, no sorting is assumed. + public static double Maximum(double[] data) + { + if (data == null || data.Length == 0) return double.NaN; + + var max = double.NegativeInfinity; + for (int i = 0; i < data.Length; i++) + { + if (data[i] > max || double.IsNaN(data[i])) + { + max = data[i]; + } + } + return max; + } + + /// + /// Returns the sample mean from the unsorted data array. + /// Returns NaN if data is empty or any entry is NaN. + /// + /// Sample array, no sorting is assumed. 
+ public static double Mean(double[] data) + { + if (data == null || data.Length == 0) return double.NaN; + + double mean = 0; + ulong m = 0; + for (int i = 0; i < data.Length; i++) + { + mean += (data[i] - mean) / ++m; + } + return mean; + } + } +} \ No newline at end of file diff --git a/src/Portable/Portable.csproj b/src/Portable/Portable.csproj index bf01491b..784a8c47 100644 --- a/src/Portable/Portable.csproj +++ b/src/Portable/Portable.csproj @@ -1020,6 +1020,9 @@ SpecialFunctions\Stability.cs + + Statistics\ArrayStatistics.cs + Statistics\Correlation.cs diff --git a/src/UnitTests/StatisticsTests/StatisticsTests.cs b/src/UnitTests/StatisticsTests/StatisticsTests.cs index 393eb214..1b6f51cf 100644 --- a/src/UnitTests/StatisticsTests/StatisticsTests.cs +++ b/src/UnitTests/StatisticsTests/StatisticsTests.cs @@ -89,7 +89,8 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void Mean(string dataSet) { var data = _data[dataSet]; - AssertHelpers.AlmostEqual(data.Mean, data.Data.Mean(), 15); + AssertHelpers.AlmostEqual(data.Mean, Statistics.Mean(data.Data), 15); + AssertHelpers.AlmostEqual(data.Mean, ArrayStatistics.Mean(data.Data), 15); } /// @@ -107,7 +108,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void NullableMean(string dataSet) { var data = _data[dataSet]; - AssertHelpers.AlmostEqual(data.Mean, data.DataWithNulls.Mean(), 15); + AssertHelpers.AlmostEqual(data.Mean, Statistics.Mean(data.DataWithNulls), 15); } /// @@ -117,7 +118,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void MeanThrowsArgumentNullException() { double[] data = null; - Assert.Throws(() => data.Mean()); + Assert.Throws(() => Statistics.Mean(data)); } /// @@ -136,7 +137,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void StandardDeviation(string dataSet, int digits) { var data = _data[dataSet]; - AssertHelpers.AlmostEqual(data.StandardDeviation, data.Data.StandardDeviation(), digits); + 
AssertHelpers.AlmostEqual(data.StandardDeviation, Statistics.StandardDeviation(data.Data), digits); } /// @@ -155,7 +156,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void NullableStandardDeviation(string dataSet, int digits) { var data = _data[dataSet]; - AssertHelpers.AlmostEqual(data.StandardDeviation, data.DataWithNulls.StandardDeviation(), digits); + AssertHelpers.AlmostEqual(data.StandardDeviation, Statistics.StandardDeviation(data.DataWithNulls), digits); } /// @@ -165,7 +166,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void StandardDeviationThrowsArgumentNullException() { double[] data = null; - Assert.Throws(() => data.StandardDeviation()); + Assert.Throws(() => Statistics.StandardDeviation(data)); } /// @@ -175,8 +176,16 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void ShortMinMax() { var samples = new[] {-1.0, 5, 0, -3, 10, -0.5, 4}; - Assert.That(samples.Minimum(), Is.EqualTo(-3), "Min"); - Assert.That(samples.Maximum(), Is.EqualTo(10), "Max"); + Assert.That(Statistics.Minimum(samples), Is.EqualTo(-3), "Min"); + Assert.That(Statistics.Maximum(samples), Is.EqualTo(10), "Max"); + Assert.That(ArrayStatistics.Minimum(samples), Is.EqualTo(-3), "Min"); + Assert.That(ArrayStatistics.Maximum(samples), Is.EqualTo(10), "Max"); + + var sorted = new double[samples.Length]; + Array.Copy(samples, sorted, samples.Length); + Array.Sort(sorted); + Assert.That(SortedArrayStatistics.Minimum(sorted), Is.EqualTo(-3), "Min"); + Assert.That(SortedArrayStatistics.Maximum(sorted), Is.EqualTo(10), "Max"); } /// @@ -187,11 +196,16 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests { // -3 -1 -0.5 0 1 4 5 6 10 var samples = new[] {-1, 5, 0, -3, 10, -0.5, 4, 1, 6}; - Assert.That(samples.Median(), Is.EqualTo(1), "Median"); + Assert.That(Statistics.Median(samples), Is.EqualTo(1), "Median"); Assert.That(Statistics.OrderStatistic(samples, 1), Is.EqualTo(-3), "Order-1"); Assert.That(Statistics.OrderStatistic(samples, 3), 
Is.EqualTo(-0.5), "Order-3"); Assert.That(Statistics.OrderStatistic(samples, 7), Is.EqualTo(5), "Order-7"); Assert.That(Statistics.OrderStatistic(samples, 9), Is.EqualTo(10), "Order-9"); + + var sorted = new double[samples.Length]; + Array.Copy(samples, sorted, samples.Length); + Array.Sort(sorted); + Assert.That(SortedArrayStatistics.Median(sorted), Is.EqualTo(1), "Median"); } /// @@ -208,9 +222,9 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests RandomSource = new Numerics.Random.MersenneTwister(100) }; - AssertHelpers.AlmostEqual(1e+9, gaussian.Samples().Take(10000).Mean(), 11); - AssertHelpers.AlmostEqual(4d, gaussian.Samples().Take(10000).Variance(), 1); - AssertHelpers.AlmostEqual(2d, gaussian.Samples().Take(10000).StandardDeviation(), 2); + AssertHelpers.AlmostEqual(1e+9, Statistics.Mean(gaussian.Samples().Take(10000)), 11); + AssertHelpers.AlmostEqual(4d, Statistics.Variance(gaussian.Samples().Take(10000)), 1); + AssertHelpers.AlmostEqual(2d, Statistics.StandardDeviation(gaussian.Samples().Take(10000)), 2); } /// @@ -219,8 +233,12 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests [Test] public void Median_CodeplexIssue5667() { - var seq = File.ReadLines("./data/Codeplex-5667.csv").Select(s => double.Parse(s)); - Assert.AreEqual(1.0, seq.Median()); + var seq = File.ReadLines("./data/Codeplex-5667.csv").Select(double.Parse); + Assert.AreEqual(1.0, Statistics.Median(seq)); + + var sorted = seq.ToArray(); + Array.Sort(sorted); + Assert.AreEqual(1.0, SortedArrayStatistics.Median(sorted)); } } #endif