From 37d02b57437911bebd2c9031e0edff476385e7ba Mon Sep 17 00:00:00 2001 From: Jurgen Van Gael Date: Wed, 29 Jul 2009 00:08:17 +0200 Subject: [PATCH] Fixed a bug with the order finding method. Lowered the accuracy of StdDev computation on the numacc2 test to 13 significant digits. --- .../Managed.UnitTests.csproj | 3 + .../DescriptiveStatisticsTests.cs | 212 +++++++ .../StatisticsTests/StatTestData.cs | 96 +++ .../StatisticsTests/StatisticsTests.cs | 138 +++++ src/Managed/Managed.csproj | 2 + src/Managed/Properties/Resources.Designer.cs | 9 + src/Managed/Properties/Resources.resx | 3 + src/Managed/Sorting.cs | 9 +- .../Statistics/DescriptiveStatistics.cs | 343 +++++++++++ src/Managed/Statistics/Statistics.cs | 570 ++++++++++++++++++ src/Native.UnitTests/Native.UnitTests.csproj | 9 + src/Native/Native.csproj | 6 + 12 files changed, 1397 insertions(+), 3 deletions(-) create mode 100644 src/Managed.UnitTests/StatisticsTests/DescriptiveStatisticsTests.cs create mode 100644 src/Managed.UnitTests/StatisticsTests/StatTestData.cs create mode 100644 src/Managed.UnitTests/StatisticsTests/StatisticsTests.cs create mode 100644 src/Managed/Statistics/DescriptiveStatistics.cs create mode 100644 src/Managed/Statistics/Statistics.cs diff --git a/src/Managed.UnitTests/Managed.UnitTests.csproj b/src/Managed.UnitTests/Managed.UnitTests.csproj index a1f55975..279adacb 100644 --- a/src/Managed.UnitTests/Managed.UnitTests.csproj +++ b/src/Managed.UnitTests/Managed.UnitTests.csproj @@ -72,6 +72,9 @@ + + + diff --git a/src/Managed.UnitTests/StatisticsTests/DescriptiveStatisticsTests.cs b/src/Managed.UnitTests/StatisticsTests/DescriptiveStatisticsTests.cs new file mode 100644 index 00000000..b70a1fb6 --- /dev/null +++ b/src/Managed.UnitTests/StatisticsTests/DescriptiveStatisticsTests.cs @@ -0,0 +1,212 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://mathnet.opensourcedotnet.info +// +// Copyright (c) 2009 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace MathNet.Numerics.UnitTests.Statistics +{ + using System; + using System.Collections.Generic; + using MathNet.Numerics.Statistics; + using MbUnit.Framework; + + [TestFixture] + public class DescriptiveStatisticsTests + { + private readonly IDictionary mData = new Dictionary(); + + public DescriptiveStatisticsTests() + { + StatTestData lottery = new StatTestData("./data/NIST/Lottery.dat"); + mData.Add("lottery", lottery); + StatTestData lew = new StatTestData("./data/NIST/Lew.dat"); + mData.Add("lew", lew); + StatTestData mavro = new StatTestData("./data/NIST/Mavro.dat"); + mData.Add("mavro", mavro); + StatTestData michelso = new StatTestData("./data/NIST/Michelso.dat"); + mData.Add("michelso", michelso); + StatTestData numacc1 = new StatTestData("./data/NIST/NumAcc1.dat"); + mData.Add("numacc1", numacc1); + StatTestData numacc2 = new StatTestData("./data/NIST/NumAcc2.dat"); + mData.Add("numacc2", numacc2); + StatTestData numacc3 = new StatTestData("./data/NIST/NumAcc3.dat"); + mData.Add("numacc3", numacc3); + StatTestData numacc4 = new StatTestData("./data/NIST/NumAcc4.dat"); + mData.Add("numacc4", numacc4); + } + + [Test] + public void Constructor_ThrowArgumentNullException() + { + const IEnumerable data = null; + const IEnumerable nullableData = null; + + Assert.Throws(() => new DescriptiveStatistics(data)); + Assert.Throws(() => new DescriptiveStatistics(data, true)); + Assert.Throws(() => new DescriptiveStatistics(nullableData)); + Assert.Throws(() => new DescriptiveStatistics(nullableData, true)); + } + + [Test] + [Row("lottery", 15, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [Row("lew", 15, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [Row("mavro", 12, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [Row("michelso", 12, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [Row("numacc1", 15, 0, 0, 10000002, 10000001, 10000003, 3)] + [Row("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [Row("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [Row("numacc4", 8, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableDouble(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + StatTestData data = mData[dataSet]; + DescriptiveStatistics stats = new DescriptiveStatistics(data.Data); + + AssertHelpers.AlmostEqual(data.Mean, stats.Mean, 15); + AssertHelpers.AlmostEqual(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqual(skewness, stats.Skewness, 7); + AssertHelpers.AlmostEqual(kurtosis, stats.Kurtosis, 7); + AssertHelpers.AlmostEqual(median, stats.Median, 15); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + } + + [Test] + [Row("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [Row("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [Row("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [Row("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [Row("numacc1", 0, 0, 10000002, 10000001, 10000003, 3)] + [Row("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [Row("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [Row("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableDoubleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count) + { + StatTestData data = mData[dataSet]; + DescriptiveStatistics stats = new DescriptiveStatistics(data.Data, true); + AssertHelpers.AlmostEqual(data.Mean, stats.Mean, 15); + AssertHelpers.AlmostEqual(data.StandardDeviation, stats.StandardDeviation, 15); + AssertHelpers.AlmostEqual(skewness, stats.Skewness, 9); + AssertHelpers.AlmostEqual(kurtosis, stats.Kurtosis, 9); + AssertHelpers.AlmostEqual(median, stats.Median, 15); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + } + + [Test] + [Row("lottery", 15, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [Row("lew", 15, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [Row("mavro", 12, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [Row("michelso", 12, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [Row("numacc1", 15, 0, 0, 10000002, 10000001, 10000003, 3)] + [Row("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [Row("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [Row("numacc4", 8, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableDoubleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + StatTestData data = mData[dataSet]; + DescriptiveStatistics stats = new DescriptiveStatistics(data.Data, false); + AssertHelpers.AlmostEqual(data.Mean, stats.Mean, 15); + AssertHelpers.AlmostEqual(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqual(skewness, stats.Skewness, 7); + AssertHelpers.AlmostEqual(kurtosis, stats.Kurtosis, 7); + AssertHelpers.AlmostEqual(median, stats.Median, 15); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + } + + [Test] + [Row("lottery", 15, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [Row("lew", 15, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [Row("mavro", 12, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [Row("michelso", 12, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [Row("numacc1", 15, 0, 0, 10000002, 10000001, 10000003, 3)] + [Row("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [Row("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [Row("numacc4", 8, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableNullableDouble(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + StatTestData data = mData[dataSet]; + DescriptiveStatistics stats = new DescriptiveStatistics(data.DataWithNulls); + AssertHelpers.AlmostEqual(data.Mean, stats.Mean, 15); + AssertHelpers.AlmostEqual(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqual(skewness, stats.Skewness, 7); + AssertHelpers.AlmostEqual(kurtosis, stats.Kurtosis, 7); + AssertHelpers.AlmostEqual(median, stats.Median, 15); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + } + + [Test] + [Row("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [Row("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [Row("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [Row("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [Row("numacc1", 0, 0, 10000002, 10000001, 10000003, 3)] + [Row("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [Row("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [Row("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableNullableDoubleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count) + { + StatTestData data = mData[dataSet]; + DescriptiveStatistics stats = new DescriptiveStatistics(data.DataWithNulls, true); + AssertHelpers.AlmostEqual(data.Mean, stats.Mean, 15); + AssertHelpers.AlmostEqual(data.StandardDeviation, stats.StandardDeviation, 15); + AssertHelpers.AlmostEqual(skewness, stats.Skewness, 9); + AssertHelpers.AlmostEqual(kurtosis, stats.Kurtosis, 9); + AssertHelpers.AlmostEqual(median, stats.Median, 15); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + } + + [Test] + [Row("lottery", 15, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [Row("lew", 15, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [Row("mavro", 12, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [Row("michelso", 12, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [Row("numacc1", 15, 0, 0, 10000002, 10000001, 10000003, 3)] + [Row("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [Row("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [Row("numacc4", 8, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableNullableDoubleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + StatTestData data = mData[dataSet]; + DescriptiveStatistics stats = new DescriptiveStatistics(data.DataWithNulls, false); + AssertHelpers.AlmostEqual(data.Mean, stats.Mean, 15); + AssertHelpers.AlmostEqual(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqual(skewness, stats.Skewness, 7); + AssertHelpers.AlmostEqual(kurtosis, stats.Kurtosis, 7); + AssertHelpers.AlmostEqual(median, stats.Median, 15); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + } + } +} \ No newline at end of file diff --git a/src/Managed.UnitTests/StatisticsTests/StatTestData.cs b/src/Managed.UnitTests/StatisticsTests/StatTestData.cs new file mode 100644 index 00000000..60308827 --- /dev/null +++ b/src/Managed.UnitTests/StatisticsTests/StatTestData.cs @@ -0,0 +1,96 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://mathnet.opensourcedotnet.info +// +// Copyright (c) 2009 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace MathNet.Numerics.UnitTests.Statistics +{ + using System.IO; + using System.Linq; + using System.Collections.Generic; + + internal class StatTestData + { + public readonly double[] Data; + public readonly double?[] DataWithNulls; + + public readonly double Mean; + public readonly double StandardDeviation; + + public StatTestData(string file) + { + using (StreamReader reader = new StreamReader(file)) + { + string line = reader.ReadLine().Trim(); + + while (!line.StartsWith("--")) + { + if (line.StartsWith("Sample Mean")) + { + Mean = GetValue(line); + } + else if (line.StartsWith("Sample Standard Deviation")) + { + StandardDeviation = GetValue(line); + } + line = reader.ReadLine().Trim(); + } + + line = reader.ReadLine(); + + IList list = new List(); + while (line != null) + { + line = line.Trim(); + if (!line.Equals(string.Empty)) + { + list.Add(double.Parse(line)); + } + line = reader.ReadLine(); + } + + Data = list.ToArray(); + } + + DataWithNulls = new double?[Data.Length + 2]; + for (int i = 0; i < Data.Length; i++) + { + DataWithNulls[i + 1] = Data[i]; + } + } + + private static double GetValue(string str) + { + int start = str.IndexOf(":"); + string value = str.Substring(start + 1).Trim(); + if (value.Equals("NaN")) + { + return 0; + } + return double.Parse(str.Substring(start + 1).Trim()); + } + } +} \ No newline at end of file diff --git a/src/Managed.UnitTests/StatisticsTests/StatisticsTests.cs b/src/Managed.UnitTests/StatisticsTests/StatisticsTests.cs new file mode 100644 index 00000000..6f551e65 --- /dev/null +++ b/src/Managed.UnitTests/StatisticsTests/StatisticsTests.cs @@ -0,0 +1,138 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://mathnet.opensourcedotnet.info +// +// Copyright (c) 2009 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace MathNet.Numerics.UnitTests.Statistics +{ + using System; + using System.Collections.Generic; + using System.Text; + using MbUnit.Framework; + using MathNet.Numerics.Statistics; + + [TestFixture] + public class StatisticsTests + { + private readonly IDictionary mData = new Dictionary(); + public StatisticsTests() + { + StatTestData lottery = new StatTestData("./data/NIST/Lottery.dat"); + mData.Add("lottery", lottery); + StatTestData lew = new StatTestData("./data/NIST/Lew.dat"); + mData.Add("lew", lew); + StatTestData mavro = new StatTestData("./data/NIST/Mavro.dat"); + mData.Add("mavro", mavro); + StatTestData michelso = new StatTestData("./data/NIST/Michelso.dat"); + mData.Add("michelso", michelso); + StatTestData numacc1 = new StatTestData("./data/NIST/NumAcc1.dat"); + mData.Add("numacc1", numacc1); + StatTestData numacc2 = new StatTestData("./data/NIST/NumAcc2.dat"); + mData.Add("numacc2", numacc2); + StatTestData numacc3 = new StatTestData("./data/NIST/NumAcc3.dat"); + mData.Add("numacc3", numacc3); + StatTestData numacc4 = new StatTestData("./data/NIST/NumAcc4.dat"); + mData.Add("numacc4", numacc4); + } + + [Test] + [Row("lottery")] + [Row("lew")] + [Row("mavro")] + [Row("michelso")] + [Row("numacc1")] + [Row("numacc2")] + [Row("numacc3")] + [Row("numacc4")] + public void Mean(string dataSet) + { + StatTestData data = mData[dataSet]; + AssertHelpers.AlmostEqual(data.Mean, data.Data.Mean(), 15); + } + + [Test] + [Row("lottery")] + [Row("lew")] + [Row("mavro")] + [Row("michelso")] + [Row("numacc1")] + [Row("numacc2")] + [Row("numacc3")] + [Row("numacc4")] + public void NullableMean(string dataSet) + { + StatTestData data = mData[dataSet]; + AssertHelpers.AlmostEqual(data.Mean, data.DataWithNulls.Mean(), 15); + } + + [Test] + [ExpectedException(typeof(ArgumentNullException))] + public void Mean_ThrowsArgumentNullException() + { + double[] data = null; + MathNet.Numerics.Statistics.Statistics.Mean(data); + } + + + [Test] + [Row("lottery", 15)] + [Row("lew", 15)] + [Row("mavro", 12)] + [Row("michelso", 12)] + [Row("numacc1", 15)] + [Row("numacc2", 14)] + [Row("numacc3", 9)] + [Row("numacc4", 8)] + public void StandardDeviation(string dataSet, int digits) + { + StatTestData data = mData[dataSet]; + AssertHelpers.AlmostEqual(data.StandardDeviation, data.Data.StandardDeviation(), digits); + } + + [Test] + [Row("lottery", 15)] + [Row("lew", 15)] + [Row("mavro", 12)] + [Row("michelso", 12)] + [Row("numacc1", 15)] + [Row("numacc2", 14)] + [Row("numacc3", 9)] + [Row("numacc4", 8)] + public void NullableStandardDeviation(string dataSet, int digits) + { + StatTestData data = mData[dataSet]; + AssertHelpers.AlmostEqual(data.StandardDeviation, data.DataWithNulls.StandardDeviation(), digits); + } + + [Test] + [ExpectedException(typeof(ArgumentNullException))] + public void StandardDeviation_ThrowsArgumentNullException() + { + double[] data = null; + MathNet.Numerics.Statistics.Statistics.StandardDeviation(data); + } + } +} diff --git a/src/Managed/Managed.csproj b/src/Managed/Managed.csproj index cc486c32..ed96e700 100644 --- a/src/Managed/Managed.csproj +++ b/src/Managed/Managed.csproj @@ -82,6 +82,8 @@ + + diff --git a/src/Managed/Properties/Resources.Designer.cs b/src/Managed/Properties/Resources.Designer.cs index 4d133fa1..19baa33c 100644 --- a/src/Managed/Properties/Resources.Designer.cs +++ b/src/Managed/Properties/Resources.Designer.cs @@ -375,6 +375,15 @@ namespace MathNet.Numerics.Properties { } } + /// + /// Looks up a localized string similar to The supplied collection is empty.. + /// + internal static string CollectionEmpty { + get { + return ResourceManager.GetString("CollectionEmpty", resourceCulture); + } + } + /// /// Looks up a localized string similar to This feature is not implemented yet (but is planned).. /// diff --git a/src/Managed/Properties/Resources.resx b/src/Managed/Properties/Resources.resx index 70a56854..5f7c0621 100644 --- a/src/Managed/Properties/Resources.resx +++ b/src/Managed/Properties/Resources.resx @@ -249,4 +249,7 @@ At least one item of {0} is a null reference (Nothing in Visual Basic). + + The supplied collection is empty. + \ No newline at end of file diff --git a/src/Managed/Sorting.cs b/src/Managed/Sorting.cs index 65796790..bccd69aa 100644 --- a/src/Managed/Sorting.cs +++ b/src/Managed/Sorting.cs @@ -566,9 +566,12 @@ namespace MathNet.Numerics /// The index of the second element of the swap. internal static void Swap(IList keys, int a, int b) { - T local = keys[a]; - keys[a] = keys[b]; - keys[b] = local; + if(a != b) + { + T local = keys[a]; + keys[a] = keys[b]; + keys[b] = local; + } } } } diff --git a/src/Managed/Statistics/DescriptiveStatistics.cs b/src/Managed/Statistics/DescriptiveStatistics.cs new file mode 100644 index 00000000..3e3a0b1c --- /dev/null +++ b/src/Managed/Statistics/DescriptiveStatistics.cs @@ -0,0 +1,343 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://mathnet.opensourcedotnet.info +// +// Copyright (c) 2009 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace MathNet.Numerics.Statistics +{ + using System.Collections.Generic; + + /// + /// Computes the basic statistics of data set. The class meets the + /// NIST standard of accuracy for mean, variance, and standard deviation + /// (the only statistics they provide exact values for) and exceeds them + /// in increased accuracy mode. + /// + public class DescriptiveStatistics + { + /// + /// Initializes a new instance of the class. + /// + /// The sample data. + public DescriptiveStatistics(IEnumerable data) : this(data, false) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The sample data. + public DescriptiveStatistics(IEnumerable data) : this(data, false) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The sample data. + /// if set to true, increased accuracy mode used. + /// Increased accuracy mode uses types for internal calculations. + /// Don't use increased accuracy for data sets containing large values (in absolute value). + /// This may cause the calculations to overflow. + public DescriptiveStatistics(IEnumerable data, bool increasedAccuracy) + { + if (increasedAccuracy) + { + ComputeHA(data); + } + else + { + Compute(data); + } + + Median = data.Median(); + Maximum = data.Maximum(); + Minimum = data.Minimum(); + } + + /// + /// Initializes a new instance of the class. + /// + /// The sample data. + /// if set to true, increased accuracy mode used. + /// Increased accuracy mode uses types for internal calculations. + /// Don't use increased accuracy for data sets containing large values (in absolute value). + /// This may cause the calculations to overflow. + public DescriptiveStatistics(IEnumerable data, bool increasedAccuracy) + { + if (increasedAccuracy) + { + ComputeHA(data); + } + else + { + Compute(data); + } + + Median = data.Median(); + Maximum = data.Maximum(); + Minimum = data.Minimum(); + } + + /// + /// Gets the size of the sample. + /// + /// The size of the sample. + public int Count { get; private set; } + + /// + /// Gets the sample mean. + /// + /// The sample mean. + public double Mean { get; private set; } + + /// + /// Gets the sample variance. + /// + /// The sample variance. + public double Variance { get; private set; } + + /// + /// Gets the sample standard deviation. + /// + /// The sample standard deviation. + public double StandardDeviation { get; private set; } + + /// + /// Gets the sample skewness. + /// + /// The sample skewness. + /// Returns zero if is less than three. + public double Skewness { get; private set; } + + /// + /// Gets the sample median. + /// + /// The sample median. + public double Median { get; private set; } + + /// + /// Gets the sample kurtosis. + /// + /// The sample kurtosis. + /// Returns zero if is less than four. + public double Kurtosis { get; private set; } + + /// + /// Gets the maximum sample value. + /// + /// The maximum sample value. + public double Maximum { get; private set; } + + /// + /// Gets the minimum sample value. + /// + /// The minimum sample value. + public double Minimum { get; private set; } + + /// + /// Computes descriptive statistics from a stream of data values. + /// + /// A sequence of datapoints. + private void Compute(IEnumerable data) + { + Mean = data.Mean(); + double variance = 0; + double correction = 0; + double skewness = 0; + double kurtosis = 0; + int n = 0; + foreach (var xi in data) + { + double diff = xi - Mean; + correction += diff; + double tmp = diff*diff; + variance += tmp; + tmp *= diff; + skewness += tmp; + tmp *= diff; + kurtosis += tmp; + n++; + } + + Count = n; + Variance = (variance - (correction*correction)/n)/(n - 1); + StandardDeviation = System.Math.Sqrt(Variance); + if (Variance != 0) + { + if (n > 2) + { + Skewness = (double) n/((n - 1)*(n - 2))*(skewness/(Variance*StandardDeviation)); + } + + if (n > 3) + { + Kurtosis = ((double) n*(n + 1))/((n - 1)*(n - 2)*(n - 3))*(kurtosis/(Variance*Variance)) - (3.0*(n - 1)*(n - 1))/((n - 2)*(n - 3)); + } + } + } + + /// + /// Computes descriptive statistics from a stream of nullable data values. + /// + /// A sequence of datapoints. + private void Compute(IEnumerable data) + { + Mean = data.Mean(); + double variance = 0; + double correction = 0; + double skewness = 0; + double kurtosis = 0; + int n = 0; + foreach (var xi in data) + { + if (xi.HasValue) + { + double diff = xi.Value - Mean; + double tmp = diff*diff; + correction += diff; + variance += tmp; + tmp *= diff; + skewness += tmp; + tmp *= diff; + kurtosis += tmp; + n++; + } + } + + Count = n; + if (n > 0) + { + Variance = (variance - (correction*correction)/n)/(n - 1); + StandardDeviation = System.Math.Sqrt(Variance); + if (Variance != 0) + { + if (n > 2) + { + Skewness = (double) n/((n - 1)*(n - 2))*(skewness/(Variance*StandardDeviation)); + } + + if (n > 3) + { + Kurtosis = ((double) n*(n + 1))/((n - 1)*(n - 2)*(n - 3))*(kurtosis/(Variance*Variance)) - (3.0*(n - 1)*(n - 1))/((n - 2)*(n - 3)); + } + } + } + } + + /// + /// Computes descriptive statistics from a stream of data values using high accuracy. + /// + /// A sequence of datapoints. + private void ComputeHA(IEnumerable data) + { + Mean = data.Mean(); + decimal mean = (decimal) Mean; + decimal variance = 0; + decimal correction = 0; + decimal skewness = 0; + decimal kurtosis = 0; + int n = 0; + foreach (decimal xi in data) + { + decimal diff = xi - mean; + decimal tmp = diff*diff; + correction += diff; + variance += tmp; + tmp *= diff; + skewness += tmp; + tmp *= diff; + kurtosis += tmp; + n++; + } + + Count = n; + Variance = (double) (variance - (correction*correction)/n)/(n - 1); + StandardDeviation = System.Math.Sqrt(Variance); + if (Variance != 0) + { + if (n > 2) + { + Skewness = (double) n/((n - 1)*(n - 2))*((double) skewness/(Variance*StandardDeviation)); + } + + if (n > 3) + { + Kurtosis = ((double) n*(n + 1))/((n - 1)*(n - 2)*(n - 3))*((double) kurtosis/(Variance*Variance)) - (3.0*(n - 1)*(n - 1))/((n - 2)*(n - 3)); + } + } + } + + /// + /// Computes descriptive statistics from a stream of nullable data values using high accuracy. + /// + /// A sequence of datapoints. + private void ComputeHA(IEnumerable data) + { + Mean = data.Mean(); + decimal mean = (decimal) Mean; + decimal variance = 0; + decimal correction = 0; + decimal skewness = 0; + decimal kurtosis = 0; + int n = 0; + foreach (decimal? xi in data) + { + if (xi.HasValue) + { + decimal diff = xi.Value - mean; + decimal tmp = diff*diff; + correction += diff; + variance += tmp; + tmp *= diff; + skewness += tmp; + tmp *= diff; + kurtosis += tmp; + n++; + } + } + + Count = n; + if (n > 0) + { + Variance = (double) (variance - (correction*correction)/n)/(n - 1); + StandardDeviation = System.Math.Sqrt(Variance); + if (Variance != 0) + { + if (n > 2) + { + Skewness = (double) n/((n - 1)*(n - 2))*((double) skewness/(Variance*StandardDeviation)); + } + + if (n > 3) + { + Kurtosis = ((double) n*(n + 1))/((n - 1)*(n - 2)*(n - 3))*((double) kurtosis/(Variance*Variance)) - (3.0*(n - 1)*(n - 1))/((n - 2)*(n - 3)); + } + } + } + } + } +} \ No newline at end of file diff --git a/src/Managed/Statistics/Statistics.cs b/src/Managed/Statistics/Statistics.cs new file mode 100644 index 00000000..3057fa08 --- /dev/null +++ b/src/Managed/Statistics/Statistics.cs @@ -0,0 +1,570 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://mathnet.opensourcedotnet.info +// +// Copyright (c) 2009 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +namespace MathNet.Numerics.Statistics +{ + using System; + using System.Collections.Generic; + using MathNet.Numerics.Properties; + using MathNet.Numerics.NumberTheory; + + /// + /// Extension methods to return basic statistics on set of data. + /// + public static class Statistics + { + /// + /// Calculates the sample mean. + /// + /// The data to calculate the mean of. + /// The mean of the sample. + public static double Mean(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double mean = 0; + int m = 0; + foreach (var item in data) + { + mean += (item - mean)/++m; + } + + return mean; + } + + /// + /// Calculates the sample mean. + /// + /// The data to calculate the mean of. + /// The mean of the sample. + public static double Mean(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double mean = 0; + int m = 0; + foreach (var item in data) + { + if (item.HasValue) + { + mean += (item.Value - mean)/++m; + } + } + + return mean; + } + + /// + /// Calculates the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer). + /// + /// The data to calculate the variance of. + /// The unbiased population variance of the sample. + public static double Variance(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double variance = 0; + double t = 0; + int j = 0; + + IEnumerator iterator = data.GetEnumerator(); + if (iterator.MoveNext()) + { + j++; + t = iterator.Current; + } + + while (iterator.MoveNext()) + { + j++; + double xi = iterator.Current; + t += xi; + double diff = j*xi - t; + variance += (diff*diff)/(j*(j - 1)); + } + + return variance/(j - 1); + } + + /// + /// Computes the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer) for nullable data. + /// + /// The data to calculate the variance of. + /// The population variance of the sample. + public static double Variance(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double variance = 0; + double t = 0; + int j = 0; + + IEnumerator iterator = data.GetEnumerator(); + + while (true) + { + bool hasNext = iterator.MoveNext(); + if (!hasNext) + { + break; + } + if (iterator.Current.HasValue) + { + j++; + t = iterator.Current.Value; + break; + } + } + + while (iterator.MoveNext()) + { + if (iterator.Current.HasValue) + { + j++; + double xi = iterator.Current.Value; + t += xi; + double diff = j*xi - t; + variance += (diff*diff)/(j*(j - 1)); + } + } + + return variance/(j - 1); + } + + /// + /// Calculates the biased population variance estimator (on a dataset of size N will use an N normalizer). + /// + /// The data to calculate the variance of. + /// The biased population variance of the sample. + public static double PopulationVariance(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double variance = 0; + double t = 0; + int j = 0; + + IEnumerator iterator = data.GetEnumerator(); + if (iterator.MoveNext()) + { + j++; + t = iterator.Current; + } + + while (iterator.MoveNext()) + { + j++; + double xi = iterator.Current; + t += xi; + double diff = j * xi - t; + variance += (diff * diff) / (j * (j - 1)); + } + + return variance / j; + } + + /// + /// Computes the biased population variance estimator (on a dataset of size N will use an N normalizer) for nullable data. + /// + /// The data to calculate the variance of. + /// The population variance of the sample. + public static double PopulationVariance(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double variance = 0; + double t = 0; + int j = 0; + + IEnumerator iterator = data.GetEnumerator(); + + while (true) + { + bool hasNext = iterator.MoveNext(); + if (!hasNext) + { + break; + } + if (iterator.Current.HasValue) + { + j++; + t = iterator.Current.Value; + break; + } + } + + while (iterator.MoveNext()) + { + if (iterator.Current.HasValue) + { + j++; + double xi = iterator.Current.Value; + t += xi; + double diff = j * xi - t; + variance += (diff * diff) / (j * (j - 1)); + } + } + + return variance / j; + } + + /// + /// Calculates the unbiased sample standard deviation (on a dataset of size N will use an N-1 normalizer). + /// + /// The data to calculate the standard deviation of. + /// The standard deviation of the sample. + public static double StandardDeviation(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + return System.Math.Sqrt(Variance(data)); + } + + /// + /// Calculates the unbiased sample standard deviation (on a dataset of size N will use an N-1 normalizer). + /// + /// The data to calculate the standard deviation of. + /// The standard deviation of the sample. + public static double StandardDeviation(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + return System.Math.Sqrt(Variance(data)); + } + + /// + /// Calculates the biased sample standard deviation (on a dataset of size N will use an N normalizer). + /// + /// The data to calculate the standard deviation of. + /// The standard deviation of the sample. + public static double PopulationStandardDeviation(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + return System.Math.Sqrt(PopulationVariance(data)); + } + + /// + /// Calculates the biased sample standard deviation (on a dataset of size N will use an N normalizer). + /// + /// The data to calculate the standard deviation of. + /// The standard deviation of the sample. + public static double PopulationStandardDeviation(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + return System.Math.Sqrt(PopulationVariance(data)); + } + + /// + /// Returns the minimum value in the sample data. + /// + /// The sample data. + /// The minimum value in the sample data. + public static double Minimum(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double min = double.MaxValue; + int count = 0; + foreach (double? d in data) + { + if (d.HasValue) + { + min = System.Math.Min(min, d.Value); + count++; + } + } + + if (count == 0) + { + throw new ArgumentException(Resources.CollectionEmpty, "data"); + } + + return min; + } + + /// + /// Returns the maximum value in the sample data. + /// + /// The sample data. + /// The maximum value in the sample data. + public static double Maximum(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double max = double.MinValue; + int count = 0; + foreach (double? d in data) + { + if (d.HasValue) + { + max = System.Math.Max(max, d.Value); + count++; + } + } + + if (count == 0) + { + throw new ArgumentException(Resources.CollectionEmpty, "data"); + } + + return max; + } + + /// + /// Returns the minimum value in the sample data. + /// + /// The sample data. + /// The minimum value in the sample data. + public static double Minimum(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double min = double.MaxValue; + int count = 0; + foreach (double d in data) + { + min = System.Math.Min(min, d); + count++; + } + + if (count == 0) + { + throw new ArgumentException(Resources.CollectionEmpty, "data"); + } + + return min; + } + + /// + /// Returns the maximum value in the sample data. + /// + /// The sample data. + /// The maximum value in the sample data. + public static double Maximum(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + double max = double.MinValue; + int count = 0; + foreach (double d in data) + { + max = System.Math.Max(max, d); + count++; + } + + if (count == 0) + { + throw new ArgumentException(Resources.CollectionEmpty, "data"); + } + + return max; + } + + /// + /// Calculates the sample median. + /// + /// The data to calculate the median of. + /// The median of the sample. + public static double Median(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + List dataArray = new List(data); + int index = dataArray.Count/2 + 1; + if (dataArray.Count % 2 == 0) + { + double lower = OrderSelect(dataArray, 0, dataArray.Count - 1, index - 1); + double upper = OrderSelect(dataArray, 0, dataArray.Count - 1, index); + return (lower + upper) / 2.0; + } + else + { + return OrderSelect(dataArray, 0, dataArray.Count - 1, index); + } + } + + /// + /// Calculates the sample median. + /// + /// The data to calculate the median of. + /// The median of the sample. + public static double Median(this IEnumerable data) + { + if (data == null) + { + throw new ArgumentNullException("data"); + } + + List nonNull = new List(); + foreach (double? value in data) + { + if (value.HasValue) + { + nonNull.Add(value.Value); + } + } + + if (nonNull.Count == 0) + { + throw new ArgumentException(Resources.CollectionEmpty, "data"); + } + + return nonNull.Median(); + } + + /// + /// Evaluate the i-order (1..N) statistic of the provided samples. + /// + /// The sample data. + /// The i'th order statistic in the sample data. + public static double OrderStatistic(IEnumerable samples, int order) + { + if (order == 1) + { + // Can be done in linear time by Min() + return Minimum(samples); + } + + List list = new List(samples); + if (order < 1 || order > list.Count) + { + throw new ArgumentOutOfRangeException("order", Resources.ArgumentInIntervalXYInclusive); + } + + if (order == list.Count) + { + // Can be done in linear time by Max() + return Maximum(list); + } + + return OrderSelect(list, 0, list.Count - 1, order); + } + + /// + /// Implementation of the order statistics finding algorithm based on the algorithm in + /// "Introduction to Algorithms", Cormen et al. section 7.1. + /// + /// The sample data. + /// The left bound in which to order select. + /// The right bound in which to order select. + /// The order we are trying to find. + /// The order statistic. + static double OrderSelect(IList samples, int left, int right, int order) + { + // Order most always be positive. + System.Diagnostics.Debug.Assert(order > 0); + // Left side must always be positive and smaller than right side. + System.Diagnostics.Debug.Assert(left >= 0 && left <= right); + // Right side must always be smaller than number of elements in list. + System.Diagnostics.Debug.Assert(right < samples.Count); + // Make sure there are at least order items in the segment [left, right]. + System.Diagnostics.Debug.Assert(right - left + 1 >= order); + + if (left == right) + { + return samples[left]; + } + + // The pivot point. + double pivot = samples[right]; + + // The partioning code. + int i = left - 1; + for(int j = left; j <= right - 1; j++) + { + if(samples[j] <= pivot) + { + i++; + Sorting.Swap(samples, i, j); + } + } + Sorting.Swap(samples, i+1, right); + + // Recursive order finding algorithm. + if(order == (i-left)+2) + { + return pivot; + } + else if (order < (i-left)+2) + { + return OrderSelect(samples, left, i, order); + } + else + { + return OrderSelect(samples, i+2, right, order - i + left - 2); + } + } + } +} \ No newline at end of file diff --git a/src/Native.UnitTests/Native.UnitTests.csproj b/src/Native.UnitTests/Native.UnitTests.csproj index 8422f4e0..22070fe8 100644 --- a/src/Native.UnitTests/Native.UnitTests.csproj +++ b/src/Native.UnitTests/Native.UnitTests.csproj @@ -95,6 +95,15 @@ SpecialFunctionsTest\ErfTests.cs + + StatisticsTests\DescriptiveStatisticsTests.cs + + + StatisticsTests\StatisticsTests.cs + + + StatisticsTests\StatTestData.cs + ThreadingTests\ParallelTest.cs diff --git a/src/Native/Native.csproj b/src/Native/Native.csproj index 889aa344..10948882 100644 --- a/src/Native/Native.csproj +++ b/src/Native/Native.csproj @@ -140,6 +140,12 @@ SpecialFunctions\Erf.cs + + Statistics\DescriptiveStatistics.cs + + + Statistics\Statistics.cs + Threading\AggregateException.cs