From 08320b1132d5ed436f5cd48e17a94c423e0a9f71 Mon Sep 17 00:00:00 2001 From: richard-allen Date: Tue, 14 Sep 2021 13:54:42 +0100 Subject: [PATCH] Move weighted descriptive statistics to own class. --- .../DescriptiveStatisticsTests.cs | 239 ---------- .../RunningWeightedStatisticsTests.cs | 106 ++++- .../WeightedDescriptiveStatisticsTests.cs | 432 ++++++++++++++++++ .../Statistics/DescriptiveStatistics.cs | 298 ++---------- .../Statistics/RunningWeightedStatistics.cs | 17 +- .../WeightedDescriptiveStatistics.cs | 411 +++++++++++++++++ 6 files changed, 984 insertions(+), 519 deletions(-) create mode 100644 src/Numerics.Tests/StatisticsTests/WeightedDescriptiveStatisticsTests.cs create mode 100644 src/Numerics/Statistics/WeightedDescriptiveStatistics.cs diff --git a/src/Numerics.Tests/StatisticsTests/DescriptiveStatisticsTests.cs b/src/Numerics.Tests/StatisticsTests/DescriptiveStatisticsTests.cs index 95914e6d..b3e2492d 100644 --- a/src/Numerics.Tests/StatisticsTests/DescriptiveStatisticsTests.cs +++ b/src/Numerics.Tests/StatisticsTests/DescriptiveStatisticsTests.cs @@ -29,7 +29,6 @@ using System; using System.Collections.Generic; -using System.Linq; #if NET5_0_OR_GREATER using System.Text.Json; using System.Text.Json.Serialization; @@ -76,10 +75,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests { const IEnumerable Data = null; const IEnumerable NullableData = null; - const IEnumerable> WeightedData = null; - Assert.That(() => new DescriptiveStatistics(WeightedData), Throws.TypeOf()); - Assert.That(() => new DescriptiveStatistics(WeightedData, true), Throws.TypeOf()); Assert.That(() => new DescriptiveStatistics(Data), Throws.TypeOf()); Assert.That(() => new DescriptiveStatistics(Data, true), Throws.TypeOf()); Assert.That(() => new DescriptiveStatistics(NullableData), Throws.TypeOf()); @@ -118,7 +114,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(stats.Minimum, min); Assert.AreEqual(stats.Maximum, max); Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); } /// @@ -150,7 +145,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(stats.Minimum, min); Assert.AreEqual(stats.Maximum, max); Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); } /// @@ -183,7 +177,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(stats.Minimum, min); Assert.AreEqual(stats.Maximum, max); Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); } /// @@ -216,7 +209,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(stats.Minimum, min); Assert.AreEqual(stats.Maximum, max); Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); } /// @@ -248,7 +240,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(stats.Minimum, min); Assert.AreEqual(stats.Maximum, max); Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); } /// @@ -281,205 +272,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(stats.Minimum, min); Assert.AreEqual(stats.Maximum, max); Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); - } - - /// - /// IEnumerable Double. - /// - /// Dataset name. - /// Digits count. - /// Skewness value. - /// Kurtosis value. - /// Median value. - /// Min value. - /// Max value. - /// Count value. - [TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] - [TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] - [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] - [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] - [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] - [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] - [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] - [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] - [TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)] - public void IEnumerableTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) - { - var data = _data[dataSet]; - var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x))); - - AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10); - AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); - AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8); - AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8); - Assert.AreEqual(stats.Minimum, min); - Assert.AreEqual(stats.Maximum, max); - Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); - } - - /// - /// IEnumerable Double high accuracy. - /// - /// Dataset name. - /// Skewness value. - /// Kurtosis value. - /// Median value. - /// Min value. - /// Max value. - /// Count value. - [TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] - [TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] - [TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] - [TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] - [TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)] - [TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] - [TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] - [TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] - public void IEnumerableTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count) - { - var data = _data[dataSet]; - var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), true); - AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); - AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14); - AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9); - AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9); - Assert.AreEqual(stats.Minimum, min); - Assert.AreEqual(stats.Maximum, max); - Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); - } - - /// - /// IEnumerable double low accuracy. - /// - /// Dataset name. - /// Digits count. - /// Skewness value. - /// Kurtosis value. - /// Median value. - /// Min value. - /// Max value. - /// Count value. - [TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] - [TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] - [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] - [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] - [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] - [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] - [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] - [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] - public void IEnumerableTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) - { - var data = _data[dataSet]; - var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), false); - AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); - AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); - AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7); - AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7); - Assert.AreEqual(stats.Minimum, min); - Assert.AreEqual(stats.Maximum, max); - Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); - } - - /// - /// IEnumerable Nullable double. - /// - /// Dataset name. - /// Digits count. - /// Skewness value. - /// Kurtosis value. - /// Median value. - /// Min value. - /// Max value. - /// Count value. - [TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] - [TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] - [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] - [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] - [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] - [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] - [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] - [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] - public void IEnumerableZeroWeightTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) - { - var data = _data[dataSet]; - var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159))); - AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); - AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); - AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7); - AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7); - Assert.AreEqual(stats.Minimum, min); - Assert.AreEqual(stats.Maximum, max); - Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); - } - - /// - /// IEnumerable Nullable double high accuracy. - /// - /// Dataset name. - /// Skewness value. - /// Kurtosis value. - /// Median value. - /// Min value. - /// Max value. - /// Count value. - [TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] - [TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] - [TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] - [TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] - [TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)] - [TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] - [TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] - [TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] - public void IEnumerableZeroWeightTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count) - { - var data = _data[dataSet]; - var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), true); - AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); - AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14); - AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9); - AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9); - Assert.AreEqual(stats.Minimum, min); - Assert.AreEqual(stats.Maximum, max); - Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); - } - - /// - /// IEnumerable Nullable Double Low Accuracy. - /// - /// Dataset name. - /// Digits count. - /// Skewness value. - /// Kurtosis value. - /// Median value. - /// Min value. - /// Max value. - /// Count value. - [TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] - [TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] - [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] - [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] - [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] - [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] - [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] - [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] - public void IEnumerableZeroWeightTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) - { - var data = _data[dataSet]; - var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), false); - AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); - AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); - AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7); - AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7); - Assert.AreEqual(stats.Minimum, min); - Assert.AreEqual(stats.Maximum, max); - Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); } [Test] @@ -504,32 +296,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests var stats4 = new DescriptiveStatistics(new[] { 1.0, 2.0, -3.0, -4.0 }); Assert.That(stats4.Skewness, Is.Not.NaN); Assert.That(stats4.Kurtosis, Is.Not.NaN); - - var stats5 = new DescriptiveStatistics(new Tuple[0]); - Assert.That(stats5.Skewness, Is.NaN); - Assert.That(stats5.Kurtosis, Is.NaN); - - var stats6 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0) }); - Assert.That(stats6.Skewness, Is.NaN); - Assert.That(stats6.Kurtosis, Is.NaN); - - var stats7 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0) }); - Assert.That(stats7.Skewness, Is.NaN); - Assert.That(stats7.Kurtosis, Is.NaN); - - var stats8 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0) }); - Assert.That(stats8.Skewness, Is.Not.NaN); - Assert.That(stats8.Kurtosis, Is.NaN); - - var stats9 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0), Tuple.Create(1.0, -4.0) }); - Assert.That(stats9.Skewness, Is.Not.NaN); - Assert.That(stats9.Kurtosis, Is.Not.NaN); - } - - [Test] - public void NegativeWeightsThrow() - { - Assert.That(() => new DescriptiveStatistics(new[] { Tuple.Create(-1.0, 1.0) }), Throws.TypeOf()); } [Test] @@ -538,10 +304,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests var stats = new DescriptiveStatistics(new[] { 2.0, 2.0, 2.0, 2.0 }); Assert.That(stats.Skewness, Is.NaN); Assert.That(stats.Kurtosis, Is.NaN); - - var stats2 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0) }); - Assert.That(stats2.Skewness, Is.NaN); - Assert.That(stats2.Kurtosis, Is.NaN); } #if NET5_0_OR_GREATER @@ -583,7 +345,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(stats.Minimum, min); Assert.AreEqual(stats.Maximum, max); Assert.AreEqual(stats.Count, count); - Assert.AreEqual(stats.TotalWeight, count); } #endif } diff --git a/src/Numerics.Tests/StatisticsTests/RunningWeightedStatisticsTests.cs b/src/Numerics.Tests/StatisticsTests/RunningWeightedStatisticsTests.cs index 977fefe5..884b9c54 100644 --- a/src/Numerics.Tests/StatisticsTests/RunningWeightedStatisticsTests.cs +++ b/src/Numerics.Tests/StatisticsTests/RunningWeightedStatisticsTests.cs @@ -88,7 +88,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void ConsistentWithNist(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) { var data = _data[dataSet]; - var stats = new RunningWeightedStatistics(data.Data.Select(x => System.Tuple.Create(1.0,x))); + var stats = new RunningWeightedStatistics(data.Data.Select(x => System.Tuple.Create(1.0, x))); AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10); AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); @@ -139,19 +139,19 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests public void NegativeWeightsThrow() { Assert.That(() => new RunningWeightedStatistics(new[] { System.Tuple.Create(-1.0, 1.0) }), Throws.TypeOf()); - var stats0 = new RunningWeightedStatistics(new System.Tuple[0]); - Assert.That(() =>stats0.Push(-1.0, 1.0), Throws.TypeOf()); + var stats0 = new RunningWeightedStatistics(new System.Tuple[0]); + Assert.That(() => stats0.Push(-1.0, 1.0), Throws.TypeOf()); Assert.That(() => stats0.PushRange(new[] { System.Tuple.Create(-1.0, 1.0) }), Throws.TypeOf()); } [Test] public void ShortSequences() { - var stats0 = new RunningWeightedStatistics(new System.Tuple[0]); + var stats0 = new RunningWeightedStatistics(new System.Tuple[0]); Assert.That(stats0.Skewness, Is.NaN); Assert.That(stats0.Kurtosis, Is.NaN); - var stats1 = new RunningWeightedStatistics(new [] { System.Tuple.Create(1.0,1.0) }); + var stats1 = new RunningWeightedStatistics(new[] { System.Tuple.Create(1.0, 1.0) }); Assert.That(stats1.Skewness, Is.NaN); Assert.That(stats1.Kurtosis, Is.NaN); @@ -159,7 +159,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.That(stats2.Skewness, Is.NaN); Assert.That(stats2.Kurtosis, Is.NaN); - var stats3 = new RunningWeightedStatistics(new[] { System.Tuple.Create(1.0, 1.0), System.Tuple.Create(1.0, 2.0), System.Tuple.Create(1.0, -3.0)}); + var stats3 = new RunningWeightedStatistics(new[] { System.Tuple.Create(1.0, 1.0), System.Tuple.Create(1.0, 2.0), System.Tuple.Create(1.0, -3.0) }); Assert.That(stats3.Skewness, Is.Not.NaN); Assert.That(stats3.Kurtosis, Is.NaN); @@ -177,7 +177,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests } [Test] - public void Combine() + public void CombineUnweighted() { var rnd = new SystemRandomSource(10); var a = Generate.Random(200, new Erlang(2, 0.2, rnd)).Select(datum => System.Tuple.Create(1.0, datum)).ToArray(); @@ -193,9 +193,9 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests y.PushRange(b); y.PushRange(c); - var za = new RunningWeightedStatistics(a); - var zb = new RunningWeightedStatistics(b); - var zc = new RunningWeightedStatistics(c); + var za = new RunningWeightedStatistics(a); + var zb = new RunningWeightedStatistics(b); + var zc = new RunningWeightedStatistics(c); var z = za + zb + zc; Assert.That(x.Mean, Is.EqualTo(direct.Mean()).Within(1e-12), "Mean Reference"); @@ -234,5 +234,91 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.That(y.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis y"); Assert.That(z.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis z"); } + + [Test] + /// Tests that combination of data via + / Combine is consistent with the incremental approach. + public void CombineWeighted() + { + var rnd = new SystemRandomSource(10); + var wa = Generate.Random(200, new ContinuousUniform(1.0, 10.0)); + var a = Generate.Random(200, new Erlang(2, 0.2, rnd)).Select((datum, i) => System.Tuple.Create(wa[i], datum)).ToArray(); + var wb = Generate.Random(100, new ContinuousUniform(1.0, 10.0)); + var b = Generate.Random(100, new Beta(1.2, 1.4, rnd)).Select((datum, i) => System.Tuple.Create(wb[i], datum)).ToArray(); + var wc = Generate.Random(150, new ContinuousUniform(1.0, 10.0)); + var c = Generate.Random(150, new Rayleigh(0.8, rnd)).Select((datum, i) => System.Tuple.Create(wc[i], datum)).ToArray(); + + var d = a.Concat(b).Concat(c); + + var x = new RunningWeightedStatistics(d); + + var y = new RunningWeightedStatistics(a); + y.PushRange(b); + y.PushRange(c); + + var za = new RunningWeightedStatistics(a); + var zb = new RunningWeightedStatistics(b); + var zc = new RunningWeightedStatistics(c); + var z = za + zb + zc; + + Assert.That(y.Mean, Is.EqualTo(x.Mean).Within(1e-12), "Mean y"); + Assert.That(z.Mean, Is.EqualTo(x.Mean).Within(1e-12), "Mean z"); + + Assert.That(y.Variance, Is.EqualTo(x.Variance).Within(1e-12), "Variance y"); + Assert.That(z.Variance, Is.EqualTo(x.Variance).Within(1e-12), "Variance z"); + + Assert.That(y.PopulationVariance, Is.EqualTo(x.PopulationVariance).Within(1e-12), "PopulationVariance y"); + Assert.That(z.PopulationVariance, Is.EqualTo(x.PopulationVariance).Within(1e-12), "PopulationVariance z"); + + Assert.That(y.StandardDeviation, Is.EqualTo(x.StandardDeviation).Within(1e-12), "StandardDeviation y"); + Assert.That(z.StandardDeviation, Is.EqualTo(x.StandardDeviation).Within(1e-12), "StandardDeviation z"); + + Assert.That(y.PopulationStandardDeviation, Is.EqualTo(x.PopulationStandardDeviation).Within(1e-12), "PopulationStandardDeviation y"); + Assert.That(z.PopulationStandardDeviation, Is.EqualTo(x.PopulationStandardDeviation).Within(1e-12), "PopulationStandardDeviation z"); + + Assert.That(y.Skewness, Is.EqualTo(x.Skewness).Within(1e-12), "Skewness y"); + Assert.That(z.Skewness, Is.EqualTo(x.Skewness).Within(1e-12), "Skewness z"); + + Assert.That(y.PopulationSkewness, Is.EqualTo(x.PopulationSkewness).Within(1e-12), "PopulationSkewness y"); + Assert.That(z.PopulationSkewness, Is.EqualTo(x.PopulationSkewness).Within(1e-12), "PopulationSkewness z"); + + Assert.That(y.Kurtosis, Is.EqualTo(x.Kurtosis).Within(1e-12), "Kurtosis y"); + Assert.That(z.Kurtosis, Is.EqualTo(x.Kurtosis).Within(1e-12), "Kurtosis z"); + + Assert.That(y.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis y"); + Assert.That(z.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis z"); + } + + [TestCase("lottery")] + [TestCase("lew")] + [TestCase("mavro")] + [TestCase("michelso")] + [TestCase("numacc1")] + [TestCase("meixner")] + /// Generates samples with weightings that are integral and compares that to the unweighted statistics result. Doesn't correspond with the + /// higher order sample statistics because our weightings represent reliability weights, *not* frequency weights, and the Bessel correction is + /// calculated appropriately - so don't let the construction of the test mislead you. + public void ConsistentWithUnweighted (string dataSet) + { + var data = _data[dataSet].Data.ToArray(); + var gen = new DiscreteUniform(1, 5); + var weights = new int[data.Length]; + gen.Samples(weights); + + var stats = new RunningWeightedStatistics( data.Select((x, i) => System.Tuple.Create((double)weights[i], x)) ); + var stats2 = new RunningStatistics(); + for (int i = 0; i < data.Length; ++i) + for(int j = 0; j < weights[i] ; ++j) + stats2.Push(data[i]); + var sumWeights = weights.Sum(); + Assert.That(stats.TotalWeight, Is.EqualTo(sumWeights), "TotalWeight"); + Assert.That(stats.Count, Is.EqualTo(weights.Length), "Count"); + Assert.That(stats2.Minimum, Is.EqualTo(stats.Minimum), "Minimum"); + Assert.That(stats2.Maximum, Is.EqualTo(stats.Maximum), "Maximum"); + Assert.That(stats2.Mean, Is.EqualTo(stats.Mean).Within(1e-8), "Mean"); + Assert.That(stats2.PopulationVariance, Is.EqualTo(stats.PopulationVariance).Within(1e-9), "PopulationVariance"); + Assert.That(stats2.PopulationStandardDeviation, Is.EqualTo(stats.PopulationStandardDeviation).Within(1e-9), "PopulationStandardDeviation"); + Assert.That(stats2.PopulationSkewness, Is.EqualTo(stats.PopulationSkewness).Within(1e-8), "PopulationSkewness"); + Assert.That(stats2.PopulationKurtosis, Is.EqualTo(stats.PopulationKurtosis).Within(1e-8), "PopulationKurtosis"); + } } } diff --git a/src/Numerics.Tests/StatisticsTests/WeightedDescriptiveStatisticsTests.cs b/src/Numerics.Tests/StatisticsTests/WeightedDescriptiveStatisticsTests.cs new file mode 100644 index 00000000..0f74db45 --- /dev/null +++ b/src/Numerics.Tests/StatisticsTests/WeightedDescriptiveStatisticsTests.cs @@ -0,0 +1,432 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// +// Copyright (c) 2009-2016 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +using System; +using System.Collections.Generic; +using System.Linq; +#if NET5_0_OR_GREATER +using System.Text.Json; +using System.Text.Json.Serialization; +#endif +using NUnit.Framework; +using MathNet.Numerics.Statistics; + +namespace MathNet.Numerics.UnitTests.StatisticsTests +{ + /// + /// Descriptive statistics tests. + /// + /// NOTE: this class is not included into Silverlight version, because it uses data from local files. + /// In Silverlight access to local files is forbidden, except several cases. + [TestFixture, Category("Statistics")] + public class WeightedDescriptiveStatisticsTests + { + /// + /// Statistics data. + /// + readonly IDictionary _data = new Dictionary(); + + /// + /// Initializes a new instance of the DescriptiveStatisticsTests class. + /// + public WeightedDescriptiveStatisticsTests() + { + _data.Add("lottery", new StatTestData("NIST.Lottery.dat")); + _data.Add("lew", new StatTestData("NIST.Lew.dat")); + _data.Add("mavro", new StatTestData("NIST.Mavro.dat")); + _data.Add("michelso", new StatTestData("NIST.Michelso.dat")); + _data.Add("numacc1", new StatTestData("NIST.NumAcc1.dat")); + _data.Add("numacc2", new StatTestData("NIST.NumAcc2.dat")); + _data.Add("numacc3", new StatTestData("NIST.NumAcc3.dat")); + _data.Add("numacc4", new StatTestData("NIST.NumAcc4.dat")); + _data.Add("meixner", new StatTestData("NIST.Meixner.dat")); + } + + /// + /// Constructor with null throws ArgumentNullException. + /// + [Test] + public void ConstructorThrowArgumentNullException() + { + const IEnumerable> WeightedData = null; + const IEnumerable<(double, double)> WeightedData2 = null; + + Assert.That(() => new WeightedDescriptiveStatistics(WeightedData), Throws.TypeOf()); + Assert.That(() => new WeightedDescriptiveStatistics(WeightedData, true), Throws.TypeOf()); + Assert.That(() => new WeightedDescriptiveStatistics(WeightedData2), Throws.TypeOf()); + Assert.That(() => new WeightedDescriptiveStatistics(WeightedData2, true), Throws.TypeOf()); + + } + + /// + /// IEnumerable Double. + /// + /// Dataset name. + /// Digits count. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + [TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)] + public void IEnumerableTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x))); + + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + + /// + /// IEnumerable Double. + /// + /// Dataset name. + /// Digits count. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + [TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)] + public void IEnumerableValueTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => (1.0, x))); + + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + + /// + /// IEnumerable Double high accuracy. + /// + /// Dataset name. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), true); + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + + /// + /// IEnumerable Double high accuracy. + /// + /// Dataset name. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableValueTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => (1.0, x)), true); + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + /// + /// IEnumerable double low accuracy. + /// + /// Dataset name. + /// Digits count. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), false); + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + + /// + /// IEnumerable Nullable double. + /// + /// Dataset name. + /// Digits count. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableZeroWeightTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159))); + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + + /// + /// IEnumerable Nullable double high accuracy. + /// + /// Dataset name. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableZeroWeightTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), true); + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + + /// + /// IEnumerable Nullable Double Low Accuracy. + /// + /// Dataset name. + /// Digits count. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + public void IEnumerableZeroWeightTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var stats = new WeightedDescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), false); + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } + + [Test] + public void ShortSequences() + { + var stats5 = new WeightedDescriptiveStatistics(new Tuple[0]); + Assert.That(stats5.Skewness, Is.NaN); + Assert.That(stats5.Kurtosis, Is.NaN); + + var stats6 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0) }); + Assert.That(stats6.Skewness, Is.NaN); + Assert.That(stats6.Kurtosis, Is.NaN); + + var stats7 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0) }); + Assert.That(stats7.Skewness, Is.NaN); + Assert.That(stats7.Kurtosis, Is.NaN); + + var stats8 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0) }); + Assert.That(stats8.Skewness, Is.Not.NaN); + Assert.That(stats8.Kurtosis, Is.NaN); + + var stats9 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0), Tuple.Create(1.0, -4.0) }); + Assert.That(stats9.Skewness, Is.Not.NaN); + Assert.That(stats9.Kurtosis, Is.Not.NaN); + } + + [Test] + public void NegativeWeightsThrow() + { + Assert.That(() => new WeightedDescriptiveStatistics(new[] { Tuple.Create(-1.0, 1.0) }), Throws.TypeOf()); + } + + [Test] + public void ZeroVarianceSequence() + { + var stats2 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0) }); + Assert.That(stats2.Skewness, Is.NaN); + Assert.That(stats2.Kurtosis, Is.NaN); + } + +#if NET5_0_OR_GREATER + /// + /// IEnumerable Double. + /// + /// Dataset name. + /// Digits count. + /// Skewness value. + /// Kurtosis value. + /// Median value. + /// Min value. + /// Max value. + /// Count value. + [TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)] + [TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)] + [TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)] + [TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)] + [TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)] + [TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)] + [TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)] + [TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)] + [TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)] + public void JsonDeserializationTest(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count) + { + var data = _data[dataSet]; + var serialize = new WeightedDescriptiveStatistics(data.Data.Select(x => (1.0, x)), false); + var jsonSerializerOptions = new JsonSerializerOptions + { + NumberHandling = JsonNumberHandling.AllowNamedFloatingPointLiterals, + }; + var json = JsonSerializer.Serialize(serialize, jsonSerializerOptions); + var stats = JsonSerializer.Deserialize(json, jsonSerializerOptions); + Assert.NotNull(stats); + AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10); + AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits); + AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8); + AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8); + Assert.AreEqual(stats.Minimum, min); + Assert.AreEqual(stats.Maximum, max); + Assert.AreEqual(stats.Count, count); + Assert.AreEqual(stats.TotalWeight, count); + } +#endif + } +} diff --git a/src/Numerics/Statistics/DescriptiveStatistics.cs b/src/Numerics/Statistics/DescriptiveStatistics.cs index d3014205..637d5a4d 100644 --- a/src/Numerics/Statistics/DescriptiveStatistics.cs +++ b/src/Numerics/Statistics/DescriptiveStatistics.cs @@ -110,35 +110,6 @@ namespace MathNet.Numerics.Statistics } } - /// - /// Initializes a new instance of the class. - /// - /// The sample data. - /// - /// If set to true, increased accuracy mode used. - /// Increased accuracy mode uses types for internal calculations. - /// - /// - /// Don't use increased accuracy for data sets containing large values (in absolute value). - /// This may cause the calculations to overflow. - /// - public DescriptiveStatistics(IEnumerable> data, bool increasedAccuracy = false) - { - if (data == null) - { - throw new ArgumentNullException(nameof(data)); - } - - if (increasedAccuracy) - { - ComputeDecimal(data); - } - else - { - Compute(data); - } - } - #if NET5_0_OR_GREATER /// /// Initializes a new instance of the class. @@ -192,7 +163,7 @@ namespace MathNet.Numerics.Statistics public double StandardDeviation { get; private set; } /// - /// Gets the unbiased estimator of the population skewness. + /// Gets the sample skewness. /// /// The sample skewness. /// Returns zero if is less than three. @@ -203,7 +174,7 @@ namespace MathNet.Numerics.Statistics public double Skewness { get; private set; } /// - /// Gets the unbiased estimator of the population excess kurtosis using the G_2 estimator. + /// Gets the sample kurtosis. /// /// The sample kurtosis. /// Returns zero if is less than four. @@ -233,17 +204,6 @@ namespace MathNet.Numerics.Statistics #endif public double Minimum { get; private set; } - - /// - /// Gets the total weight. When used with unweighted data, returns the number of samples. - /// - /// The total weight. - [DataMember(Order = 9)] -#if NET5_0_OR_GREATER - [JsonInclude] -#endif - public double TotalWeight { get; private set; } - /// /// Computes descriptive statistics from a stream of data values. /// @@ -261,17 +221,17 @@ namespace MathNet.Numerics.Statistics foreach (var xi in data) { double delta = xi - mean; - double scaleDelta = delta/++n; - double scaleDeltaSqr = scaleDelta*scaleDelta; - double tmpDelta = delta*(n - 1); + double scaleDelta = delta / ++n; + double scaleDeltaSqr = scaleDelta * scaleDelta; + double tmpDelta = delta * (n - 1); mean += scaleDelta; - kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3) - + 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness; + kurtosis += tmpDelta * scaleDelta * scaleDeltaSqr * (n * n - 3 * n + 3) + + 6 * scaleDeltaSqr * variance - 4 * scaleDelta * skewness; - skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance; - variance += tmpDelta*scaleDelta; + skewness += tmpDelta * scaleDeltaSqr * (n - 2) - 3 * scaleDelta * variance; + variance += tmpDelta * scaleDelta; if (minimum > xi) { @@ -306,17 +266,17 @@ namespace MathNet.Numerics.Statistics if (xi.HasValue) { double delta = xi.Value - mean; - double scaleDelta = delta/++n; - double scaleDeltaSqr = scaleDelta*scaleDelta; - double tmpDelta = delta*(n - 1); + double scaleDelta = delta / ++n; + double scaleDeltaSqr = scaleDelta * scaleDelta; + double tmpDelta = delta * (n - 1); mean += scaleDelta; - kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3) - + 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness; + kurtosis += tmpDelta * scaleDelta * scaleDeltaSqr * (n * n - 3 * n + 3) + + 6 * scaleDeltaSqr * variance - 4 * scaleDelta * skewness; - skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance; - variance += tmpDelta*scaleDelta; + skewness += tmpDelta * scaleDeltaSqr * (n - 2) - 3 * scaleDelta * variance; + variance += tmpDelta * scaleDelta; if (minimum > xi) { @@ -351,17 +311,17 @@ namespace MathNet.Numerics.Statistics { decimal xi = (decimal)x; decimal delta = xi - mean; - decimal scaleDelta = delta/++n; - decimal scaleDelta2 = scaleDelta*scaleDelta; - decimal tmpDelta = delta*(n - 1); + decimal scaleDelta = delta / ++n; + decimal scaleDelta2 = scaleDelta * scaleDelta; + decimal tmpDelta = delta * (n - 1); mean += scaleDelta; - kurtosis += tmpDelta*scaleDelta*scaleDelta2*(n*n - 3*n + 3) - + 6*scaleDelta2*variance - 4*scaleDelta*skewness; + kurtosis += tmpDelta * scaleDelta * scaleDelta2 * (n * n - 3 * n + 3) + + 6 * scaleDelta2 * variance - 4 * scaleDelta * skewness; - skewness += tmpDelta*scaleDelta2*(n - 2) - 3*scaleDelta*variance; - variance += tmpDelta*scaleDelta; + skewness += tmpDelta * scaleDelta2 * (n - 2) - 3 * scaleDelta * variance; + variance += tmpDelta * scaleDelta; if (minimum > xi) { @@ -397,17 +357,17 @@ namespace MathNet.Numerics.Statistics { decimal xi = (decimal)x.Value; decimal delta = xi - mean; - decimal scaleDelta = delta/++n; - decimal scaleDeltaSquared = scaleDelta*scaleDelta; - decimal tmpDelta = delta*(n - 1); + decimal scaleDelta = delta / ++n; + decimal scaleDeltaSquared = scaleDelta * scaleDelta; + decimal tmpDelta = delta * (n - 1); mean += scaleDelta; - kurtosis += tmpDelta*scaleDelta*scaleDeltaSquared*(n*n - 3*n + 3) - + 6*scaleDeltaSquared*variance - 4*scaleDelta*skewness; + kurtosis += tmpDelta * scaleDelta * scaleDeltaSquared * (n * n - 3 * n + 3) + + 6 * scaleDeltaSquared * variance - 4 * scaleDelta * skewness; - skewness += tmpDelta*scaleDeltaSquared*(n - 2) - 3*scaleDelta*variance; - variance += tmpDelta*scaleDelta; + skewness += tmpDelta * scaleDeltaSquared * (n - 2) - 3 * scaleDelta * variance; + variance += tmpDelta * scaleDelta; if (minimum > xi) { @@ -438,190 +398,6 @@ namespace MathNet.Numerics.Statistics { Mean = mean; Count = n; - TotalWeight = n; - - Minimum = double.NaN; - Maximum = double.NaN; - Variance = double.NaN; - StandardDeviation = double.NaN; - Skewness = double.NaN; - Kurtosis = double.NaN; - - if (n > 0) - { - Minimum = minimum; - Maximum = maximum; - - if (n > 1) - { - Variance = variance/(n - 1); - StandardDeviation = Math.Sqrt(Variance); - } - - if (Variance != 0) - { - if (n > 2) - { - Skewness = (double)n/((n - 1)*(n - 2))*(skewness/(Variance*StandardDeviation)); - } - - if (n > 3) - { - Kurtosis = ((double)n*n - 1)/((n - 2)*(n - 3)) - *(n*kurtosis/(variance*variance) - 3 + 6.0/(n + 1)); - } - } - } - } - - /// - /// Computes descriptive statistics from a stream of data values and weights. - /// - /// A sequence of datapoints. - void Compute(IEnumerable> data) - { - double mean = 0; - double variance = 0; - double skewness = 0; - double kurtosis = 0; - double minimum = double.PositiveInfinity; - double maximum = double.NegativeInfinity; - long n = 0; - double totalWeight = 0; - double den = 0; - - double V2 = 0; - double V3 = 0; - double V4 = 0; - - foreach (var (w, xi) in data) - { - if (w < 0) - { - throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample"); - } - else if (w > 0) - { - ++n; - double delta = xi - mean; - double prevWeight = totalWeight; - totalWeight += w; - V2 += w * w; - V3 += w * w * w; - V4 += w * w * w * w; - - den += w * (2.0 * prevWeight - den) / totalWeight; - - double scaleDelta = delta * w / totalWeight; - double scaleDeltaSqr = scaleDelta * scaleDelta; - double tmpDelta = delta * scaleDelta * prevWeight; - double r = prevWeight / w; - - mean += scaleDelta; - - kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0) - + 6.0 * scaleDeltaSqr * variance - 4.0 * scaleDelta * skewness; - - skewness += tmpDelta * scaleDelta * (r - 1.0) - 3.0 * scaleDelta * variance; - variance += tmpDelta; - - if (minimum > xi) - { - minimum = xi; - } - - if (maximum < xi) - { - maximum = xi; - } - } - } - - SetStatisticsWeighted(mean, variance, skewness, kurtosis, minimum, maximum, n, totalWeight, den, V2, V3, V4); - } - - /// - /// Computes descriptive statistics from a stream of data values and weights. - /// - /// A sequence of datapoints. - void ComputeDecimal(IEnumerable> data) - { - decimal mean = 0; - decimal variance = 0; - decimal skewness = 0; - decimal kurtosis = 0; - decimal minimum = decimal.MaxValue; - decimal maximum = decimal.MinValue; - decimal totalWeight = 0; - long n = 0; - decimal den = 0; - - decimal V2 = 0; - decimal V3 = 0; - decimal V4 = 0; - - foreach (var (w, x) in data) - { - if (w < 0) - { - throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample"); - } - else if (w > 0) - { - - decimal xi = (decimal)x; - decimal decW = (decimal)w; - ++n; - decimal delta = xi - mean; - decimal prevWeight = totalWeight; - totalWeight += decW; - V2 += decW * decW; - V3 += decW * decW * decW; - V4 += decW * decW * decW * decW; - den += decW * (2.0m * prevWeight - den) / totalWeight; - decimal scaleDelta = delta * decW / totalWeight; - decimal scaleDeltaSqr = scaleDelta * scaleDelta; - decimal tmpDelta = delta * scaleDelta * prevWeight; - decimal r = prevWeight / decW; - - mean += scaleDelta; - - kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0m) - + 6.0m * scaleDeltaSqr * variance - 4.0m * scaleDelta * skewness; - - skewness += tmpDelta * scaleDelta * (r - 1.0m) - 3.0m * scaleDelta * variance; - variance += tmpDelta; - - if (minimum > xi) - { - minimum = xi; - } - - if (maximum < xi) - { - maximum = xi; - } - } - } - - SetStatisticsWeighted((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n, (double)totalWeight, (double)den, (double)V2, (double)V3, (double)V4); - } - - /// - /// Internal use. Method use for setting the statistics. - /// - /// For setting Mean. - /// For setting Variance. - /// For setting Skewness. - /// For setting Kurtosis. - /// For setting Minimum. - /// For setting Maximum. - /// For setting Count. - void SetStatisticsWeighted(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, long n, double w1, double den, double w2, double w3, double w4) - { - Mean = mean; - Count = n; - TotalWeight = w1; Minimum = double.NaN; Maximum = double.NaN; @@ -637,7 +413,7 @@ namespace MathNet.Numerics.Statistics if (n > 1) { - Variance = variance / den; + Variance = variance / (n - 1); StandardDeviation = Math.Sqrt(Variance); } @@ -645,19 +421,13 @@ namespace MathNet.Numerics.Statistics { if (n > 2) { - var skewDen = (w1 * (w1 * w1 - 3.0 * w2) + 2.0 * w3) / (w1 * w1); - Skewness = skewness / (skewDen * Variance * StandardDeviation); + Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation)); } if (n > 3) { - double p2 = w1 * w1; - double p4 = p2 * p2; - double w2p2 = w2 * w2; - double poly = p4 - 6.0 * p2 * w2 + 8.0 * w1 * w3 + 3.0 * w2p2 - 6.0 * w4; - double a = p4 - 4.0 * w1 * w3 + 3.0 * w2p2; - double b = 3.0 * (p4 - 2.0 * p2 * w2 + 4.0 * w1 * w3 - 3.0 * w2p2); - Kurtosis = (a * w1 * kurtosis / (variance * variance) - b) * (den / (w1 * poly)); + Kurtosis = ((double)n * n - 1) / ((n - 2) * (n - 3)) + * (n * kurtosis / (variance * variance) - 3 + 6.0 / (n + 1)); } } } diff --git a/src/Numerics/Statistics/RunningWeightedStatistics.cs b/src/Numerics/Statistics/RunningWeightedStatistics.cs index 306fdf45..932066a5 100644 --- a/src/Numerics/Statistics/RunningWeightedStatistics.cs +++ b/src/Numerics/Statistics/RunningWeightedStatistics.cs @@ -37,8 +37,8 @@ using System.Runtime.Serialization; namespace MathNet.Numerics.Statistics { /// - /// Running statistics accumulator, allows updating by adding values - /// or by combining two accumulators. + /// Running weighted statistics accumulator, allows updating by adding values + /// or by combining two accumulators. Weights are reliability weights, not frequency weights. /// /// /// This type declares a DataContract for out of the box ephemeral serialization @@ -144,7 +144,7 @@ namespace MathNet.Numerics.Statistics /// /// Estimates the unbiased population variance from the provided samples. - /// On a dataset of size N will use an N-1 normalizer (Bessel's correction). + /// Will use the Bessel correction for reliability weighting. /// Returns NaN if data has less than two entries or if any entry is NaN. /// public double Variance => _n < 2 ? double.NaN : _m2 / _den; @@ -158,7 +158,7 @@ namespace MathNet.Numerics.Statistics /// /// Estimates the unbiased population standard deviation from the provided samples. - /// On a dataset of size N will use an N-1 normalizer (Bessel's correction). + /// Will use the Bessel correction for reliability weighting. /// Returns NaN if data has less than two entries or if any entry is NaN. /// public double StandardDeviation => _n < 2 ? double.NaN : Math.Sqrt(_m2 / _den); @@ -172,7 +172,7 @@ namespace MathNet.Numerics.Statistics /// /// Estimates the unbiased population skewness from the provided samples. - /// Uses a normalizer (Bessel's correction; type 2). + /// Will use the Bessel correction for reliability weighting. /// Returns NaN if data has less than three entries or if any entry is NaN. /// public double Skewness @@ -198,7 +198,7 @@ namespace MathNet.Numerics.Statistics /// /// Estimates the unbiased population excess kurtosis from the provided samples. - /// Uses a normalizer (Bessel's correction; type 2). + /// Will use the Bessel correction for reliability weighting. /// Returns NaN if data has less than four entries or if any entry is NaN. /// Equivalent formula for this for weighted distributions are unknown. /// @@ -233,6 +233,11 @@ namespace MathNet.Numerics.Statistics /// public double TotalWeight => _w1; + /// + /// The Kish's Effective Sample Size + /// + public double EffectiveSampleSize => _w2 / _w1; + /// /// Update the running statistics by adding another observed sample (in-place). /// diff --git a/src/Numerics/Statistics/WeightedDescriptiveStatistics.cs b/src/Numerics/Statistics/WeightedDescriptiveStatistics.cs new file mode 100644 index 00000000..d89c5579 --- /dev/null +++ b/src/Numerics/Statistics/WeightedDescriptiveStatistics.cs @@ -0,0 +1,411 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// +// Copyright (c) 2009-2015 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +using System; +using System.Collections.Generic; +using System.Runtime.Serialization; +using System.Linq; +#if NET5_0_OR_GREATER +using System.Text.Json.Serialization; +#endif + +namespace MathNet.Numerics.Statistics +{ + /// + /// Computes the basic statistics of data set. The class meets the + /// NIST standard of accuracy for mean, variance, and standard deviation + /// (the only statistics they provide exact values for) and exceeds them + /// in increased accuracy mode. + /// Recommendation: consider to use RunningWeightedStatistics instead. + /// + /// + /// This type declares a DataContract for out of the box ephemeral serialization + /// with engines like DataContractSerializer, Protocol Buffers and FsPickler, + /// but does not guarantee any compatibility between versions. + /// It is not recommended to rely on this mechanism for durable persistence. + /// + [DataContract(Namespace = "urn:MathNet/Numerics")] + public class WeightedDescriptiveStatistics + { + + /// + /// Initializes a new instance of the class. + /// + /// The sample data. + /// + /// If set to true, increased accuracy mode used. + /// Increased accuracy mode uses types for internal calculations. + /// + /// + /// Don't use increased accuracy for data sets containing large values (in absolute value). + /// This may cause the calculations to overflow. + /// + public WeightedDescriptiveStatistics(IEnumerable> data, bool increasedAccuracy = false) + { + if (data == null) + { + throw new ArgumentNullException(nameof(data)); + } + + var data2 = data.Select(x => (x.Item1, x.Item2)); + if (increasedAccuracy) + ComputeDecimal(data2); + else + Compute(data2); + } + + /// + /// Initializes a new instance of the class. + /// + /// The sample data. + /// + /// If set to true, increased accuracy mode used. + /// Increased accuracy mode uses types for internal calculations. + /// + /// + /// Don't use increased accuracy for data sets containing large values (in absolute value). + /// This may cause the calculations to overflow. + /// + public WeightedDescriptiveStatistics(IEnumerable<(double, double)> data, bool increasedAccuracy = false) + { + if (data == null) + { + throw new ArgumentNullException(nameof(data)); + } + + if (increasedAccuracy) + ComputeDecimal(data); + else + Compute(data); + } +#if NET5_0_OR_GREATER + /// + /// Initializes a new instance of the class. + /// + /// + /// Used for Json serialization + /// + public DescriptiveStatistics() + { + } +#endif + + /// + /// Gets the size of the sample. + /// + /// The size of the sample. + [DataMember(Order = 1)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public long Count { get; private set; } + + /// + /// Gets the sample mean. + /// + /// The sample mean. + [DataMember(Order = 2)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double Mean { get; private set; } + + /// + /// Gets the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer). + /// + /// The sample variance. + [DataMember(Order = 3)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double Variance { get; private set; } + + /// + /// Gets the unbiased population standard deviation (on a dataset of size N will use an N-1 normalizer). + /// + /// The sample standard deviation. + [DataMember(Order = 4)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double StandardDeviation { get; private set; } + + /// + /// Gets the unbiased estimator of the population skewness. + /// + /// The sample skewness. + /// Returns zero if is less than three. + [DataMember(Order = 5)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double Skewness { get; private set; } + + /// + /// Gets the unbiased estimator of the population excess kurtosis using the G_2 estimator. + /// + /// The sample kurtosis. + /// Returns zero if is less than four. + [DataMember(Order = 6)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double Kurtosis { get; private set; } + + /// + /// Gets the maximum sample value. + /// + /// The maximum sample value. + [DataMember(Order = 7)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double Maximum { get; private set; } + + /// + /// Gets the minimum sample value. + /// + /// The minimum sample value. + [DataMember(Order = 8)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double Minimum { get; private set; } + + + /// + /// Gets the total weight. When used with unweighted data, returns the number of samples. + /// + /// The total weight. + [DataMember(Order = 9)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double TotalWeight { get; private set; } + + /// + /// The Kish's effective sample size https://en.wikipedia.org/wiki/Effective_sample_size + /// + /// The Kish's effective sample size. + [DataMember(Order = 10)] +#if NET5_0_OR_GREATER + [JsonInclude] +#endif + public double EffectiveSampleSize { get; private set; } + + /// + /// Computes descriptive statistics from a stream of data values and reliability weights. + /// + /// A sequence of datapoints. + void Compute(IEnumerable<(double, double)> data) + { + double mean = 0; + double variance = 0; + double skewness = 0; + double kurtosis = 0; + double minimum = double.PositiveInfinity; + double maximum = double.NegativeInfinity; + long n = 0; + double totalWeight = 0; + double den = 0; + + double V2 = 0; + double V3 = 0; + double V4 = 0; + + foreach (var (w, xi) in data) + { + if (w < 0) + { + throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample"); + } + else if (w > 0) + { + ++n; + double delta = xi - mean; + double prevWeight = totalWeight; + totalWeight += w; + V2 += w * w; + V3 += w * w * w; + V4 += w * w * w * w; + + den += w * (2.0 * prevWeight - den) / totalWeight; + + double scaleDelta = delta * w / totalWeight; + double scaleDeltaSqr = scaleDelta * scaleDelta; + double tmpDelta = delta * scaleDelta * prevWeight; + double r = prevWeight / w; + + mean += scaleDelta; + + kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0) + + 6.0 * scaleDeltaSqr * variance - 4.0 * scaleDelta * skewness; + + skewness += tmpDelta * scaleDelta * (r - 1.0) - 3.0 * scaleDelta * variance; + variance += tmpDelta; + + if (minimum > xi) + { + minimum = xi; + } + + if (maximum < xi) + { + maximum = xi; + } + } + } + + SetStatisticsWeighted(mean, variance, skewness, kurtosis, minimum, maximum, n, totalWeight, den, V2, V3, V4); + } + + /// + /// Computes descriptive statistics from a stream of data values and reliability weights. + /// + /// A sequence of datapoints. + void ComputeDecimal(IEnumerable<(double, double)> data) + { + decimal mean = 0; + decimal variance = 0; + decimal skewness = 0; + decimal kurtosis = 0; + decimal minimum = decimal.MaxValue; + decimal maximum = decimal.MinValue; + decimal totalWeight = 0; + long n = 0; + decimal den = 0; + + decimal V2 = 0; + decimal V3 = 0; + decimal V4 = 0; + + foreach (var (w, x) in data) + { + if (w < 0) + { + throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample"); + } + else if (w > 0) + { + + decimal xi = (decimal)x; + decimal decW = (decimal)w; + ++n; + decimal delta = xi - mean; + decimal prevWeight = totalWeight; + totalWeight += decW; + V2 += decW * decW; + V3 += decW * decW * decW; + V4 += decW * decW * decW * decW; + den += decW * (2.0m * prevWeight - den) / totalWeight; + decimal scaleDelta = delta * decW / totalWeight; + decimal scaleDeltaSqr = scaleDelta * scaleDelta; + decimal tmpDelta = delta * scaleDelta * prevWeight; + decimal r = prevWeight / decW; + + mean += scaleDelta; + + kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0m) + + 6.0m * scaleDeltaSqr * variance - 4.0m * scaleDelta * skewness; + + skewness += tmpDelta * scaleDelta * (r - 1.0m) - 3.0m * scaleDelta * variance; + variance += tmpDelta; + + if (minimum > xi) + { + minimum = xi; + } + + if (maximum < xi) + { + maximum = xi; + } + } + } + + SetStatisticsWeighted((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n, (double)totalWeight, (double)den, (double)V2, (double)V3, (double)V4); + } + + /// + /// Internal use. Method use for setting the statistics. + /// + /// For setting Mean. + /// For setting Variance. + /// For setting Skewness. + /// For setting Kurtosis. + /// For setting Minimum. + /// For setting Maximum. + /// For setting Count. + void SetStatisticsWeighted(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, long n, double w1, double den, double w2, double w3, double w4) + { + Mean = mean; + Count = n; + TotalWeight = w1; + EffectiveSampleSize = w1 * w1 / w2; + Minimum = double.NaN; + Maximum = double.NaN; + Variance = double.NaN; + StandardDeviation = double.NaN; + Skewness = double.NaN; + Kurtosis = double.NaN; + + if (n > 0) + { + Minimum = minimum; + Maximum = maximum; + + if (n > 1) + { + Variance = variance / den; + StandardDeviation = Math.Sqrt(Variance); + } + + if (Variance != 0) + { + if (n > 2) + { + var skewDen = (w1 * (w1 * w1 - 3.0 * w2) + 2.0 * w3) / (w1 * w1); + Skewness = skewness / (skewDen * Variance * StandardDeviation); + } + + if (n > 3) + { + double p2 = w1 * w1; + double p4 = p2 * p2; + double w2p2 = w2 * w2; + double poly = p4 - 6.0 * p2 * w2 + 8.0 * w1 * w3 + 3.0 * w2p2 - 6.0 * w4; + double a = p4 - 4.0 * w1 * w3 + 3.0 * w2p2; + double b = 3.0 * (p4 - 2.0 * p2 * w2 + 4.0 * w1 * w3 - 3.0 * w2p2); + Kurtosis = (a * w1 * kurtosis / (variance * variance) - b) * (den / (w1 * poly)); + } + } + } + } + } +}