Browse Source

Move weighted descriptive statistics to own class.

pull/865/head
richard-allen 5 years ago
parent
commit
08320b1132
  1. 239
      src/Numerics.Tests/StatisticsTests/DescriptiveStatisticsTests.cs
  2. 106
      src/Numerics.Tests/StatisticsTests/RunningWeightedStatisticsTests.cs
  3. 432
      src/Numerics.Tests/StatisticsTests/WeightedDescriptiveStatisticsTests.cs
  4. 298
      src/Numerics/Statistics/DescriptiveStatistics.cs
  5. 17
      src/Numerics/Statistics/RunningWeightedStatistics.cs
  6. 411
      src/Numerics/Statistics/WeightedDescriptiveStatistics.cs

239
src/Numerics.Tests/StatisticsTests/DescriptiveStatisticsTests.cs

@ -29,7 +29,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
#if NET5_0_OR_GREATER
using System.Text.Json;
using System.Text.Json.Serialization;
@ -76,10 +75,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
{
const IEnumerable<double> Data = null;
const IEnumerable<double?> NullableData = null;
const IEnumerable<Tuple<double, double>> WeightedData = null;
Assert.That(() => new DescriptiveStatistics(WeightedData), Throws.TypeOf<ArgumentNullException>());
Assert.That(() => new DescriptiveStatistics(WeightedData, true), Throws.TypeOf<ArgumentNullException>());
Assert.That(() => new DescriptiveStatistics(Data), Throws.TypeOf<ArgumentNullException>());
Assert.That(() => new DescriptiveStatistics(Data, true), Throws.TypeOf<ArgumentNullException>());
Assert.That(() => new DescriptiveStatistics(NullableData), Throws.TypeOf<ArgumentNullException>());
@ -118,7 +114,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
@ -150,7 +145,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
@ -183,7 +177,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
@ -216,7 +209,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
@ -248,7 +240,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
@ -281,205 +272,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> Double.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
[TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)]
public void IEnumerableTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)));
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> Double high accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), true);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> double low accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new DescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), false);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> <c>Nullable</c> double.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableZeroWeightTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)));
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> <c>Nullable</c> double high accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableZeroWeightTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), true);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> <c>Nullable</c> Double Low Accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableZeroWeightTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new DescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), false);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
[Test]
@ -504,32 +296,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
var stats4 = new DescriptiveStatistics(new[] { 1.0, 2.0, -3.0, -4.0 });
Assert.That(stats4.Skewness, Is.Not.NaN);
Assert.That(stats4.Kurtosis, Is.Not.NaN);
var stats5 = new DescriptiveStatistics(new Tuple<double, double>[0]);
Assert.That(stats5.Skewness, Is.NaN);
Assert.That(stats5.Kurtosis, Is.NaN);
var stats6 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0) });
Assert.That(stats6.Skewness, Is.NaN);
Assert.That(stats6.Kurtosis, Is.NaN);
var stats7 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0) });
Assert.That(stats7.Skewness, Is.NaN);
Assert.That(stats7.Kurtosis, Is.NaN);
var stats8 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0) });
Assert.That(stats8.Skewness, Is.Not.NaN);
Assert.That(stats8.Kurtosis, Is.NaN);
var stats9 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0), Tuple.Create(1.0, -4.0) });
Assert.That(stats9.Skewness, Is.Not.NaN);
Assert.That(stats9.Kurtosis, Is.Not.NaN);
}
[Test]
public void NegativeWeightsThrow()
{
Assert.That(() => new DescriptiveStatistics(new[] { Tuple.Create(-1.0, 1.0) }), Throws.TypeOf<ArgumentOutOfRangeException>());
}
[Test]
@ -538,10 +304,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
var stats = new DescriptiveStatistics(new[] { 2.0, 2.0, 2.0, 2.0 });
Assert.That(stats.Skewness, Is.NaN);
Assert.That(stats.Kurtosis, Is.NaN);
var stats2 = new DescriptiveStatistics(new[] { Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0) });
Assert.That(stats2.Skewness, Is.NaN);
Assert.That(stats2.Kurtosis, Is.NaN);
}
#if NET5_0_OR_GREATER
@ -583,7 +345,6 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
#endif
}

106
src/Numerics.Tests/StatisticsTests/RunningWeightedStatisticsTests.cs

@ -88,7 +88,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
public void ConsistentWithNist(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new RunningWeightedStatistics(data.Data.Select(x => System.Tuple.Create(1.0,x)));
var stats = new RunningWeightedStatistics(data.Data.Select(x => System.Tuple.Create(1.0, x)));
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
@ -139,19 +139,19 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
public void NegativeWeightsThrow()
{
Assert.That(() => new RunningWeightedStatistics(new[] { System.Tuple.Create(-1.0, 1.0) }), Throws.TypeOf<System.ArgumentOutOfRangeException>());
var stats0 = new RunningWeightedStatistics(new System.Tuple<double,double>[0]);
Assert.That(() =>stats0.Push(-1.0, 1.0), Throws.TypeOf<System.ArgumentOutOfRangeException>());
var stats0 = new RunningWeightedStatistics(new System.Tuple<double, double>[0]);
Assert.That(() => stats0.Push(-1.0, 1.0), Throws.TypeOf<System.ArgumentOutOfRangeException>());
Assert.That(() => stats0.PushRange(new[] { System.Tuple.Create(-1.0, 1.0) }), Throws.TypeOf<System.ArgumentOutOfRangeException>());
}
[Test]
public void ShortSequences()
{
var stats0 = new RunningWeightedStatistics(new System.Tuple<double,double>[0]);
var stats0 = new RunningWeightedStatistics(new System.Tuple<double, double>[0]);
Assert.That(stats0.Skewness, Is.NaN);
Assert.That(stats0.Kurtosis, Is.NaN);
var stats1 = new RunningWeightedStatistics(new [] { System.Tuple.Create(1.0,1.0) });
var stats1 = new RunningWeightedStatistics(new[] { System.Tuple.Create(1.0, 1.0) });
Assert.That(stats1.Skewness, Is.NaN);
Assert.That(stats1.Kurtosis, Is.NaN);
@ -159,7 +159,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.That(stats2.Skewness, Is.NaN);
Assert.That(stats2.Kurtosis, Is.NaN);
var stats3 = new RunningWeightedStatistics(new[] { System.Tuple.Create(1.0, 1.0), System.Tuple.Create(1.0, 2.0), System.Tuple.Create(1.0, -3.0)});
var stats3 = new RunningWeightedStatistics(new[] { System.Tuple.Create(1.0, 1.0), System.Tuple.Create(1.0, 2.0), System.Tuple.Create(1.0, -3.0) });
Assert.That(stats3.Skewness, Is.Not.NaN);
Assert.That(stats3.Kurtosis, Is.NaN);
@ -177,7 +177,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
}
[Test]
public void Combine()
public void CombineUnweighted()
{
var rnd = new SystemRandomSource(10);
var a = Generate.Random(200, new Erlang(2, 0.2, rnd)).Select(datum => System.Tuple.Create(1.0, datum)).ToArray();
@ -193,9 +193,9 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
y.PushRange(b);
y.PushRange(c);
var za = new RunningWeightedStatistics(a);
var zb = new RunningWeightedStatistics(b);
var zc = new RunningWeightedStatistics(c);
var za = new RunningWeightedStatistics(a);
var zb = new RunningWeightedStatistics(b);
var zc = new RunningWeightedStatistics(c);
var z = za + zb + zc;
Assert.That(x.Mean, Is.EqualTo(direct.Mean()).Within(1e-12), "Mean Reference");
@ -234,5 +234,91 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.That(y.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis y");
Assert.That(z.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis z");
}
[Test]
/// Tests that combination of data via + / Combine is consistent with the incremental approach.
public void CombineWeighted()
{
var rnd = new SystemRandomSource(10);
var wa = Generate.Random(200, new ContinuousUniform(1.0, 10.0));
var a = Generate.Random(200, new Erlang(2, 0.2, rnd)).Select((datum, i) => System.Tuple.Create(wa[i], datum)).ToArray();
var wb = Generate.Random(100, new ContinuousUniform(1.0, 10.0));
var b = Generate.Random(100, new Beta(1.2, 1.4, rnd)).Select((datum, i) => System.Tuple.Create(wb[i], datum)).ToArray();
var wc = Generate.Random(150, new ContinuousUniform(1.0, 10.0));
var c = Generate.Random(150, new Rayleigh(0.8, rnd)).Select((datum, i) => System.Tuple.Create(wc[i], datum)).ToArray();
var d = a.Concat(b).Concat(c);
var x = new RunningWeightedStatistics(d);
var y = new RunningWeightedStatistics(a);
y.PushRange(b);
y.PushRange(c);
var za = new RunningWeightedStatistics(a);
var zb = new RunningWeightedStatistics(b);
var zc = new RunningWeightedStatistics(c);
var z = za + zb + zc;
Assert.That(y.Mean, Is.EqualTo(x.Mean).Within(1e-12), "Mean y");
Assert.That(z.Mean, Is.EqualTo(x.Mean).Within(1e-12), "Mean z");
Assert.That(y.Variance, Is.EqualTo(x.Variance).Within(1e-12), "Variance y");
Assert.That(z.Variance, Is.EqualTo(x.Variance).Within(1e-12), "Variance z");
Assert.That(y.PopulationVariance, Is.EqualTo(x.PopulationVariance).Within(1e-12), "PopulationVariance y");
Assert.That(z.PopulationVariance, Is.EqualTo(x.PopulationVariance).Within(1e-12), "PopulationVariance z");
Assert.That(y.StandardDeviation, Is.EqualTo(x.StandardDeviation).Within(1e-12), "StandardDeviation y");
Assert.That(z.StandardDeviation, Is.EqualTo(x.StandardDeviation).Within(1e-12), "StandardDeviation z");
Assert.That(y.PopulationStandardDeviation, Is.EqualTo(x.PopulationStandardDeviation).Within(1e-12), "PopulationStandardDeviation y");
Assert.That(z.PopulationStandardDeviation, Is.EqualTo(x.PopulationStandardDeviation).Within(1e-12), "PopulationStandardDeviation z");
Assert.That(y.Skewness, Is.EqualTo(x.Skewness).Within(1e-12), "Skewness y");
Assert.That(z.Skewness, Is.EqualTo(x.Skewness).Within(1e-12), "Skewness z");
Assert.That(y.PopulationSkewness, Is.EqualTo(x.PopulationSkewness).Within(1e-12), "PopulationSkewness y");
Assert.That(z.PopulationSkewness, Is.EqualTo(x.PopulationSkewness).Within(1e-12), "PopulationSkewness z");
Assert.That(y.Kurtosis, Is.EqualTo(x.Kurtosis).Within(1e-12), "Kurtosis y");
Assert.That(z.Kurtosis, Is.EqualTo(x.Kurtosis).Within(1e-12), "Kurtosis z");
Assert.That(y.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis y");
Assert.That(z.PopulationKurtosis, Is.EqualTo(x.PopulationKurtosis).Within(1e-12), "PopulationKurtosis z");
}
[TestCase("lottery")]
[TestCase("lew")]
[TestCase("mavro")]
[TestCase("michelso")]
[TestCase("numacc1")]
[TestCase("meixner")]
/// Generates samples with weightings that are integral and compares that to the unweighted statistics result. Doesn't correspond with the
/// higher order sample statistics because our weightings represent reliability weights, *not* frequency weights, and the Bessel correction is
/// calculated appropriately - so don't let the construction of the test mislead you.
public void ConsistentWithUnweighted (string dataSet)
{
var data = _data[dataSet].Data.ToArray();
var gen = new DiscreteUniform(1, 5);
var weights = new int[data.Length];
gen.Samples(weights);
var stats = new RunningWeightedStatistics( data.Select((x, i) => System.Tuple.Create((double)weights[i], x)) );
var stats2 = new RunningStatistics();
for (int i = 0; i < data.Length; ++i)
for(int j = 0; j < weights[i] ; ++j)
stats2.Push(data[i]);
var sumWeights = weights.Sum();
Assert.That(stats.TotalWeight, Is.EqualTo(sumWeights), "TotalWeight");
Assert.That(stats.Count, Is.EqualTo(weights.Length), "Count");
Assert.That(stats2.Minimum, Is.EqualTo(stats.Minimum), "Minimum");
Assert.That(stats2.Maximum, Is.EqualTo(stats.Maximum), "Maximum");
Assert.That(stats2.Mean, Is.EqualTo(stats.Mean).Within(1e-8), "Mean");
Assert.That(stats2.PopulationVariance, Is.EqualTo(stats.PopulationVariance).Within(1e-9), "PopulationVariance");
Assert.That(stats2.PopulationStandardDeviation, Is.EqualTo(stats.PopulationStandardDeviation).Within(1e-9), "PopulationStandardDeviation");
Assert.That(stats2.PopulationSkewness, Is.EqualTo(stats.PopulationSkewness).Within(1e-8), "PopulationSkewness");
Assert.That(stats2.PopulationKurtosis, Is.EqualTo(stats.PopulationKurtosis).Within(1e-8), "PopulationKurtosis");
}
}
}

432
src/Numerics.Tests/StatisticsTests/WeightedDescriptiveStatisticsTests.cs

@ -0,0 +1,432 @@
// <copyright file="DescriptiveStatisticsTests.cs" company="Math.NET">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2016 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
using System;
using System.Collections.Generic;
using System.Linq;
#if NET5_0_OR_GREATER
using System.Text.Json;
using System.Text.Json.Serialization;
#endif
using NUnit.Framework;
using MathNet.Numerics.Statistics;
namespace MathNet.Numerics.UnitTests.StatisticsTests
{
/// <summary>
/// Descriptive statistics tests.
/// </summary>
/// <remarks>NOTE: this class is not included into Silverlight version, because it uses data from local files.
/// In Silverlight access to local files is forbidden, except several cases.</remarks>
[TestFixture, Category("Statistics")]
public class WeightedDescriptiveStatisticsTests
{
/// <summary>
/// Statistics data.
/// </summary>
readonly IDictionary<string, StatTestData> _data = new Dictionary<string, StatTestData>();
/// <summary>
/// Initializes a new instance of the DescriptiveStatisticsTests class.
/// </summary>
public WeightedDescriptiveStatisticsTests()
{
_data.Add("lottery", new StatTestData("NIST.Lottery.dat"));
_data.Add("lew", new StatTestData("NIST.Lew.dat"));
_data.Add("mavro", new StatTestData("NIST.Mavro.dat"));
_data.Add("michelso", new StatTestData("NIST.Michelso.dat"));
_data.Add("numacc1", new StatTestData("NIST.NumAcc1.dat"));
_data.Add("numacc2", new StatTestData("NIST.NumAcc2.dat"));
_data.Add("numacc3", new StatTestData("NIST.NumAcc3.dat"));
_data.Add("numacc4", new StatTestData("NIST.NumAcc4.dat"));
_data.Add("meixner", new StatTestData("NIST.Meixner.dat"));
}
/// <summary>
/// Constructor with <c>null</c> throws <c>ArgumentNullException</c>.
/// </summary>
[Test]
public void ConstructorThrowArgumentNullException()
{
const IEnumerable<Tuple<double, double>> WeightedData = null;
const IEnumerable<(double, double)> WeightedData2 = null;
Assert.That(() => new WeightedDescriptiveStatistics(WeightedData), Throws.TypeOf<ArgumentNullException>());
Assert.That(() => new WeightedDescriptiveStatistics(WeightedData, true), Throws.TypeOf<ArgumentNullException>());
Assert.That(() => new WeightedDescriptiveStatistics(WeightedData2), Throws.TypeOf<ArgumentNullException>());
Assert.That(() => new WeightedDescriptiveStatistics(WeightedData2, true), Throws.TypeOf<ArgumentNullException>());
}
/// <summary>
/// <c>IEnumerable</c> Double.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
[TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)]
public void IEnumerableTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)));
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> Double.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
[TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)]
public void IEnumerableValueTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => (1.0, x)));
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> Double high accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), true);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> Double high accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableValueTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => (1.0, x)), true);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> double low accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.Data.Select(x => Tuple.Create(1.0, x)), false);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> <c>Nullable</c> double.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableZeroWeightTuple(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)));
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> <c>Nullable</c> double high accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableZeroWeightTupleHighAccuracy(string dataSet, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), true);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, 14);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 9);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 9);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
/// <summary>
/// <c>IEnumerable</c> <c>Nullable</c> Double Low Accuracy.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 14, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 14, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
public void IEnumerableZeroWeightTupleLowAccuracy(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var stats = new WeightedDescriptiveStatistics(data.DataWithNulls.Select(x => x.HasValue ? Tuple.Create(1.0, x.Value) : Tuple.Create(0.0, 3.14159)), false);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 14);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 7);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 7);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
[Test]
public void ShortSequences()
{
var stats5 = new WeightedDescriptiveStatistics(new Tuple<double, double>[0]);
Assert.That(stats5.Skewness, Is.NaN);
Assert.That(stats5.Kurtosis, Is.NaN);
var stats6 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0) });
Assert.That(stats6.Skewness, Is.NaN);
Assert.That(stats6.Kurtosis, Is.NaN);
var stats7 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0) });
Assert.That(stats7.Skewness, Is.NaN);
Assert.That(stats7.Kurtosis, Is.NaN);
var stats8 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0) });
Assert.That(stats8.Skewness, Is.Not.NaN);
Assert.That(stats8.Kurtosis, Is.NaN);
var stats9 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 1.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, -3.0), Tuple.Create(1.0, -4.0) });
Assert.That(stats9.Skewness, Is.Not.NaN);
Assert.That(stats9.Kurtosis, Is.Not.NaN);
}
[Test]
public void NegativeWeightsThrow()
{
Assert.That(() => new WeightedDescriptiveStatistics(new[] { Tuple.Create(-1.0, 1.0) }), Throws.TypeOf<ArgumentOutOfRangeException>());
}
[Test]
public void ZeroVarianceSequence()
{
var stats2 = new WeightedDescriptiveStatistics(new[] { Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0), Tuple.Create(1.0, 2.0) });
Assert.That(stats2.Skewness, Is.NaN);
Assert.That(stats2.Kurtosis, Is.NaN);
}
#if NET5_0_OR_GREATER
/// <summary>
/// <c>IEnumerable</c> Double.
/// </summary>
/// <param name="dataSet">Dataset name.</param>
/// <param name="digits">Digits count.</param>
/// <param name="skewness">Skewness value.</param>
/// <param name="kurtosis">Kurtosis value.</param>
/// <param name="median">Median value.</param>
/// <param name="min">Min value.</param>
/// <param name="max">Max value.</param>
/// <param name="count">Count value.</param>
[TestCase("lottery", 12, -0.09333165310779, -1.19256091074856, 522.5, 4, 999, 218)]
[TestCase("lew", 12, -0.050606638756334, -1.49604979214447, -162, -579, 300, 200)]
[TestCase("mavro", 11, 0.64492948110824, -0.82052379677456, 2.0018, 2.0013, 2.0027, 50)]
[TestCase("michelso", 11, -0.0185388637725746, 0.33968459842539, 299.85, 299.62, 300.07, 100)]
[TestCase("numacc1", 15, 0, double.NaN, 10000002, 10000001, 10000003, 3)]
[TestCase("numacc2", 13, 0, -2.003003003003, 1.2, 1.1, 1.3, 1001)]
[TestCase("numacc3", 9, 0, -2.003003003003, 1000000.2, 1000000.1, 1000000.3, 1001)]
[TestCase("numacc4", 7, 0, -2.00300300299913, 10000000.2, 10000000.1, 10000000.3, 1001)]
[TestCase("meixner", 8, -0.016649617280859657, 0.8171318629552635, -0.002042931016531602, -4.825626912281697, 5.3018298664184913, 10000)]
public void JsonDeserializationTest(string dataSet, int digits, double skewness, double kurtosis, double median, double min, double max, int count)
{
var data = _data[dataSet];
var serialize = new WeightedDescriptiveStatistics(data.Data.Select(x => (1.0, x)), false);
var jsonSerializerOptions = new JsonSerializerOptions
{
NumberHandling = JsonNumberHandling.AllowNamedFloatingPointLiterals,
};
var json = JsonSerializer.Serialize(serialize, jsonSerializerOptions);
var stats = JsonSerializer.Deserialize<DescriptiveStatistics>(json, jsonSerializerOptions);
Assert.NotNull(stats);
AssertHelpers.AlmostEqualRelative(data.Mean, stats.Mean, 10);
AssertHelpers.AlmostEqualRelative(data.StandardDeviation, stats.StandardDeviation, digits);
AssertHelpers.AlmostEqualRelative(skewness, stats.Skewness, 8);
AssertHelpers.AlmostEqualRelative(kurtosis, stats.Kurtosis, 8);
Assert.AreEqual(stats.Minimum, min);
Assert.AreEqual(stats.Maximum, max);
Assert.AreEqual(stats.Count, count);
Assert.AreEqual(stats.TotalWeight, count);
}
#endif
}
}

298
src/Numerics/Statistics/DescriptiveStatistics.cs

@ -110,35 +110,6 @@ namespace MathNet.Numerics.Statistics
}
}
/// <summary>
/// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
/// </summary>
/// <param name="data">The sample data.</param>
/// <param name="increasedAccuracy">
/// If set to <c>true</c>, increased accuracy mode used.
/// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
/// </param>
/// <remarks>
/// Don't use increased accuracy for data sets containing large values (in absolute value).
/// This may cause the calculations to overflow.
/// </remarks>
public DescriptiveStatistics(IEnumerable<Tuple<double, double>> data, bool increasedAccuracy = false)
{
if (data == null)
{
throw new ArgumentNullException(nameof(data));
}
if (increasedAccuracy)
{
ComputeDecimal(data);
}
else
{
Compute(data);
}
}
#if NET5_0_OR_GREATER
/// <summary>
/// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
@ -192,7 +163,7 @@ namespace MathNet.Numerics.Statistics
public double StandardDeviation { get; private set; }
/// <summary>
/// Gets the unbiased estimator of the population skewness.
/// Gets the sample skewness.
/// </summary>
/// <value>The sample skewness.</value>
/// <remarks>Returns zero if <see cref="Count"/> is less than three. </remarks>
@ -203,7 +174,7 @@ namespace MathNet.Numerics.Statistics
public double Skewness { get; private set; }
/// <summary>
/// Gets the unbiased estimator of the population excess kurtosis using the G_2 estimator.
/// Gets the sample kurtosis.
/// </summary>
/// <value>The sample kurtosis.</value>
/// <remarks>Returns zero if <see cref="Count"/> is less than four. </remarks>
@ -233,17 +204,6 @@ namespace MathNet.Numerics.Statistics
#endif
public double Minimum { get; private set; }
/// <summary>
/// Gets the total weight. When used with unweighted data, returns the number of samples.
/// </summary>
/// <value>The total weight.</value>
[DataMember(Order = 9)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double TotalWeight { get; private set; }
/// <summary>
/// Computes descriptive statistics from a stream of data values.
/// </summary>
@ -261,17 +221,17 @@ namespace MathNet.Numerics.Statistics
foreach (var xi in data)
{
double delta = xi - mean;
double scaleDelta = delta/++n;
double scaleDeltaSqr = scaleDelta*scaleDelta;
double tmpDelta = delta*(n - 1);
double scaleDelta = delta / ++n;
double scaleDeltaSqr = scaleDelta * scaleDelta;
double tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3)
+ 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness;
kurtosis += tmpDelta * scaleDelta * scaleDeltaSqr * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSqr * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance;
variance += tmpDelta*scaleDelta;
skewness += tmpDelta * scaleDeltaSqr * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi)
{
@ -306,17 +266,17 @@ namespace MathNet.Numerics.Statistics
if (xi.HasValue)
{
double delta = xi.Value - mean;
double scaleDelta = delta/++n;
double scaleDeltaSqr = scaleDelta*scaleDelta;
double tmpDelta = delta*(n - 1);
double scaleDelta = delta / ++n;
double scaleDeltaSqr = scaleDelta * scaleDelta;
double tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta*scaleDelta*scaleDeltaSqr*(n*n - 3*n + 3)
+ 6*scaleDeltaSqr*variance - 4*scaleDelta*skewness;
kurtosis += tmpDelta * scaleDelta * scaleDeltaSqr * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSqr * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta*scaleDeltaSqr*(n - 2) - 3*scaleDelta*variance;
variance += tmpDelta*scaleDelta;
skewness += tmpDelta * scaleDeltaSqr * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi)
{
@ -351,17 +311,17 @@ namespace MathNet.Numerics.Statistics
{
decimal xi = (decimal)x;
decimal delta = xi - mean;
decimal scaleDelta = delta/++n;
decimal scaleDelta2 = scaleDelta*scaleDelta;
decimal tmpDelta = delta*(n - 1);
decimal scaleDelta = delta / ++n;
decimal scaleDelta2 = scaleDelta * scaleDelta;
decimal tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta*scaleDelta*scaleDelta2*(n*n - 3*n + 3)
+ 6*scaleDelta2*variance - 4*scaleDelta*skewness;
kurtosis += tmpDelta * scaleDelta * scaleDelta2 * (n * n - 3 * n + 3)
+ 6 * scaleDelta2 * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta*scaleDelta2*(n - 2) - 3*scaleDelta*variance;
variance += tmpDelta*scaleDelta;
skewness += tmpDelta * scaleDelta2 * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi)
{
@ -397,17 +357,17 @@ namespace MathNet.Numerics.Statistics
{
decimal xi = (decimal)x.Value;
decimal delta = xi - mean;
decimal scaleDelta = delta/++n;
decimal scaleDeltaSquared = scaleDelta*scaleDelta;
decimal tmpDelta = delta*(n - 1);
decimal scaleDelta = delta / ++n;
decimal scaleDeltaSquared = scaleDelta * scaleDelta;
decimal tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta*scaleDelta*scaleDeltaSquared*(n*n - 3*n + 3)
+ 6*scaleDeltaSquared*variance - 4*scaleDelta*skewness;
kurtosis += tmpDelta * scaleDelta * scaleDeltaSquared * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSquared * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta*scaleDeltaSquared*(n - 2) - 3*scaleDelta*variance;
variance += tmpDelta*scaleDelta;
skewness += tmpDelta * scaleDeltaSquared * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi)
{
@ -438,190 +398,6 @@ namespace MathNet.Numerics.Statistics
{
Mean = mean;
Count = n;
TotalWeight = n;
Minimum = double.NaN;
Maximum = double.NaN;
Variance = double.NaN;
StandardDeviation = double.NaN;
Skewness = double.NaN;
Kurtosis = double.NaN;
if (n > 0)
{
Minimum = minimum;
Maximum = maximum;
if (n > 1)
{
Variance = variance/(n - 1);
StandardDeviation = Math.Sqrt(Variance);
}
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n/((n - 1)*(n - 2))*(skewness/(Variance*StandardDeviation));
}
if (n > 3)
{
Kurtosis = ((double)n*n - 1)/((n - 2)*(n - 3))
*(n*kurtosis/(variance*variance) - 3 + 6.0/(n + 1));
}
}
}
}
/// <summary>
/// Computes descriptive statistics from a stream of data values and weights.
/// </summary>
/// <param name="data">A sequence of datapoints.</param>
void Compute(IEnumerable<Tuple<double, double>> data)
{
double mean = 0;
double variance = 0;
double skewness = 0;
double kurtosis = 0;
double minimum = double.PositiveInfinity;
double maximum = double.NegativeInfinity;
long n = 0;
double totalWeight = 0;
double den = 0;
double V2 = 0;
double V3 = 0;
double V4 = 0;
foreach (var (w, xi) in data)
{
if (w < 0)
{
throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample");
}
else if (w > 0)
{
++n;
double delta = xi - mean;
double prevWeight = totalWeight;
totalWeight += w;
V2 += w * w;
V3 += w * w * w;
V4 += w * w * w * w;
den += w * (2.0 * prevWeight - den) / totalWeight;
double scaleDelta = delta * w / totalWeight;
double scaleDeltaSqr = scaleDelta * scaleDelta;
double tmpDelta = delta * scaleDelta * prevWeight;
double r = prevWeight / w;
mean += scaleDelta;
kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0)
+ 6.0 * scaleDeltaSqr * variance - 4.0 * scaleDelta * skewness;
skewness += tmpDelta * scaleDelta * (r - 1.0) - 3.0 * scaleDelta * variance;
variance += tmpDelta;
if (minimum > xi)
{
minimum = xi;
}
if (maximum < xi)
{
maximum = xi;
}
}
}
SetStatisticsWeighted(mean, variance, skewness, kurtosis, minimum, maximum, n, totalWeight, den, V2, V3, V4);
}
/// <summary>
/// Computes descriptive statistics from a stream of data values and weights.
/// </summary>
/// <param name="data">A sequence of datapoints.</param>
void ComputeDecimal(IEnumerable<Tuple<double, double>> data)
{
decimal mean = 0;
decimal variance = 0;
decimal skewness = 0;
decimal kurtosis = 0;
decimal minimum = decimal.MaxValue;
decimal maximum = decimal.MinValue;
decimal totalWeight = 0;
long n = 0;
decimal den = 0;
decimal V2 = 0;
decimal V3 = 0;
decimal V4 = 0;
foreach (var (w, x) in data)
{
if (w < 0)
{
throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample");
}
else if (w > 0)
{
decimal xi = (decimal)x;
decimal decW = (decimal)w;
++n;
decimal delta = xi - mean;
decimal prevWeight = totalWeight;
totalWeight += decW;
V2 += decW * decW;
V3 += decW * decW * decW;
V4 += decW * decW * decW * decW;
den += decW * (2.0m * prevWeight - den) / totalWeight;
decimal scaleDelta = delta * decW / totalWeight;
decimal scaleDeltaSqr = scaleDelta * scaleDelta;
decimal tmpDelta = delta * scaleDelta * prevWeight;
decimal r = prevWeight / decW;
mean += scaleDelta;
kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0m)
+ 6.0m * scaleDeltaSqr * variance - 4.0m * scaleDelta * skewness;
skewness += tmpDelta * scaleDelta * (r - 1.0m) - 3.0m * scaleDelta * variance;
variance += tmpDelta;
if (minimum > xi)
{
minimum = xi;
}
if (maximum < xi)
{
maximum = xi;
}
}
}
SetStatisticsWeighted((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n, (double)totalWeight, (double)den, (double)V2, (double)V3, (double)V4);
}
/// <summary>
/// Internal use. Method use for setting the statistics.
/// </summary>
/// <param name="mean">For setting Mean.</param>
/// <param name="variance">For setting Variance.</param>
/// <param name="skewness">For setting Skewness.</param>
/// <param name="kurtosis">For setting Kurtosis.</param>
/// <param name="minimum">For setting Minimum.</param>
/// <param name="maximum">For setting Maximum.</param>
/// <param name="n">For setting Count.</param>
void SetStatisticsWeighted(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, long n, double w1, double den, double w2, double w3, double w4)
{
Mean = mean;
Count = n;
TotalWeight = w1;
Minimum = double.NaN;
Maximum = double.NaN;
@ -637,7 +413,7 @@ namespace MathNet.Numerics.Statistics
if (n > 1)
{
Variance = variance / den;
Variance = variance / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
}
@ -645,19 +421,13 @@ namespace MathNet.Numerics.Statistics
{
if (n > 2)
{
var skewDen = (w1 * (w1 * w1 - 3.0 * w2) + 2.0 * w3) / (w1 * w1);
Skewness = skewness / (skewDen * Variance * StandardDeviation);
Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
}
if (n > 3)
{
double p2 = w1 * w1;
double p4 = p2 * p2;
double w2p2 = w2 * w2;
double poly = p4 - 6.0 * p2 * w2 + 8.0 * w1 * w3 + 3.0 * w2p2 - 6.0 * w4;
double a = p4 - 4.0 * w1 * w3 + 3.0 * w2p2;
double b = 3.0 * (p4 - 2.0 * p2 * w2 + 4.0 * w1 * w3 - 3.0 * w2p2);
Kurtosis = (a * w1 * kurtosis / (variance * variance) - b) * (den / (w1 * poly));
Kurtosis = ((double)n * n - 1) / ((n - 2) * (n - 3))
* (n * kurtosis / (variance * variance) - 3 + 6.0 / (n + 1));
}
}
}

17
src/Numerics/Statistics/RunningWeightedStatistics.cs

@ -37,8 +37,8 @@ using System.Runtime.Serialization;
namespace MathNet.Numerics.Statistics
{
/// <summary>
/// Running statistics accumulator, allows updating by adding values
/// or by combining two accumulators.
/// Running weighted statistics accumulator, allows updating by adding values
/// or by combining two accumulators. Weights are reliability weights, not frequency weights.
/// </summary>
/// <remarks>
/// This type declares a DataContract for out of the box ephemeral serialization
@ -144,7 +144,7 @@ namespace MathNet.Numerics.Statistics
/// <summary>
/// Estimates the unbiased population variance from the provided samples.
/// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
/// Will use the Bessel correction for reliability weighting.
/// Returns NaN if data has less than two entries or if any entry is NaN.
/// </summary>
public double Variance => _n < 2 ? double.NaN : _m2 / _den;
@ -158,7 +158,7 @@ namespace MathNet.Numerics.Statistics
/// <summary>
/// Estimates the unbiased population standard deviation from the provided samples.
/// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
/// Will use the Bessel correction for reliability weighting.
/// Returns NaN if data has less than two entries or if any entry is NaN.
/// </summary>
public double StandardDeviation => _n < 2 ? double.NaN : Math.Sqrt(_m2 / _den);
@ -172,7 +172,7 @@ namespace MathNet.Numerics.Statistics
/// <summary>
/// Estimates the unbiased population skewness from the provided samples.
/// Uses a normalizer (Bessel's correction; type 2).
/// Will use the Bessel correction for reliability weighting.
/// Returns NaN if data has less than three entries or if any entry is NaN.
/// </summary>
public double Skewness
@ -198,7 +198,7 @@ namespace MathNet.Numerics.Statistics
/// <summary>
/// Estimates the unbiased population excess kurtosis from the provided samples.
/// Uses a normalizer (Bessel's correction; type 2).
/// Will use the Bessel correction for reliability weighting.
/// Returns NaN if data has less than four entries or if any entry is NaN.
/// Equivalent formula for this for weighted distributions are unknown.
/// </summary>
@ -233,6 +233,11 @@ namespace MathNet.Numerics.Statistics
/// </summary>
public double TotalWeight => _w1;
/// <summary>
/// The Kish's Effective Sample Size
/// </summary>
public double EffectiveSampleSize => _w2 / _w1;
/// <summary>
/// Update the running statistics by adding another observed sample (in-place).
/// </summary>

411
src/Numerics/Statistics/WeightedDescriptiveStatistics.cs

@ -0,0 +1,411 @@
// <copyright file="DescriptiveStatistics.cs" company="Math.NET">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
//
// Copyright (c) 2009-2015 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
using System;
using System.Collections.Generic;
using System.Runtime.Serialization;
using System.Linq;
#if NET5_0_OR_GREATER
using System.Text.Json.Serialization;
#endif
namespace MathNet.Numerics.Statistics
{
/// <summary>
/// Computes the basic statistics of data set. The class meets the
/// NIST standard of accuracy for mean, variance, and standard deviation
/// (the only statistics they provide exact values for) and exceeds them
/// in increased accuracy mode.
/// Recommendation: consider to use RunningWeightedStatistics instead.
/// </summary>
/// <remarks>
/// This type declares a DataContract for out of the box ephemeral serialization
/// with engines like DataContractSerializer, Protocol Buffers and FsPickler,
/// but does not guarantee any compatibility between versions.
/// It is not recommended to rely on this mechanism for durable persistence.
/// </remarks>
[DataContract(Namespace = "urn:MathNet/Numerics")]
public class WeightedDescriptiveStatistics
{
/// <summary>
/// Initializes a new instance of the <see cref="WeightedDescriptiveStatistics"/> class.
/// </summary>
/// <param name="data">The sample data.</param>
/// <param name="increasedAccuracy">
/// If set to <c>true</c>, increased accuracy mode used.
/// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
/// </param>
/// <remarks>
/// Don't use increased accuracy for data sets containing large values (in absolute value).
/// This may cause the calculations to overflow.
/// </remarks>
public WeightedDescriptiveStatistics(IEnumerable<Tuple<double, double>> data, bool increasedAccuracy = false)
{
if (data == null)
{
throw new ArgumentNullException(nameof(data));
}
var data2 = data.Select(x => (x.Item1, x.Item2));
if (increasedAccuracy)
ComputeDecimal(data2);
else
Compute(data2);
}
/// <summary>
/// Initializes a new instance of the <see cref="WeightedDescriptiveStatistics"/> class.
/// </summary>
/// <param name="data">The sample data.</param>
/// <param name="increasedAccuracy">
/// If set to <c>true</c>, increased accuracy mode used.
/// Increased accuracy mode uses <see cref="decimal"/> types for internal calculations.
/// </param>
/// <remarks>
/// Don't use increased accuracy for data sets containing large values (in absolute value).
/// This may cause the calculations to overflow.
/// </remarks>
public WeightedDescriptiveStatistics(IEnumerable<(double, double)> data, bool increasedAccuracy = false)
{
if (data == null)
{
throw new ArgumentNullException(nameof(data));
}
if (increasedAccuracy)
ComputeDecimal(data);
else
Compute(data);
}
#if NET5_0_OR_GREATER
/// <summary>
/// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
/// </summary>
/// <remarks>
/// Used for Json serialization
/// </remarks>
public DescriptiveStatistics()
{
}
#endif
/// <summary>
/// Gets the size of the sample.
/// </summary>
/// <value>The size of the sample.</value>
[DataMember(Order = 1)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public long Count { get; private set; }
/// <summary>
/// Gets the sample mean.
/// </summary>
/// <value>The sample mean.</value>
[DataMember(Order = 2)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double Mean { get; private set; }
/// <summary>
/// Gets the unbiased population variance estimator (on a dataset of size N will use an N-1 normalizer).
/// </summary>
/// <value>The sample variance.</value>
[DataMember(Order = 3)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double Variance { get; private set; }
/// <summary>
/// Gets the unbiased population standard deviation (on a dataset of size N will use an N-1 normalizer).
/// </summary>
/// <value>The sample standard deviation.</value>
[DataMember(Order = 4)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double StandardDeviation { get; private set; }
/// <summary>
/// Gets the unbiased estimator of the population skewness.
/// </summary>
/// <value>The sample skewness.</value>
/// <remarks>Returns zero if <see cref="Count"/> is less than three. </remarks>
[DataMember(Order = 5)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double Skewness { get; private set; }
/// <summary>
/// Gets the unbiased estimator of the population excess kurtosis using the G_2 estimator.
/// </summary>
/// <value>The sample kurtosis.</value>
/// <remarks>Returns zero if <see cref="Count"/> is less than four. </remarks>
[DataMember(Order = 6)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double Kurtosis { get; private set; }
/// <summary>
/// Gets the maximum sample value.
/// </summary>
/// <value>The maximum sample value.</value>
[DataMember(Order = 7)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double Maximum { get; private set; }
/// <summary>
/// Gets the minimum sample value.
/// </summary>
/// <value>The minimum sample value.</value>
[DataMember(Order = 8)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double Minimum { get; private set; }
/// <summary>
/// Gets the total weight. When used with unweighted data, returns the number of samples.
/// </summary>
/// <value>The total weight.</value>
[DataMember(Order = 9)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double TotalWeight { get; private set; }
/// <summary>
/// The Kish's effective sample size https://en.wikipedia.org/wiki/Effective_sample_size
/// </summary>
/// <value>The Kish's effective sample size.</value>
[DataMember(Order = 10)]
#if NET5_0_OR_GREATER
[JsonInclude]
#endif
public double EffectiveSampleSize { get; private set; }
/// <summary>
/// Computes descriptive statistics from a stream of data values and reliability weights.
/// </summary>
/// <param name="data">A sequence of datapoints.</param>
void Compute(IEnumerable<(double, double)> data)
{
double mean = 0;
double variance = 0;
double skewness = 0;
double kurtosis = 0;
double minimum = double.PositiveInfinity;
double maximum = double.NegativeInfinity;
long n = 0;
double totalWeight = 0;
double den = 0;
double V2 = 0;
double V3 = 0;
double V4 = 0;
foreach (var (w, xi) in data)
{
if (w < 0)
{
throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample");
}
else if (w > 0)
{
++n;
double delta = xi - mean;
double prevWeight = totalWeight;
totalWeight += w;
V2 += w * w;
V3 += w * w * w;
V4 += w * w * w * w;
den += w * (2.0 * prevWeight - den) / totalWeight;
double scaleDelta = delta * w / totalWeight;
double scaleDeltaSqr = scaleDelta * scaleDelta;
double tmpDelta = delta * scaleDelta * prevWeight;
double r = prevWeight / w;
mean += scaleDelta;
kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0)
+ 6.0 * scaleDeltaSqr * variance - 4.0 * scaleDelta * skewness;
skewness += tmpDelta * scaleDelta * (r - 1.0) - 3.0 * scaleDelta * variance;
variance += tmpDelta;
if (minimum > xi)
{
minimum = xi;
}
if (maximum < xi)
{
maximum = xi;
}
}
}
SetStatisticsWeighted(mean, variance, skewness, kurtosis, minimum, maximum, n, totalWeight, den, V2, V3, V4);
}
/// <summary>
/// Computes descriptive statistics from a stream of data values and reliability weights.
/// </summary>
/// <param name="data">A sequence of datapoints.</param>
void ComputeDecimal(IEnumerable<(double, double)> data)
{
decimal mean = 0;
decimal variance = 0;
decimal skewness = 0;
decimal kurtosis = 0;
decimal minimum = decimal.MaxValue;
decimal maximum = decimal.MinValue;
decimal totalWeight = 0;
long n = 0;
decimal den = 0;
decimal V2 = 0;
decimal V3 = 0;
decimal V4 = 0;
foreach (var (w, x) in data)
{
if (w < 0)
{
throw new ArgumentOutOfRangeException(nameof(data), w, "Expected non-negative weighting of sample");
}
else if (w > 0)
{
decimal xi = (decimal)x;
decimal decW = (decimal)w;
++n;
decimal delta = xi - mean;
decimal prevWeight = totalWeight;
totalWeight += decW;
V2 += decW * decW;
V3 += decW * decW * decW;
V4 += decW * decW * decW * decW;
den += decW * (2.0m * prevWeight - den) / totalWeight;
decimal scaleDelta = delta * decW / totalWeight;
decimal scaleDeltaSqr = scaleDelta * scaleDelta;
decimal tmpDelta = delta * scaleDelta * prevWeight;
decimal r = prevWeight / decW;
mean += scaleDelta;
kurtosis += tmpDelta * scaleDeltaSqr * (r * r - r + 1.0m)
+ 6.0m * scaleDeltaSqr * variance - 4.0m * scaleDelta * skewness;
skewness += tmpDelta * scaleDelta * (r - 1.0m) - 3.0m * scaleDelta * variance;
variance += tmpDelta;
if (minimum > xi)
{
minimum = xi;
}
if (maximum < xi)
{
maximum = xi;
}
}
}
SetStatisticsWeighted((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n, (double)totalWeight, (double)den, (double)V2, (double)V3, (double)V4);
}
/// <summary>
/// Internal use. Method use for setting the statistics.
/// </summary>
/// <param name="mean">For setting Mean.</param>
/// <param name="variance">For setting Variance.</param>
/// <param name="skewness">For setting Skewness.</param>
/// <param name="kurtosis">For setting Kurtosis.</param>
/// <param name="minimum">For setting Minimum.</param>
/// <param name="maximum">For setting Maximum.</param>
/// <param name="n">For setting Count.</param>
void SetStatisticsWeighted(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, long n, double w1, double den, double w2, double w3, double w4)
{
Mean = mean;
Count = n;
TotalWeight = w1;
EffectiveSampleSize = w1 * w1 / w2;
Minimum = double.NaN;
Maximum = double.NaN;
Variance = double.NaN;
StandardDeviation = double.NaN;
Skewness = double.NaN;
Kurtosis = double.NaN;
if (n > 0)
{
Minimum = minimum;
Maximum = maximum;
if (n > 1)
{
Variance = variance / den;
StandardDeviation = Math.Sqrt(Variance);
}
if (Variance != 0)
{
if (n > 2)
{
var skewDen = (w1 * (w1 * w1 - 3.0 * w2) + 2.0 * w3) / (w1 * w1);
Skewness = skewness / (skewDen * Variance * StandardDeviation);
}
if (n > 3)
{
double p2 = w1 * w1;
double p4 = p2 * p2;
double w2p2 = w2 * w2;
double poly = p4 - 6.0 * p2 * w2 + 8.0 * w1 * w3 + 3.0 * w2p2 - 6.0 * w4;
double a = p4 - 4.0 * w1 * w3 + 3.0 * w2p2;
double b = 3.0 * (p4 - 2.0 * p2 * w2 + 4.0 * w1 * w3 - 3.0 * w2p2);
Kurtosis = (a * w1 * kurtosis / (variance * variance) - b) * (den / (w1 * poly));
}
}
}
}
}
}
Loading…
Cancel
Save