From 539c1b4331b27f27e1ad09d8d4053073cf9cc2ff Mon Sep 17 00:00:00 2001
From: Marcus Cuda
Date: Thu, 23 Apr 2015 14:11:47 +0300
Subject: [PATCH] playing around with moving stats

---
 src/Numerics/Numerics.csproj                  |   1 +
 src/Numerics/Statistics/MovingStatistics.cs   | 282 ++++++++++++++++++
 .../StatisticsTests/MovingStatisticsTests.cs  |  66 ++++
 src/UnitTests/UnitTests.csproj                |   1 +
 4 files changed, 350 insertions(+)
 create mode 100644 src/Numerics/Statistics/MovingStatistics.cs
 create mode 100644 src/UnitTests/StatisticsTests/MovingStatisticsTests.cs

diff --git a/src/Numerics/Numerics.csproj b/src/Numerics/Numerics.csproj
index b783234e..f286723b 100644
--- a/src/Numerics/Numerics.csproj
+++ b/src/Numerics/Numerics.csproj
@@ -211,6 +211,7 @@
+
diff --git a/src/Numerics/Statistics/MovingStatistics.cs b/src/Numerics/Statistics/MovingStatistics.cs
new file mode 100644
index 00000000..6e97b720
--- /dev/null
+++ b/src/Numerics/Statistics/MovingStatistics.cs
@@ -0,0 +1,282 @@
+//
+// Math.NET Numerics, part of the Math.NET Project
+// http://numerics.mathdotnet.com
+// http://github.com/mathnet/mathnet-numerics
+// http://mathnetnumerics.codeplex.com
+//
+// Copyright (c) 2009-2015 Math.NET
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections.Generic;
+using MathNet.Numerics.Properties;
+
+namespace MathNet.Numerics.Statistics
+{
+    /// <summary>
+    /// Running statistics over a sliding window of data, allows updating by adding values.
+    /// Once <see cref="WindowSize"/> samples are held, each pushed value replaces the oldest one.
+    /// Only mean, variance and the extremes are tracked; higher moments are not available.
+    /// </summary>
+    public class MovingStatistics
+    {
+        readonly double[] _oldValues;
+        readonly int _windowSize;
+        int _lastIndex;
+        double _m1;
+        double _m2;
+        double _max = double.NegativeInfinity;
+        double _min = double.PositiveInfinity;
+
+        /// <summary>
+        /// Creates an empty window holding at most <paramref name="windowSize"/> samples.
+        /// </summary>
+        /// <param name="windowSize">Capacity of the window; must be at least 1.</param>
+        /// <exception cref="ArgumentException">If <paramref name="windowSize"/> is less than 1.</exception>
+        public MovingStatistics(int windowSize)
+        {
+            if (windowSize < 1)
+            {
+                // The message takes no format arguments, so it is used as is.
+                throw new ArgumentException(Resources.ArgumentMustBePositive, "windowSize");
+            }
+
+            _windowSize = windowSize;
+            _oldValues = new double[_windowSize];
+        }
+
+        /// <summary>
+        /// Creates a window of the given size and pushes all <paramref name="values"/> into it.
+        /// </summary>
+        /// <param name="windowSize">Capacity of the window; must be at least 1.</param>
+        /// <param name="values">Initial samples, pushed in enumeration order.</param>
+        public MovingStatistics(int windowSize, IEnumerable<double> values) : this(windowSize)
+        {
+            PushRange(values);
+        }
+
+        /// <summary>
+        /// Gets the maximum number of samples held in the window.
+        /// </summary>
+        public int WindowSize
+        {
+            get { return _windowSize; }
+        }
+
+        /// <summary>
+        /// Gets the number of samples currently in the window; never exceeds <see cref="WindowSize"/>.
+        /// </summary>
+        public long Count { get; private set; }
+
+        /// <summary>
+        /// Returns the minimum value in the window.
+        /// Returns NaN if the window is empty or if any entry in it is NaN.
+        /// </summary>
+        public double Minimum
+        {
+            get { return Count > 0 ? _min : double.NaN; }
+        }
+
+        /// <summary>
+        /// Returns the maximum value in the window.
+        /// Returns NaN if the window is empty or if any entry in it is NaN.
+        /// </summary>
+        public double Maximum
+        {
+            get { return Count > 0 ? _max : double.NaN; }
+        }
+
+        /// <summary>
+        /// Evaluates the sample mean of the window, an estimate of the population mean.
+        /// Returns NaN if the window is empty or if any entry in it is NaN.
+        /// </summary>
+        public double Mean
+        {
+            get { return Count > 0 ? _m1 : double.NaN; }
+        }
+
+        /// <summary>
+        /// Estimates the unbiased population variance from the samples in the window.
+        /// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
+        /// Returns NaN if the window has less than two entries or if any entry in it is NaN.
+        /// </summary>
+        public double Variance
+        {
+            get { return Count < 2 ? double.NaN : _m2/(Count - 1); }
+        }
+
+        /// <summary>
+        /// Evaluates the variance of the window, treating it as the full population.
+        /// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset.
+        /// Returns NaN if the window is empty or if any entry in it is NaN.
+        /// </summary>
+        public double PopulationVariance
+        {
+            // A single sample is a complete population with zero spread, so only the empty case is NaN.
+            get { return Count < 1 ? double.NaN : _m2/Count; }
+        }
+
+        /// <summary>
+        /// Estimates the unbiased population standard deviation from the samples in the window.
+        /// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
+        /// Returns NaN if the window has less than two entries or if any entry in it is NaN.
+        /// </summary>
+        public double StandardDeviation
+        {
+            get { return Count < 2 ? double.NaN : Math.Sqrt(_m2/(Count - 1)); }
+        }
+
+        /// <summary>
+        /// Evaluates the standard deviation of the window, treating it as the full population.
+        /// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset.
+        /// Returns NaN if the window is empty or if any entry in it is NaN.
+        /// </summary>
+        public double PopulationStandardDeviation
+        {
+            get { return Count < 1 ? double.NaN : Math.Sqrt(_m2/Count); }
+        }
+
+        /// <summary>
+        /// Update the running statistics by adding another observed sample (in-place).
+        /// Once the window is full, the oldest sample is dropped to make room.
+        /// </summary>
+        /// <param name="value">The sample to add.</param>
+        public void Push(double value)
+        {
+            if (Count < _windowSize)
+            {
+                // Window still filling up: plain one-pass (Welford) update of mean and M2.
+                _oldValues[Count] = value;
+                Count++;
+                var d = value - _m1;
+                var s = d/Count;
+                var t = d*s*(Count - 1);
+
+                _m1 += s;
+                _m2 += t;
+
+                if (value < _min || double.IsNaN(value))
+                {
+                    _min = value;
+                }
+
+                if (value > _max || double.IsNaN(value))
+                {
+                    _max = value;
+                }
+
+                return;
+            }
+
+            // Window full: overwrite the oldest slot of the circular buffer.
+            var evicted = _oldValues[_lastIndex];
+            _oldValues[_lastIndex] = value;
+            _lastIndex++;
+            if (_lastIndex == _windowSize)
+            {
+                _lastIndex = 0;
+            }
+
+            if (IsFinite(_m1) && IsFinite(_m2))
+            {
+                // Swap-one-sample update: M2 changes by (new - old)*((new - newMean) + (old - oldMean)).
+                var d = value - evicted;
+                var oldM1 = _m1;
+                _m1 += d/Count;
+                _m2 += d*(value - _m1 + evicted - oldM1);
+
+                // Rounding can push M2 marginally below zero, which would turn the standard deviation into NaN.
+                if (_m2 < 0)
+                {
+                    _m2 = 0;
+                }
+            }
+            else
+            {
+                // A NaN or infinity has poisoned the moments and cannot be subtracted out again;
+                // rebuild them from the window so the statistics recover once it has been evicted.
+                RecomputeMoments();
+            }
+
+            // NaN wins; a larger value wins; otherwise rescan only if the old maximum may have just left.
+            if (double.IsNaN(value) || value > _max)
+            {
+                _max = value;
+            }
+            else if (evicted == _max || double.IsNaN(_max))
+            {
+                _max = _oldValues.Maximum();
+            }
+
+            if (double.IsNaN(value) || value < _min)
+            {
+                _min = value;
+            }
+            else if (evicted == _min || double.IsNaN(_min))
+            {
+                _min = _oldValues.Minimum();
+            }
+        }
+
+        /// <summary>
+        /// Update the running statistics by adding a sequence of observed sample (in-place).
+        /// </summary>
+        /// <param name="values">The samples to add, in enumeration order.</param>
+        /// <exception cref="ArgumentNullException">If <paramref name="values"/> is null.</exception>
+        public void PushRange(IEnumerable<double> values)
+        {
+            if (values == null)
+            {
+                throw new ArgumentNullException("values");
+            }
+
+            foreach (var value in values)
+            {
+                Push(value);
+            }
+        }
+
+        /// <summary>
+        /// Rebuilds mean and M2 from the samples currently stored in the window.
+        /// </summary>
+        void RecomputeMoments()
+        {
+            _m1 = 0;
+            _m2 = 0;
+            for (var i = 0; i < Count; i++)
+            {
+                var d = _oldValues[i] - _m1;
+                var s = d/(i + 1);
+                _m1 += s;
+                _m2 += d*s*i;
+            }
+        }
+
+        /// <summary>Returns true if <paramref name="x"/> is neither NaN nor an infinity.</summary>
+        static bool IsFinite(double x)
+        {
+            return !double.IsNaN(x) && !double.IsInfinity(x);
+        }
+    }
+}
diff --git a/src/UnitTests/StatisticsTests/MovingStatisticsTests.cs b/src/UnitTests/StatisticsTests/MovingStatisticsTests.cs
new file mode 100644
index 00000000..60d77f07
--- /dev/null
+++ b/src/UnitTests/StatisticsTests/MovingStatisticsTests.cs
@@ -0,0 +1,66 @@
+//
+// Math.NET Numerics, part of the Math.NET Project
+// http://numerics.mathdotnet.com
+// http://github.com/mathnet/mathnet-numerics
+// http://mathnetnumerics.codeplex.com
+//
+// Copyright (c) 2009-2015 Math.NET
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using MathNet.Numerics.Distributions;
+using MathNet.Numerics.Random;
+using MathNet.Numerics.Statistics;
+using NUnit.Framework;
+
+namespace MathNet.Numerics.UnitTests.StatisticsTests
+{
+#if !PORTABLE
+    [TestFixture, Category("Statistics")]
+    public class MovingStatisticsTests
+    {
+        /// <summary>After a long seeded warm-up, a window of five must reflect only the last five pushed values.</summary>
+        [Test]
+        public void QuickTest()
+        {
+            var warmup = new double[1000000];
+            var source = new Normal(50, 10, new SystemRandomSource(0));
+            source.Samples(warmup);
+
+            var window = new MovingStatistics(5, warmup);
+            var tail = new[] { 11.11, 22.22, 33.33, 44.44, 55.55 };
+            window.PushRange(tail);
+
+            Assert.AreEqual(5, window.Count);
+            Assert.AreEqual(11.11, window.Minimum);
+            Assert.AreEqual(55.55, window.Maximum);
+            Assert.AreEqual(33.33, window.Mean, 1e-11);
+            Assert.AreEqual(308.58025, window.Variance, 1e-10);
+        }
+    }
+#endif
+}
diff --git a/src/UnitTests/UnitTests.csproj b/src/UnitTests/UnitTests.csproj
index 3b9505a2..713d8740 100644
--- a/src/UnitTests/UnitTests.csproj
+++ b/src/UnitTests/UnitTests.csproj
@@ -377,6 +377,7 @@
+