diff --git a/src/Numerics/Numerics.csproj b/src/Numerics/Numerics.csproj index b451cf78..7d4fccd6 100644 --- a/src/Numerics/Numerics.csproj +++ b/src/Numerics/Numerics.csproj @@ -217,6 +217,7 @@ + diff --git a/src/Numerics/Statistics/MovingStatistics.cs b/src/Numerics/Statistics/MovingStatistics.cs new file mode 100644 index 00000000..44cf1662 --- /dev/null +++ b/src/Numerics/Statistics/MovingStatistics.cs @@ -0,0 +1,359 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// http://mathnetnumerics.codeplex.com +// +// Copyright (c) 2009-2015 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +using System; +using System.Collections.Generic; +using MathNet.Numerics.Properties; + +namespace MathNet.Numerics.Statistics +{ + /// + /// Running statistics over a window of data, allows updating by adding values. + /// + public class MovingStatistics + { + readonly double[] _oldValues; + readonly int _windowSize; + + long _count; + long _totalCountOffset; + int _lastIndex; + int _lastNaNTimeToLive; + int _lastPosInfTimeToLive; + int _lastNegInfTimeToLive; + + double _m1; + double _m2; + double _max = double.NegativeInfinity; + double _min = double.PositiveInfinity; + + public MovingStatistics(int windowSize) + { + if (windowSize < 1) + { + throw new ArgumentException(string.Format(Resources.ArgumentMustBePositive), "windowSize"); + } + _windowSize = windowSize; + _oldValues = new double[_windowSize]; + } + + public MovingStatistics(int windowSize, IEnumerable values) + : this(windowSize) + { + PushRange(values); + } + + public int WindowSize + { + get { return _windowSize; } + } + + /// + /// Gets the total number of samples. + /// + public long Count + { + get { return _totalCountOffset + _count; } + } + + /// + /// Returns the minimum value in the sample data. + /// Returns NaN if data is empty or if any entry is NaN. + /// + public double Minimum + { + get + { + if (_lastNaNTimeToLive > 0) + { + return double.NaN; + } + + if (_lastNegInfTimeToLive > 0) + { + return double.NegativeInfinity; + } + + return (_count > 0 || _lastPosInfTimeToLive > 0) ? _min : double.NaN; + } + } + + /// + /// Returns the maximum value in the sample data. + /// Returns NaN if data is empty or if any entry is NaN. + /// + public double Maximum + { + get + { + if (_lastNaNTimeToLive > 0) + { + return double.NaN; + } + + if (_lastPosInfTimeToLive > 0) + { + return double.PositiveInfinity; + } + + return (_count > 0 || _lastNegInfTimeToLive > 0) ? _max : double.NaN; + } + } + + /// + /// Evaluates the sample mean, an estimate of the population mean. + /// Returns NaN if data is empty or if any entry is NaN. + /// + public double Mean + { + get + { + if (_lastNaNTimeToLive > 0 || (_lastPosInfTimeToLive > 0 && _lastNegInfTimeToLive > 0)) + { + return double.NaN; + } + + if (_lastPosInfTimeToLive > 0) + { + return double.PositiveInfinity; + } + + if (_lastNegInfTimeToLive > 0) + { + return double.NegativeInfinity; + } + + return _count == 0 ? double.NaN : _m1; + } + } + + /// + /// Estimates the unbiased population variance from the provided samples. + /// On a dataset of size N will use an N-1 normalizer (Bessel's correction). + /// Returns NaN if data has less than two entries or if any entry is NaN. + /// + public double Variance + { + get + { + if (_lastNaNTimeToLive > 0 || _lastNegInfTimeToLive > 0 || (_lastPosInfTimeToLive > 0 && _lastNegInfTimeToLive > 0)) + { + return double.NaN; + } + + if (_lastPosInfTimeToLive > 0) + { + return double.PositiveInfinity; + } + + return _count < 2 ? double.NaN : _m2 / (_count - 1); + } + } + + /// + /// Evaluates the variance from the provided full population. + /// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset. + /// Returns NaN if data is empty or if any entry is NaN. + /// + public double PopulationVariance + { + get + { + if (_lastNaNTimeToLive > 0 || _lastNegInfTimeToLive > 0 || (_lastPosInfTimeToLive > 0 && _lastNegInfTimeToLive > 0)) + { + return double.NaN; + } + + if (_lastPosInfTimeToLive > 0) + { + return double.PositiveInfinity; + } + + return _count < 2 ? double.NaN : _m2 / _count; + } + } + + /// + /// Estimates the unbiased population standard deviation from the provided samples. + /// On a dataset of size N will use an N-1 normalizer (Bessel's correction). + /// Returns NaN if data has less than two entries or if any entry is NaN. + /// + public double StandardDeviation + { + get + { + if (_lastNaNTimeToLive > 0 || _lastNegInfTimeToLive > 0 || (_lastPosInfTimeToLive > 0 && _lastNegInfTimeToLive > 0)) + { + return double.NaN; + } + + if (_lastPosInfTimeToLive > 0) + { + return double.PositiveInfinity; + } + + return _count < 2 ? double.NaN : Math.Sqrt(_m2 / (_count - 1)); + } + } + + /// + /// Evaluates the standard deviation from the provided full population. + /// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset. + /// Returns NaN if data is empty or if any entry is NaN. + /// + public double PopulationStandardDeviation + { + get + { + if (_lastNaNTimeToLive > 0 || _lastNegInfTimeToLive > 0 || (_lastPosInfTimeToLive > 0 && _lastNegInfTimeToLive > 0)) + { + return double.NaN; + } + + if (_lastPosInfTimeToLive > 0) + { + return double.PositiveInfinity; + } + + return _count < 2 ? double.NaN : Math.Sqrt(_m2 / _count); + } + } + + /// + /// Update the running statistics by adding another observed sample (in-place). + /// + public void Push(double value) + { + DecrementTimeToLive(); + + if (double.IsNaN(value)) + { + _lastNaNTimeToLive = _windowSize; + Reset(double.PositiveInfinity, double.NegativeInfinity); + return; + } + + if (double.IsPositiveInfinity(value)) + { + _lastPosInfTimeToLive = _windowSize; + Reset(_min, double.NegativeInfinity); + return; + } + + if (double.IsNegativeInfinity(value)) + { + _lastNegInfTimeToLive = _windowSize; + Reset(double.PositiveInfinity, _max); + return; + } + + if (_count < _windowSize) + { + _oldValues[_count] = value; + _count++; + var d = value - _m1; + var s = d / _count; + var t = d * s * (_count - 1); + + _m1 += s; + _m2 += t; + + if (value < _min) + { + _min = value; + } + + if (value > _max) + { + _max = value; + } + } + else + { + var oldValue = _oldValues[_lastIndex]; + var d = value - oldValue; + var s = d / _count; + var oldM1 = _m1; + _m1 += s; + + var x = (value - _m1 + oldValue - oldM1); + var t = d * x; + _m2 += t; + + _oldValues[_lastIndex] = value; + _lastIndex++; + if (_lastIndex == WindowSize) + { + _lastIndex = 0; + } + _max = value > _max ? value : _oldValues.Maximum(); + _min = value < _min ? value : _oldValues.Minimum(); + } + } + + /// + /// Update the running statistics by adding a sequence of observed sample (in-place). + /// + public void PushRange(IEnumerable values) + { + foreach (var value in values) + { + Push(value); + } + } + + private void DecrementTimeToLive() + { + if (_lastNaNTimeToLive > 0) + { + _lastNaNTimeToLive--; + } + + if (_lastPosInfTimeToLive > 0) + { + _lastPosInfTimeToLive--; + } + + if (_lastNegInfTimeToLive > 0) + { + _lastNegInfTimeToLive--; + } + } + + private void Reset(double min, double max) + { + _totalCountOffset += _count + 1; + _count = 0; + _m1 = 0; + _max = max; + _min = min; + } + } +} diff --git a/src/Numerics/Statistics/Statistics.cs b/src/Numerics/Statistics/Statistics.cs index 3fe30641..9a92fafe 100644 --- a/src/Numerics/Statistics/Statistics.cs +++ b/src/Numerics/Statistics/Statistics.cs @@ -931,5 +931,21 @@ namespace MathNet.Numerics.Statistics { return StreamingStatistics.Entropy(data.Where(d => d.HasValue).Select(d => d.Value)); } + + /// + /// Evaluates the sample mean over a moving window, for each samples. + /// Returns NaN if no data is empty or if any entry is NaN. + /// + /// The sample stream to calculate the mean of. + /// The number of last samples to consider. + public static IEnumerable MovingAverage(this IEnumerable samples, int windowSize) + { + var movingStatistics = new MovingStatistics(windowSize); + return samples.Select(sample => + { + movingStatistics.Push(sample); + return movingStatistics.Mean; + }); + } } } diff --git a/src/UnitTests/StatisticsTests/MovingStatisticsTests.cs b/src/UnitTests/StatisticsTests/MovingStatisticsTests.cs new file mode 100644 index 00000000..867ab4a6 --- /dev/null +++ b/src/UnitTests/StatisticsTests/MovingStatisticsTests.cs @@ -0,0 +1,255 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// http://mathnetnumerics.codeplex.com +// +// Copyright (c) 2009-2015 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +using System; +using MathNet.Numerics.Distributions; +using MathNet.Numerics.Random; +using MathNet.Numerics.Statistics; +using NUnit.Framework; + +namespace MathNet.Numerics.UnitTests.StatisticsTests +{ + [TestFixture, Category("Statistics")] + public class MovingStatisticsTests + { + [Test] + public void PositiveInfinityTest() + { + var ms = new MovingStatistics(3); + + ms.Push(1.0); + ms.Push(2.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.Not.EqualTo(double.PositiveInfinity)); + + ms.Push(double.PositiveInfinity); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.EqualTo(double.PositiveInfinity)); + + ms.Push(1.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.EqualTo(double.PositiveInfinity)); + + ms.Push(double.PositiveInfinity); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.EqualTo(double.PositiveInfinity)); + + ms.Push(2.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.EqualTo(double.PositiveInfinity)); + + ms.Push(3.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.EqualTo(double.PositiveInfinity)); + + ms.Push(4.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.Not.EqualTo(double.PositiveInfinity)); + } + + [Test] + public void NegativeInfinityTest() + { + var ms = new MovingStatistics(3); + + ms.Push(1.0); + ms.Push(2.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.Not.EqualTo(double.NegativeInfinity)); + + ms.Push(double.NegativeInfinity); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(1.0); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(double.NegativeInfinity); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(2.0); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(3.0); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(4.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.Not.NaN); + } + + [Test] + public void MixedInfinityTest() + { + var ms = new MovingStatistics(3); + + ms.Push(1.0); + ms.Push(2.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.Not.EqualTo(double.PositiveInfinity)); + + ms.Push(double.NegativeInfinity); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(1.0); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(double.PositiveInfinity); + Assert.That(ms.Minimum, Is.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.NaN); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(2.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.EqualTo(double.PositiveInfinity)); + + ms.Push(3.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.EqualTo(double.PositiveInfinity)); + + ms.Push(4.0); + Assert.That(ms.Minimum, Is.Not.EqualTo(double.NegativeInfinity)); + Assert.That(ms.Maximum, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.Mean, Is.Not.EqualTo(double.PositiveInfinity)); + Assert.That(ms.StandardDeviation, Is.Not.EqualTo(double.PositiveInfinity)); + } + + [Test] + public void StabilityTest() + { + var data = new double[1000000]; + Normal.Samples(new SystemRandomSource(0), data, 50, 10); + + var ms = new MovingStatistics(5, data); + ms.PushRange(new[] { 11.11, 22.22, 33.33, 44.44, 55.55 }); + + Assert.AreEqual(5, ms.Count); + Assert.AreEqual(11.11, ms.Minimum); + Assert.AreEqual(55.55, ms.Maximum); + + Assert.AreEqual(33.33, ms.Mean, 1e-11); + Assert.AreEqual(308.58025, ms.Variance, 1e-10); + Assert.AreEqual(17.5664524022354, ms.StandardDeviation, 1e-11); + Assert.AreEqual(246.8642, ms.PopulationVariance, 1e-10); + Assert.AreEqual(15.7119126779651, ms.PopulationStandardDeviation, 1e-10); + } + + [Test] + public void NaNTest() + { + var ms = new MovingStatistics(3); + Assert.That(ms.Minimum, Is.NaN); + Assert.That(ms.Maximum, Is.NaN); + Assert.That(ms.Mean, Is.NaN); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(1.0); + Assert.That(ms.Minimum, Is.Not.NaN); + Assert.That(ms.Maximum, Is.Not.NaN); + Assert.That(ms.Mean, Is.Not.NaN); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(2.0); + Assert.That(ms.Minimum, Is.Not.NaN); + Assert.That(ms.Maximum, Is.Not.NaN); + Assert.That(ms.Mean, Is.Not.NaN); + Assert.That(ms.StandardDeviation, Is.Not.NaN); + + ms.Push(double.NaN); + Assert.That(ms.Minimum, Is.NaN); + Assert.That(ms.Maximum, Is.NaN); + Assert.That(ms.Mean, Is.NaN); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(1.0); + Assert.That(ms.Minimum, Is.NaN); + Assert.That(ms.Maximum, Is.NaN); + Assert.That(ms.Mean, Is.NaN); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(2.0); + Assert.That(ms.Minimum, Is.NaN); + Assert.That(ms.Maximum, Is.NaN); + Assert.That(ms.Mean, Is.NaN); + Assert.That(ms.StandardDeviation, Is.NaN); + + ms.Push(3.0); + Assert.That(ms.Minimum, Is.Not.NaN); + Assert.That(ms.Maximum, Is.Not.NaN); + Assert.That(ms.Mean, Is.Not.NaN); + Assert.That(ms.StandardDeviation, Is.Not.NaN); + } + } +} diff --git a/src/UnitTests/UnitTests.csproj b/src/UnitTests/UnitTests.csproj index 3b9505a2..713d8740 100644 --- a/src/UnitTests/UnitTests.csproj +++ b/src/UnitTests/UnitTests.csproj @@ -377,6 +377,7 @@ +