/// <summary>
/// Calculates the Shannon entropy, in bits, of a stream of double values.
/// Each distinct value is treated as a symbol whose probability is its relative
/// frequency in the stream.
/// Returns NaN if any of the values in the stream are NaN.
/// Returns 0 if the stream is empty or contains a single distinct value.
/// </summary>
/// <param name="stream">The input stream to evaluate.</param>
/// <returns>The entropy of the stream in bits, or NaN if the stream contains NaN.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="stream"/> is null.</exception>
public static double Entropy(IEnumerable<double> stream)
{
    // http://en.wikipedia.org/wiki/Shannon_entropy

    // Fail with a descriptive exception instead of the NullReferenceException
    // that enumerating a null sequence would raise.
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // Number of occurrences of each distinct value in the stream.
    var occurrences = new Dictionary<double, int>();
    int totalCount = 0;

    foreach (double value in stream)
    {
        // A single NaN makes the result undefined; stop early.
        if (double.IsNaN(value))
        {
            return double.NaN;
        }

        int seen;
        if (occurrences.TryGetValue(value, out seen))
        {
            occurrences[value] = seen + 1;
        }
        else
        {
            occurrences.Add(value, 1);
        }

        ++totalCount;
    }

    // H = -sum(p * log2(p)). The terms are subtracted as we go rather than negating
    // the final sum, so that a zero result is +0.0 and never negative zero.
    double entropy = 0;
    foreach (var item in occurrences)
    {
        // Cast first: both operands are integers and integer division would truncate.
        double p = (double)item.Value / totalCount;
        entropy -= p * Math.Log(p, 2);
    }

    return entropy;
}
/// <summary>
/// A stream that contains only one distinct value has zero entropy.
/// </summary>
[Test]
public void EntropyIsMinimum()
{
    var data1 = new double[] { 1, 1, 1, 1, 1 };
    Assert.That(StreamingStatistics.Entropy(data1), Is.EqualTo(0.0).Within(1e-12));

    var data2 = new double[] { 0, 0 };
    Assert.That(StreamingStatistics.Entropy(data2), Is.EqualTo(0.0).Within(1e-12));
}

/// <summary>
/// A stream of n distinct, equally frequent values has entropy log2(n) bits.
/// </summary>
[Test]
public void EntropyIsMaximum()
{
    var data1 = new double[] { 1, 2 };
    Assert.That(StreamingStatistics.Entropy(data1), Is.EqualTo(1.0).Within(1e-12));

    var data2 = new double[] { 1, 2, 3, 4 };
    Assert.That(StreamingStatistics.Entropy(data2), Is.EqualTo(2.0).Within(1e-12));
}

/// <summary>
/// A NaN anywhere in the stream makes the entropy NaN.
/// </summary>
[Test]
public void EntropyOfNaNIsNaN()
{
    var data = new double[] { 1, 2, double.NaN };
    Assert.That(StreamingStatistics.Entropy(data), Is.NaN);
}