Browse Source

Added StreamingStatistics.Entropy function

pull/236/head
Jeff Mastry 12 years ago
parent
commit
aefe55dae2
  1. 39
      src/Numerics/Statistics/StreamingStatistics.cs
  2. 29
      src/UnitTests/StatisticsTests/StatisticsTests.cs

39
src/Numerics/Statistics/StreamingStatistics.cs

@ -309,5 +309,44 @@ namespace MathNet.Numerics.Statistics
}
return comoment/n;
}
/// <summary>
/// Calculates the Shannon entropy, in bits, of a stream of double values.
/// Every distinct value is treated as one symbol whose probability is its
/// relative frequency in the stream.
/// Returns NaN if any of the values in the stream are NaN.
/// </summary>
/// <param name="stream">The input stream to evaluate. It is enumerated exactly once.</param>
/// <returns>
/// The entropy in bits (base-2 logarithm). The result is (positive) zero for an
/// empty stream or a stream holding a single distinct value, and NaN as soon as
/// a NaN value is encountered.
/// </returns>
public static double Entropy(IEnumerable<double> stream)
{
    // http://en.wikipedia.org/wiki/Shannon_entropy
    // NOTE: no explicit null guard; enumerating a null stream throws the same
    // exception as before, which callers/tests of this class may rely on.
    var index = new Dictionary<double, double>();

    // count the number of occurrences of each item in the stream;
    // a 64-bit total keeps very long streams from wrapping around
    long totalCount = 0;
    foreach (double value in stream)
    {
        if (double.IsNaN(value))
        {
            return double.NaN;
        }

        // TryGetValue leaves the count at 0 when the value has not been seen yet
        double currentValueCount;
        index.TryGetValue(value, out currentValueCount);
        index[value] = currentValueCount + 1;
        ++totalCount;
    }

    // calculate the entropy of the stream: H = -sum(p * log2(p)).
    // Subtracting each term (instead of negating the final sum) yields +0.0
    // rather than -0.0 when the entropy is zero.
    double entropy = 0;
    foreach (var item in index)
    {
        double p = item.Value / totalCount;
        entropy -= p * Math.Log(p, 2);
    }

    return entropy;
}
}
}

29
src/UnitTests/StatisticsTests/StatisticsTests.cs

@ -115,6 +115,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.That(() => StreamingStatistics.PopulationStandardDeviation(data), Throws.Exception.TypeOf<NullReferenceException>());
Assert.That(() => StreamingStatistics.Covariance(data, data), Throws.Exception.TypeOf<NullReferenceException>());
Assert.That(() => StreamingStatistics.PopulationCovariance(data, data), Throws.Exception.TypeOf<NullReferenceException>());
Assert.That(() => StreamingStatistics.Entropy(data), Throws.Exception.TypeOf<NullReferenceException>());
Assert.That(() => new RunningStatistics(data), Throws.Exception);
Assert.That(() => new RunningStatistics().PushRange(data), Throws.Exception);
@ -173,6 +174,7 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.DoesNotThrow(() => StreamingStatistics.PopulationStandardDeviation(data));
Assert.DoesNotThrow(() => StreamingStatistics.Covariance(data, data));
Assert.DoesNotThrow(() => StreamingStatistics.PopulationCovariance(data, data));
Assert.DoesNotThrow(() => StreamingStatistics.Entropy(data));
Assert.That(() => new RunningStatistics(data), Throws.Nothing);
Assert.That(() => new RunningStatistics().PushRange(data), Throws.Nothing);
@ -1013,6 +1015,33 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.That(new DescriptiveStatistics(shorter).Kurtosis, Is.EqualTo(-1.36).Within(1e-4), "DescriptiveStatistics.Kurtosis: shorter");
Assert.That(new DescriptiveStatistics(longer).Kurtosis, Is.EqualTo(-1.36).Within(1e-4), "DescriptiveStatistics.Kurtosis: longer");
}
[Test]
public void EntropyIsMinimum()
{
    // A stream that contains only one distinct value has the minimum entropy, 0.
    // Constraint-based asserts report expected vs. actual on failure, unlike a bare boolean.
    var data1 = new double[] { 1, 1, 1, 1, 1 };
    Assert.That(StreamingStatistics.Entropy(data1), Is.EqualTo(0.0).Within(1e-15), "StreamingStatistics.Entropy: five identical values");

    var data2 = new double[] { 0, 0 };
    Assert.That(StreamingStatistics.Entropy(data2), Is.EqualTo(0.0).Within(1e-15), "StreamingStatistics.Entropy: two zeros");
}
[Test]
public void EntropyIsMaximum()
{
    // N equally frequent distinct values give the maximum entropy, log2(N) bits.
    // A small tolerance is used because the result is a sum of floating-point logarithm terms.
    var data1 = new double[] { 1, 2 };
    Assert.That(StreamingStatistics.Entropy(data1), Is.EqualTo(1.0).Within(1e-12), "StreamingStatistics.Entropy: two distinct values");

    var data2 = new double[] { 1, 2, 3, 4 };
    Assert.That(StreamingStatistics.Entropy(data2), Is.EqualTo(2.0).Within(1e-12), "StreamingStatistics.Entropy: four distinct values");
}
[Test]
public void EntropyOfNaNIsNaN()
{
    // A single NaN anywhere in the input must make the whole result NaN.
    var samplesWithNaN = new[] { 1.0, 2.0, double.NaN };
    double result = StreamingStatistics.Entropy(samplesWithNaN);
    Assert.That(result, Is.NaN);
}
}
}

Loading…
Cancel
Save