Browse Source

Statistics: Array and Streaming PopulationVariance

pull/109/head
Christoph Ruegg 13 years ago
parent
commit
3d039413b9
  1. 28
      src/Numerics/Statistics/ArrayStatistics.cs
  2. 36
      src/Numerics/Statistics/SortedArrayStatistics.cs
  3. 75
      src/Numerics/Statistics/Statistics.cs
  4. 43
      src/Numerics/Statistics/StreamingStatistics.cs
  5. 10
      src/UnitTests/StatisticsTests/StatisticsTests.cs

28
src/Numerics/Statistics/ArrayStatistics.cs

@ -93,7 +93,7 @@ namespace MathNet.Numerics.Statistics
ulong m = 0;
for (int i = 0; i < data.Length; i++)
{
mean += (data[i] - mean) / ++m;
mean += (data[i] - mean)/++m;
}
return mean;
}
@ -114,10 +114,32 @@ namespace MathNet.Numerics.Statistics
for (int i = 1; i < data.Length; i++)
{
t += data[i];
double diff = ((i + 1) * data[i]) - t;
variance += (diff * diff) / ((i + 1) * i);
double diff = ((i + 1)*data[i]) - t;
variance += (diff*diff)/((i + 1)*i);
}
return variance/(data.Length - 1);
}
/// <summary>
/// Estimates the biased population variance from the unsorted data array.
/// On a dataset of size N will use an N normalizer
/// Returns NaN if data is empty or any entry is NaN.
/// </summary>
/// <param name="data">Sample array, no sorting is assumed.</param>
public static double PopulationVariance(double[] data)
{
if (data == null) throw new ArgumentNullException("data");
if (data.Length == 0) return double.NaN;
double variance = 0;
double t = data[0];
for (int i = 1; i < data.Length; i++)
{
t += data[i];
double diff = ((i + 1)*data[i]) - t;
variance += (diff*diff)/((i + 1)*i);
}
return variance/data.Length;
}
}
}

36
src/Numerics/Statistics/SortedArrayStatistics.cs

@ -42,8 +42,8 @@ namespace MathNet.Numerics.Statistics
public static class SortedArrayStatistics
{
const double Third = 1d / 3d;
const double Half = 1d / 2d;
const double Third = 1d/3d;
const double Half = 1d/2d;
/// <summary>
/// Returns the smallest value from the sorted data array (ascending).
@ -88,7 +88,7 @@ namespace MathNet.Numerics.Statistics
/// <param name="p">Percentile selector, between 0 and 100 (inclusive).</param>
public static double Percentile(double[] data, int p)
{
return Quantile(data, p / 100d);
return Quantile(data, p/100d);
}
/// <summary>
@ -180,7 +180,7 @@ namespace MathNet.Numerics.Statistics
case QuantileCompatibility.R2:
case QuantileCompatibility.SAS5:
{
double h = data.Length * tau + Half;
double h = data.Length*tau + Half;
return (data[(int) Math.Ceiling(h - Half) - 1] + data[(int) (h + Half) - 1])*Half;
}
case QuantileCompatibility.R3:
@ -194,42 +194,42 @@ namespace MathNet.Numerics.Statistics
case QuantileCompatibility.SAS1:
{
double h = data.Length*tau;
var hf = (int)h;
return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
var hf = (int) h;
return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R5:
{
double h = data.Length*tau + Half;
var hf = (int)h;
return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
var hf = (int) h;
return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R6:
case QuantileCompatibility.SAS4:
case QuantileCompatibility.Nist:
{
double h = (data.Length + 1)*tau;
var hf = (int)h;
return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
var hf = (int) h;
return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R7:
case QuantileCompatibility.Excel:
{
double h = (data.Length - 1)*tau + 1d;
var hf = (int)h;
return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
var hf = (int) h;
return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R8:
case QuantileCompatibility.Default:
{
double h = (data.Length + Third) * tau + Third;
var hf = (int)h;
return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
double h = (data.Length + Third)*tau + Third;
var hf = (int) h;
return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R9:
{
double h = (data.Length + 1d/4d) * tau + 3d/8d;
var hf = (int)h;
return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
double h = (data.Length + 1d/4d)*tau + 3d/8d;
var hf = (int) h;
return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
default:
throw new NotSupportedException();

75
src/Numerics/Statistics/Statistics.cs

@ -142,34 +142,10 @@ namespace MathNet.Numerics.Statistics
/// <returns>The biased population variance of the sample.</returns>
public static double PopulationVariance(this IEnumerable<double> data)
{
if (data == null)
{
throw new ArgumentNullException("data");
}
double variance = 0;
double t = 0;
ulong j = 0;
using (IEnumerator<double> iterator = data.GetEnumerator())
{
if (iterator.MoveNext())
{
j++;
t = iterator.Current;
}
while (iterator.MoveNext())
{
j++;
double xi = iterator.Current;
t += xi;
double diff = (j * xi) - t;
variance += (diff * diff) / (j * (j - 1));
}
}
return variance / j;
var array = data as double[];
return array != null
? ArrayStatistics.PopulationVariance(array)
: StreamingStatistics.PopulationVariance(data);
}
/// <summary>
@ -179,47 +155,8 @@ namespace MathNet.Numerics.Statistics
/// <returns>The population variance of the sample.</returns>
public static double PopulationVariance(this IEnumerable<double?> data)
{
if (data == null)
{
throw new ArgumentNullException("data");
}
double variance = 0;
double t = 0;
ulong j = 0;
using (IEnumerator<double?> iterator = data.GetEnumerator())
{
while (true)
{
bool hasNext = iterator.MoveNext();
if (!hasNext)
{
break;
}
if (iterator.Current.HasValue)
{
j++;
t = iterator.Current.Value;
break;
}
}
while (iterator.MoveNext())
{
if (iterator.Current.HasValue)
{
j++;
double xi = iterator.Current.Value;
t += xi;
double diff = (j * xi) - t;
variance += (diff * diff) / (j * (j - 1));
}
}
}
return variance / j;
if (data == null) throw new ArgumentNullException("data");
return StreamingStatistics.PopulationVariance(data.Where(d => d.HasValue).Select(d => d.Value));
}
/// <summary>

43
src/Numerics/Statistics/StreamingStatistics.cs

@ -54,7 +54,7 @@ namespace MathNet.Numerics.Statistics
}
any = true;
}
return any ? min : double.NaN;
return any ? min : double.NaN;
}
/// <summary>
@ -93,7 +93,7 @@ namespace MathNet.Numerics.Statistics
bool any = false;
foreach (var d in stream)
{
mean += (d - mean) / ++m;
mean += (d - mean)/++m;
any = true;
}
return any ? mean : double.NaN;
@ -125,11 +125,44 @@ namespace MathNet.Numerics.Statistics
j++;
double xi = iterator.Current;
t += xi;
double diff = (j * xi) - t;
variance += (diff * diff) / (j * (j - 1));
double diff = (j*xi) - t;
variance += (diff*diff)/(j*(j - 1));
}
}
return j > 1 ? variance / (j - 1) : double.NaN;
return j > 1 ? variance/(j - 1) : double.NaN;
}
/// <summary>
/// Estimates the biased population variance from the enumerable, in a single pass without memoization.
/// On a dataset of size N will use an N normalizer
/// Returns NaN if data is empty or any entry is NaN.
/// </summary>
/// <param name="stream">Sample stream, no sorting is assumed.</param>
public static double PopulationVariance(IEnumerable<double> stream)
{
if (stream == null) throw new ArgumentNullException("stream");
double variance = 0;
double t = 0;
ulong j = 0;
using (var iterator = stream.GetEnumerator())
{
if (iterator.MoveNext())
{
j++;
t = iterator.Current;
}
while (iterator.MoveNext())
{
j++;
double xi = iterator.Current;
t += xi;
double diff = (j*xi) - t;
variance += (diff*diff)/(j*(j - 1));
}
}
return variance/j;
}
}
}

10
src/UnitTests/StatisticsTests/StatisticsTests.cs

@ -84,11 +84,13 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.Throws<ArgumentNullException>(() => ArrayStatistics.Maximum(data));
Assert.Throws<ArgumentNullException>(() => ArrayStatistics.Mean(data));
Assert.Throws<ArgumentNullException>(() => ArrayStatistics.Variance(data));
Assert.Throws<ArgumentNullException>(() => ArrayStatistics.PopulationVariance(data));
Assert.Throws<ArgumentNullException>(() => StreamingStatistics.Minimum(data));
Assert.Throws<ArgumentNullException>(() => StreamingStatistics.Maximum(data));
Assert.Throws<ArgumentNullException>(() => StreamingStatistics.Mean(data));
Assert.Throws<ArgumentNullException>(() => StreamingStatistics.Variance(data));
Assert.Throws<ArgumentNullException>(() => StreamingStatistics.PopulationVariance(data));
}
[Test]
@ -120,11 +122,13 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.DoesNotThrow(() => ArrayStatistics.Maximum(data));
Assert.DoesNotThrow(() => ArrayStatistics.Mean(data));
Assert.DoesNotThrow(() => ArrayStatistics.Variance(data));
Assert.DoesNotThrow(() => ArrayStatistics.PopulationVariance(data));
Assert.DoesNotThrow(() => StreamingStatistics.Minimum(data));
Assert.DoesNotThrow(() => StreamingStatistics.Maximum(data));
Assert.DoesNotThrow(() => StreamingStatistics.Mean(data));
Assert.DoesNotThrow(() => StreamingStatistics.Variance(data));
Assert.DoesNotThrow(() => StreamingStatistics.PopulationVariance(data));
}
[TestCase("lottery")]
@ -298,6 +302,12 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.That(Statistics.PopulationVariance(new double[0]), Is.NaN);
Assert.That(Statistics.PopulationVariance(new[] { 2d }), Is.Not.NaN);
Assert.That(Statistics.PopulationVariance(new[] { 2d, 3d }), Is.Not.NaN);
Assert.That(ArrayStatistics.PopulationVariance(new double[0]), Is.NaN);
Assert.That(ArrayStatistics.PopulationVariance(new[] { 2d }), Is.Not.NaN);
Assert.That(ArrayStatistics.PopulationVariance(new[] { 2d, 3d }), Is.Not.NaN);
Assert.That(StreamingStatistics.PopulationVariance(new double[0]), Is.NaN);
Assert.That(StreamingStatistics.PopulationVariance(new[] { 2d }), Is.Not.NaN);
Assert.That(StreamingStatistics.PopulationVariance(new[] { 2d, 3d }), Is.Not.NaN);
}
/// <summary>

Loading…
Cancel
Save