diff --git a/src/Numerics/Statistics/ArrayStatistics.cs b/src/Numerics/Statistics/ArrayStatistics.cs
index b37818cc..ccb5afb4 100644
--- a/src/Numerics/Statistics/ArrayStatistics.cs
+++ b/src/Numerics/Statistics/ArrayStatistics.cs
@@ -93,7 +93,7 @@ namespace MathNet.Numerics.Statistics
ulong m = 0;
for (int i = 0; i < data.Length; i++)
{
- mean += (data[i] - mean) / ++m;
+ mean += (data[i] - mean)/++m;
}
return mean;
}
@@ -114,10 +114,32 @@ namespace MathNet.Numerics.Statistics
for (int i = 1; i < data.Length; i++)
{
t += data[i];
- double diff = ((i + 1) * data[i]) - t;
- variance += (diff * diff) / ((i + 1) * i);
+ double diff = ((i + 1)*data[i]) - t;
+ variance += (diff*diff)/((i + 1)*i);
}
return variance/(data.Length - 1);
}
+
+ ///
+ /// Estimates the biased population variance from the unsorted data array.
+ /// On a dataset of size N will use an N normalizer
+ /// Returns NaN if data is empty or any entry is NaN.
+ ///
+ /// Sample array, no sorting is assumed.
+ public static double PopulationVariance(double[] data)
+ {
+ if (data == null) throw new ArgumentNullException("data");
+ if (data.Length == 0) return double.NaN;
+
+ double variance = 0;
+ double t = data[0];
+ for (int i = 1; i < data.Length; i++)
+ {
+ t += data[i];
+ double diff = ((i + 1)*data[i]) - t;
+ variance += (diff*diff)/((i + 1)*i);
+ }
+ return variance/data.Length;
+ }
}
}
\ No newline at end of file
diff --git a/src/Numerics/Statistics/SortedArrayStatistics.cs b/src/Numerics/Statistics/SortedArrayStatistics.cs
index 9cabbd79..fe9ab77c 100644
--- a/src/Numerics/Statistics/SortedArrayStatistics.cs
+++ b/src/Numerics/Statistics/SortedArrayStatistics.cs
@@ -42,8 +42,8 @@ namespace MathNet.Numerics.Statistics
public static class SortedArrayStatistics
{
- const double Third = 1d / 3d;
- const double Half = 1d / 2d;
+ const double Third = 1d/3d;
+ const double Half = 1d/2d;
///
/// Returns the smallest value from the sorted data array (ascending).
@@ -88,7 +88,7 @@ namespace MathNet.Numerics.Statistics
/// Percentile selector, between 0 and 100 (inclusive).
public static double Percentile(double[] data, int p)
{
- return Quantile(data, p / 100d);
+ return Quantile(data, p/100d);
}
///
@@ -180,7 +180,7 @@ namespace MathNet.Numerics.Statistics
case QuantileCompatibility.R2:
case QuantileCompatibility.SAS5:
{
- double h = data.Length * tau + Half;
+ double h = data.Length*tau + Half;
return (data[(int) Math.Ceiling(h - Half) - 1] + data[(int) (h + Half) - 1])*Half;
}
case QuantileCompatibility.R3:
@@ -194,42 +194,42 @@ namespace MathNet.Numerics.Statistics
case QuantileCompatibility.SAS1:
{
double h = data.Length*tau;
- var hf = (int)h;
- return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
+ var hf = (int) h;
+ return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R5:
{
double h = data.Length*tau + Half;
- var hf = (int)h;
- return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
+ var hf = (int) h;
+ return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R6:
case QuantileCompatibility.SAS4:
case QuantileCompatibility.Nist:
{
double h = (data.Length + 1)*tau;
- var hf = (int)h;
- return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
+ var hf = (int) h;
+ return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R7:
case QuantileCompatibility.Excel:
{
double h = (data.Length - 1)*tau + 1d;
- var hf = (int)h;
- return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
+ var hf = (int) h;
+ return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R8:
case QuantileCompatibility.Default:
{
- double h = (data.Length + Third) * tau + Third;
- var hf = (int)h;
- return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
+ double h = (data.Length + Third)*tau + Third;
+ var hf = (int) h;
+ return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
case QuantileCompatibility.R9:
{
- double h = (data.Length + 1d/4d) * tau + 3d/8d;
- var hf = (int)h;
- return data[hf - 1] + (h - hf) * (data[hf] - data[hf - 1]);
+ double h = (data.Length + 1d/4d)*tau + 3d/8d;
+ var hf = (int) h;
+ return data[hf - 1] + (h - hf)*(data[hf] - data[hf - 1]);
}
default:
throw new NotSupportedException();
diff --git a/src/Numerics/Statistics/Statistics.cs b/src/Numerics/Statistics/Statistics.cs
index e673b294..1c345882 100644
--- a/src/Numerics/Statistics/Statistics.cs
+++ b/src/Numerics/Statistics/Statistics.cs
@@ -142,34 +142,10 @@ namespace MathNet.Numerics.Statistics
/// The biased population variance of the sample.
public static double PopulationVariance(this IEnumerable data)
{
- if (data == null)
- {
- throw new ArgumentNullException("data");
- }
-
- double variance = 0;
- double t = 0;
- ulong j = 0;
-
- using (IEnumerator iterator = data.GetEnumerator())
- {
- if (iterator.MoveNext())
- {
- j++;
- t = iterator.Current;
- }
-
- while (iterator.MoveNext())
- {
- j++;
- double xi = iterator.Current;
- t += xi;
- double diff = (j * xi) - t;
- variance += (diff * diff) / (j * (j - 1));
- }
- }
-
- return variance / j;
+ var array = data as double[];
+ return array != null
+ ? ArrayStatistics.PopulationVariance(array)
+ : StreamingStatistics.PopulationVariance(data);
}
///
@@ -179,47 +155,8 @@ namespace MathNet.Numerics.Statistics
/// The population variance of the sample.
public static double PopulationVariance(this IEnumerable data)
{
- if (data == null)
- {
- throw new ArgumentNullException("data");
- }
-
- double variance = 0;
- double t = 0;
- ulong j = 0;
-
- using (IEnumerator iterator = data.GetEnumerator())
- {
- while (true)
- {
- bool hasNext = iterator.MoveNext();
- if (!hasNext)
- {
- break;
- }
-
- if (iterator.Current.HasValue)
- {
- j++;
- t = iterator.Current.Value;
- break;
- }
- }
-
- while (iterator.MoveNext())
- {
- if (iterator.Current.HasValue)
- {
- j++;
- double xi = iterator.Current.Value;
- t += xi;
- double diff = (j * xi) - t;
- variance += (diff * diff) / (j * (j - 1));
- }
- }
- }
-
- return variance / j;
+ if (data == null) throw new ArgumentNullException("data");
+ return StreamingStatistics.PopulationVariance(data.Where(d => d.HasValue).Select(d => d.Value));
}
///
diff --git a/src/Numerics/Statistics/StreamingStatistics.cs b/src/Numerics/Statistics/StreamingStatistics.cs
index 5f717a2b..86146c25 100644
--- a/src/Numerics/Statistics/StreamingStatistics.cs
+++ b/src/Numerics/Statistics/StreamingStatistics.cs
@@ -54,7 +54,7 @@ namespace MathNet.Numerics.Statistics
}
any = true;
}
- return any ? min : double.NaN;
+ return any ? min : double.NaN;
}
///
@@ -93,7 +93,7 @@ namespace MathNet.Numerics.Statistics
bool any = false;
foreach (var d in stream)
{
- mean += (d - mean) / ++m;
+ mean += (d - mean)/++m;
any = true;
}
return any ? mean : double.NaN;
@@ -125,11 +125,44 @@ namespace MathNet.Numerics.Statistics
j++;
double xi = iterator.Current;
t += xi;
- double diff = (j * xi) - t;
- variance += (diff * diff) / (j * (j - 1));
+ double diff = (j*xi) - t;
+ variance += (diff*diff)/(j*(j - 1));
}
}
- return j > 1 ? variance / (j - 1) : double.NaN;
+ return j > 1 ? variance/(j - 1) : double.NaN;
+ }
+
+ ///
+ /// Estimates the biased population variance from the enumerable, in a single pass without memoization.
+ /// On a dataset of size N will use an N normalizer
+ /// Returns NaN if data is empty or any entry is NaN.
+ ///
+ /// Sample stream, no sorting is assumed.
+ public static double PopulationVariance(IEnumerable stream)
+ {
+ if (stream == null) throw new ArgumentNullException("stream");
+
+ double variance = 0;
+ double t = 0;
+ ulong j = 0;
+ using (var iterator = stream.GetEnumerator())
+ {
+ if (iterator.MoveNext())
+ {
+ j++;
+ t = iterator.Current;
+ }
+
+ while (iterator.MoveNext())
+ {
+ j++;
+ double xi = iterator.Current;
+ t += xi;
+ double diff = (j*xi) - t;
+ variance += (diff*diff)/(j*(j - 1));
+ }
+ }
+ return variance/j;
}
}
}
diff --git a/src/UnitTests/StatisticsTests/StatisticsTests.cs b/src/UnitTests/StatisticsTests/StatisticsTests.cs
index f3a0c192..7b4e31ef 100644
--- a/src/UnitTests/StatisticsTests/StatisticsTests.cs
+++ b/src/UnitTests/StatisticsTests/StatisticsTests.cs
@@ -84,11 +84,13 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.Throws(() => ArrayStatistics.Maximum(data));
Assert.Throws(() => ArrayStatistics.Mean(data));
Assert.Throws(() => ArrayStatistics.Variance(data));
+ Assert.Throws(() => ArrayStatistics.PopulationVariance(data));
Assert.Throws(() => StreamingStatistics.Minimum(data));
Assert.Throws(() => StreamingStatistics.Maximum(data));
Assert.Throws(() => StreamingStatistics.Mean(data));
Assert.Throws(() => StreamingStatistics.Variance(data));
+ Assert.Throws(() => StreamingStatistics.PopulationVariance(data));
}
[Test]
@@ -120,11 +122,13 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.DoesNotThrow(() => ArrayStatistics.Maximum(data));
Assert.DoesNotThrow(() => ArrayStatistics.Mean(data));
Assert.DoesNotThrow(() => ArrayStatistics.Variance(data));
+ Assert.DoesNotThrow(() => ArrayStatistics.PopulationVariance(data));
Assert.DoesNotThrow(() => StreamingStatistics.Minimum(data));
Assert.DoesNotThrow(() => StreamingStatistics.Maximum(data));
Assert.DoesNotThrow(() => StreamingStatistics.Mean(data));
Assert.DoesNotThrow(() => StreamingStatistics.Variance(data));
+ Assert.DoesNotThrow(() => StreamingStatistics.PopulationVariance(data));
}
[TestCase("lottery")]
@@ -298,6 +302,12 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.That(Statistics.PopulationVariance(new double[0]), Is.NaN);
Assert.That(Statistics.PopulationVariance(new[] { 2d }), Is.Not.NaN);
Assert.That(Statistics.PopulationVariance(new[] { 2d, 3d }), Is.Not.NaN);
+ Assert.That(ArrayStatistics.PopulationVariance(new double[0]), Is.NaN);
+ Assert.That(ArrayStatistics.PopulationVariance(new[] { 2d }), Is.Not.NaN);
+ Assert.That(ArrayStatistics.PopulationVariance(new[] { 2d, 3d }), Is.Not.NaN);
+ Assert.That(StreamingStatistics.PopulationVariance(new double[0]), Is.NaN);
+ Assert.That(StreamingStatistics.PopulationVariance(new[] { 2d }), Is.Not.NaN);
+ Assert.That(StreamingStatistics.PopulationVariance(new[] { 2d, 3d }), Is.Not.NaN);
}
///