Browse Source

One Pass Statistics

(cherry picked from commit 005fba9c76)

Signed-off-by: Christoph Ruegg <git@cdrnet.ch>
pull/38/merge
manyue 14 years ago
committed by Christoph Ruegg
parent
commit
ecaaeb2af6
  1. 29
      src/Numerics/Statistics/Correlation.cs
  2. 213
      src/Numerics/Statistics/DescriptiveStatistics.cs
  3. 20
      src/Numerics/Statistics/Statistics.cs

29
src/Numerics/Statistics/Correlation.cs

@ -32,6 +32,7 @@ namespace MathNet.Numerics.Statistics
{ {
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using Properties;
/// <summary> /// <summary>
/// A class with correlation measures between two datasets. /// A class with correlation measures between two datasets.
@ -49,12 +50,10 @@ namespace MathNet.Numerics.Statistics
int n = 0; int n = 0;
double r = 0.0; double r = 0.0;
// BUG: PERFORMANCE degraded due to tripple iteration over both IEnumerables double meanA = 0;
double meanB = 0;
double meanA = dataA.Mean(); double varA = 0;
double meanB = dataB.Mean(); double varB = 0;
double sdevA = dataA.StandardDeviation();
double sdevB = dataB.StandardDeviation();
using (IEnumerator<double> ieA = dataA.GetEnumerator()) using (IEnumerator<double> ieA = dataA.GetEnumerator())
using (IEnumerator<double> ieB = dataB.GetEnumerator()) using (IEnumerator<double> ieB = dataB.GetEnumerator())
@ -65,9 +64,21 @@ namespace MathNet.Numerics.Statistics
{ {
throw new ArgumentOutOfRangeException("dataB", "Datasets dataA and dataB need to have the same length. dataB is shorter."); throw new ArgumentOutOfRangeException("dataB", "Datasets dataA and dataB need to have the same length. dataB is shorter.");
} }
double Acurrent = ieA.Current;
double Bcurrent = ieB.Current;
n++; double deltaA = Acurrent - meanA;
r += (ieA.Current - meanA) * (ieB.Current - meanB) / (sdevA * sdevB); double scaleDeltaA = deltaA / ++n;
double deltaB = Bcurrent - meanB;
double scaleDeltaB = deltaB / n;
meanA += scaleDeltaA;
meanB += scaleDeltaB;
varA += scaleDeltaA * deltaA * (n - 1);
varB += scaleDeltaB * deltaB * (n - 1);
r += ((deltaA * deltaB * (n - 1)) / n);
} }
if (ieB.MoveNext()) if (ieB.MoveNext())
{ {
@ -75,7 +86,7 @@ namespace MathNet.Numerics.Statistics
} }
} }
return r / (n - 1); return r / Math.Sqrt(varA * varB);
} }
} }
} }

213
src/Numerics/Statistics/DescriptiveStatistics.cs

@ -45,7 +45,8 @@ namespace MathNet.Numerics.Statistics
/// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class. /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
/// </summary> /// </summary>
/// <param name="data">The sample data.</param> /// <param name="data">The sample data.</param>
public DescriptiveStatistics(IEnumerable<double> data) : this(data, false) public DescriptiveStatistics(IEnumerable<double> data)
: this(data, false)
{ {
} }
@ -53,7 +54,8 @@ namespace MathNet.Numerics.Statistics
/// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class. /// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
/// </summary> /// </summary>
/// <param name="data">The sample data.</param> /// <param name="data">The sample data.</param>
public DescriptiveStatistics(IEnumerable<double?> data) : this(data, false) public DescriptiveStatistics(IEnumerable<double?> data)
: this(data, false)
{ {
} }
@ -71,6 +73,11 @@ namespace MathNet.Numerics.Statistics
/// </remarks> /// </remarks>
public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy) public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy)
{ {
if (data == null)
{
throw new ArgumentNullException("data");
}
if (increasedAccuracy) if (increasedAccuracy)
{ {
ComputeHA(data); ComputeHA(data);
@ -97,6 +104,12 @@ namespace MathNet.Numerics.Statistics
/// </remarks> /// </remarks>
public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy) public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy)
{ {
if (data == null)
{
throw new ArgumentNullException("data");
}
if (increasedAccuracy) if (increasedAccuracy)
{ {
ComputeHA(data); ComputeHA(data);
@ -171,9 +184,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param> /// <param name="data">A sequence of datapoints.</param>
private void Compute(IEnumerable<double> data) private void Compute(IEnumerable<double> data)
{ {
Mean = data.Mean(); double mean = 0;
double variance = 0; double variance = 0;
double correction = 0;
double skewness = 0; double skewness = 0;
double kurtosis = 0; double kurtosis = 0;
double minimum = Double.PositiveInfinity; double minimum = Double.PositiveInfinity;
@ -181,40 +193,26 @@ namespace MathNet.Numerics.Statistics
int n = 0; int n = 0;
foreach (var xi in data) foreach (var xi in data)
{ {
double diff = xi - Mean; double delta = xi - mean;
correction += diff; double scaleDelta = delta / ++n;
double tmp = diff * diff; double scaleDeltaSQR = scaleDelta * scaleDelta;
variance += tmp; double tmpDelta = delta * (n - 1);
tmp *= diff;
skewness += tmp; mean += scaleDelta;
tmp *= diff;
kurtosis += tmp; kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi; } if (minimum > xi) { minimum = xi; }
if (maximum < xi) { maximum = xi; } if (maximum < xi) { maximum = xi; }
n++;
} }
SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
}
Count = n;
Minimum = minimum;
Maximum = maximum;
Variance = (variance - (correction * correction / n)) / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
}
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
* (kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
}
/// <summary> /// <summary>
/// Computes descriptive statistics from a stream of nullable data values. /// Computes descriptive statistics from a stream of nullable data values.
@ -222,9 +220,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param> /// <param name="data">A sequence of datapoints.</param>
private void Compute(IEnumerable<double?> data) private void Compute(IEnumerable<double?> data)
{ {
Mean = data.Mean(); double mean = 0;
double variance = 0; double variance = 0;
double correction = 0;
double skewness = 0; double skewness = 0;
double kurtosis = 0; double kurtosis = 0;
double minimum = Double.PositiveInfinity; double minimum = Double.PositiveInfinity;
@ -234,43 +231,25 @@ namespace MathNet.Numerics.Statistics
{ {
if (xi.HasValue) if (xi.HasValue)
{ {
double diff = xi.Value - Mean; double delta = xi.Value - mean;
double tmp = diff * diff; double scaleDelta = delta / ++n;
correction += diff; double scaleDeltaSQR = scaleDelta * scaleDelta;
variance += tmp; double tmpDelta = delta * (n - 1);
tmp *= diff;
skewness += tmp; mean += scaleDelta;
tmp *= diff;
kurtosis += tmp; kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi.Value; } if (minimum > xi) { minimum = xi.Value; }
if (maximum < xi) { maximum = xi.Value; } if (maximum < xi) { maximum = xi.Value; }
n++;
} }
} }
Count = n; SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
if (n > 0)
{
Minimum = minimum;
Maximum = maximum;
Variance = (variance - (correction * correction / n)) / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
}
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
* (kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
}
} }
/// <summary> /// <summary>
@ -279,10 +258,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param> /// <param name="data">A sequence of datapoints.</param>
private void ComputeHA(IEnumerable<double> data) private void ComputeHA(IEnumerable<double> data)
{ {
Mean = data.Mean(); decimal mean = 0;
decimal mean = (decimal)Mean;
decimal variance = 0; decimal variance = 0;
decimal correction = 0;
decimal skewness = 0; decimal skewness = 0;
decimal kurtosis = 0; decimal kurtosis = 0;
decimal minimum = Decimal.MaxValue; decimal minimum = Decimal.MaxValue;
@ -290,39 +267,24 @@ namespace MathNet.Numerics.Statistics
int n = 0; int n = 0;
foreach (decimal xi in data) foreach (decimal xi in data)
{ {
decimal diff = xi - mean; decimal delta = xi - mean;
decimal tmp = diff * diff; decimal scaleDelta = delta / ++n;
correction += diff; decimal scaleDeltaSQR = scaleDelta * scaleDelta;
variance += tmp; decimal tmpDelta = delta * (n - 1);
tmp *= diff;
skewness += tmp; mean += scaleDelta;
tmp *= diff;
kurtosis += tmp; kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi; } if (minimum > xi) { minimum = xi; }
if (maximum < xi) { maximum = xi; } if (maximum < xi) { maximum = xi; }
n++;
} }
Count = n; SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
Minimum = (double)minimum;
Maximum = (double)maximum;
Variance = (double)(variance - (correction * correction / n)) / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n / ((n - 1) * (n - 2)) * ((double)skewness / (Variance * StandardDeviation));
}
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
* ((double)kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
} }
/// <summary> /// <summary>
@ -331,10 +293,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param> /// <param name="data">A sequence of datapoints.</param>
private void ComputeHA(IEnumerable<double?> data) private void ComputeHA(IEnumerable<double?> data)
{ {
Mean = data.Mean(); decimal mean = 0;
decimal mean = (decimal)Mean;
decimal variance = 0; decimal variance = 0;
decimal correction = 0;
decimal skewness = 0; decimal skewness = 0;
decimal kurtosis = 0; decimal kurtosis = 0;
decimal minimum = Decimal.MaxValue; decimal minimum = Decimal.MaxValue;
@ -344,43 +304,66 @@ namespace MathNet.Numerics.Statistics
{ {
if (xi.HasValue) if (xi.HasValue)
{ {
decimal diff = xi.Value - mean; decimal delta = xi.Value - mean;
decimal tmp = diff * diff; decimal scaleDelta = delta / ++n;
correction += diff; decimal scaleDeltaSQR = scaleDelta * scaleDelta;
variance += tmp; decimal tmpDelta = delta * (n - 1);
tmp *= diff;
skewness += tmp; mean += scaleDelta;
tmp *= diff;
kurtosis += tmp; kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi.Value; } if (minimum > xi) { minimum = xi.Value; }
if (maximum < xi) { maximum = xi.Value; } if (maximum < xi) { maximum = xi.Value; }
n++;
} }
} }
SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
}
/// <summary>
/// Internal use. Method use for setting the statistics.
/// </summary>
/// <param name="mean">For setting Mean.</param>
/// <param name="variance">For setting Variance.</param>
/// <param name="skewness">For setting Skewness.</param>
/// <param name="kurtosis">For setting Kurtosis.</param>
/// <param name="minimum">For setting Minimum.</param>
/// <param name="maximum">For setting Maximum.</param>
/// <param name="n">For setting Count.</param>
private void SetStatistics(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, int n)
{
Mean = mean;
Count = n; Count = n;
if (n > 0) if (n > 0)
{ {
Minimum = (double) minimum; Minimum = minimum;
Maximum = (double) maximum; Maximum = maximum;
Variance = (double)(variance - (correction * correction / n)) / (n - 1); if (n > 1)
StandardDeviation = Math.Sqrt(Variance); {
Variance = variance / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
}
if (Variance != 0) if (Variance != 0)
{ {
if (n > 2) if (n > 2)
{ {
Skewness = (double)n / ((n - 1) * (n - 2)) * ((double)skewness / (Variance * StandardDeviation)); Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
} }
if (n > 3) if (n > 3)
{ {
Kurtosis = (((double)n * (n + 1)) Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3)) / ((n - 1) * (n - 2) * (n - 3))
* ((double)kurtosis / (Variance * Variance))) * (kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3))); - ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
} }
} }
} }
} }
} }
} }

20
src/Numerics/Statistics/Statistics.cs

@ -32,6 +32,7 @@ namespace MathNet.Numerics.Statistics
{ {
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq;
using Properties; using Properties;
/// <summary> /// <summary>
@ -455,7 +456,7 @@ namespace MathNet.Numerics.Statistics
if (dataArray.Count % 2 == 0) if (dataArray.Count % 2 == 0)
{ {
double lower = OrderSelect(dataArray, 0, dataArray.Count - 1, index - 1); double lower = OrderSelect(dataArray, 0, dataArray.Count - 1, index - 1);
double upper = OrderSelect(dataArray, 0, dataArray.Count - 1, index); double upper = dataArray.Skip(index - 1).Minimum();
return (lower + upper) / 2.0; return (lower + upper) / 2.0;
} }
@ -543,12 +544,23 @@ namespace MathNet.Numerics.Statistics
return samples[left]; return samples[left];
} }
// The pivot point.
// The pivot point. Choose median of left, right and center
//to be the pivot and arrange so that
//samples[left]<=samples[right]<=samples[center]
int center = (left + right) / 2;
if (samples[center] < samples[left])
Sorting.Swap(samples, left, center);
if (samples[center] < samples[right])
Sorting.Swap(samples, right, center);
if (samples[right] < samples[left])
Sorting.Swap(samples, right, left);
double pivot = samples[right]; double pivot = samples[right];
// The partioning code. // The partioning code.
int i = left - 1; int i = left;
for (int j = left; j <= right - 1; j++) for (int j = left+1; j <= right - 1; j++)
{ {
if (samples[j] <= pivot) if (samples[j] <= pivot)
{ {

Loading…
Cancel
Save