Browse Source

One Pass Statistics

(cherry picked from commit 005fba9c76)

Signed-off-by: Christoph Ruegg <git@cdrnet.ch>
pull/38/merge
manyue 14 years ago
committed by Christoph Ruegg
parent
commit
ecaaeb2af6
  1. 29
      src/Numerics/Statistics/Correlation.cs
  2. 213
      src/Numerics/Statistics/DescriptiveStatistics.cs
  3. 20
      src/Numerics/Statistics/Statistics.cs

29
src/Numerics/Statistics/Correlation.cs

@ -32,6 +32,7 @@ namespace MathNet.Numerics.Statistics
{
using System;
using System.Collections.Generic;
using Properties;
/// <summary>
/// A class with correlation measures between two datasets.
@ -49,12 +50,10 @@ namespace MathNet.Numerics.Statistics
int n = 0;
double r = 0.0;
// BUG: PERFORMANCE degraded due to tripple iteration over both IEnumerables
double meanA = dataA.Mean();
double meanB = dataB.Mean();
double sdevA = dataA.StandardDeviation();
double sdevB = dataB.StandardDeviation();
double meanA = 0;
double meanB = 0;
double varA = 0;
double varB = 0;
using (IEnumerator<double> ieA = dataA.GetEnumerator())
using (IEnumerator<double> ieB = dataB.GetEnumerator())
@ -65,9 +64,21 @@ namespace MathNet.Numerics.Statistics
{
throw new ArgumentOutOfRangeException("dataB", "Datasets dataA and dataB need to have the same length. dataB is shorter.");
}
double Acurrent = ieA.Current;
double Bcurrent = ieB.Current;
n++;
r += (ieA.Current - meanA) * (ieB.Current - meanB) / (sdevA * sdevB);
double deltaA = Acurrent - meanA;
double scaleDeltaA = deltaA / ++n;
double deltaB = Bcurrent - meanB;
double scaleDeltaB = deltaB / n;
meanA += scaleDeltaA;
meanB += scaleDeltaB;
varA += scaleDeltaA * deltaA * (n - 1);
varB += scaleDeltaB * deltaB * (n - 1);
r += ((deltaA * deltaB * (n - 1)) / n);
}
if (ieB.MoveNext())
{
@ -75,7 +86,7 @@ namespace MathNet.Numerics.Statistics
}
}
return r / (n - 1);
return r / Math.Sqrt(varA * varB);
}
}
}

213
src/Numerics/Statistics/DescriptiveStatistics.cs

@ -45,7 +45,8 @@ namespace MathNet.Numerics.Statistics
/// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
/// </summary>
/// <param name="data">The sample data.</param>
public DescriptiveStatistics(IEnumerable<double> data) : this(data, false)
public DescriptiveStatistics(IEnumerable<double> data)
: this(data, false)
{
}
@ -53,7 +54,8 @@ namespace MathNet.Numerics.Statistics
/// Initializes a new instance of the <see cref="DescriptiveStatistics"/> class.
/// </summary>
/// <param name="data">The sample data.</param>
public DescriptiveStatistics(IEnumerable<double?> data) : this(data, false)
public DescriptiveStatistics(IEnumerable<double?> data)
: this(data, false)
{
}
@ -71,6 +73,11 @@ namespace MathNet.Numerics.Statistics
/// </remarks>
public DescriptiveStatistics(IEnumerable<double> data, bool increasedAccuracy)
{
if (data == null)
{
throw new ArgumentNullException("data");
}
if (increasedAccuracy)
{
ComputeHA(data);
@ -97,6 +104,12 @@ namespace MathNet.Numerics.Statistics
/// </remarks>
public DescriptiveStatistics(IEnumerable<double?> data, bool increasedAccuracy)
{
if (data == null)
{
throw new ArgumentNullException("data");
}
if (increasedAccuracy)
{
ComputeHA(data);
@ -171,9 +184,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param>
private void Compute(IEnumerable<double> data)
{
Mean = data.Mean();
double mean = 0;
double variance = 0;
double correction = 0;
double skewness = 0;
double kurtosis = 0;
double minimum = Double.PositiveInfinity;
@ -181,40 +193,26 @@ namespace MathNet.Numerics.Statistics
int n = 0;
foreach (var xi in data)
{
double diff = xi - Mean;
correction += diff;
double tmp = diff * diff;
variance += tmp;
tmp *= diff;
skewness += tmp;
tmp *= diff;
kurtosis += tmp;
double delta = xi - mean;
double scaleDelta = delta / ++n;
double scaleDeltaSQR = scaleDelta * scaleDelta;
double tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi; }
if (maximum < xi) { maximum = xi; }
n++;
}
SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
}
Count = n;
Minimum = minimum;
Maximum = maximum;
Variance = (variance - (correction * correction / n)) / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
}
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
* (kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
}
/// <summary>
/// Computes descriptive statistics from a stream of nullable data values.
@ -222,9 +220,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param>
private void Compute(IEnumerable<double?> data)
{
Mean = data.Mean();
double mean = 0;
double variance = 0;
double correction = 0;
double skewness = 0;
double kurtosis = 0;
double minimum = Double.PositiveInfinity;
@ -234,43 +231,25 @@ namespace MathNet.Numerics.Statistics
{
if (xi.HasValue)
{
double diff = xi.Value - Mean;
double tmp = diff * diff;
correction += diff;
variance += tmp;
tmp *= diff;
skewness += tmp;
tmp *= diff;
kurtosis += tmp;
double delta = xi.Value - mean;
double scaleDelta = delta / ++n;
double scaleDeltaSQR = scaleDelta * scaleDelta;
double tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi.Value; }
if (maximum < xi) { maximum = xi.Value; }
n++;
}
}
Count = n;
if (n > 0)
{
Minimum = minimum;
Maximum = maximum;
Variance = (variance - (correction * correction / n)) / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
}
SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
* (kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
}
}
/// <summary>
@ -279,10 +258,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param>
private void ComputeHA(IEnumerable<double> data)
{
Mean = data.Mean();
decimal mean = (decimal)Mean;
decimal mean = 0;
decimal variance = 0;
decimal correction = 0;
decimal skewness = 0;
decimal kurtosis = 0;
decimal minimum = Decimal.MaxValue;
@ -290,39 +267,24 @@ namespace MathNet.Numerics.Statistics
int n = 0;
foreach (decimal xi in data)
{
decimal diff = xi - mean;
decimal tmp = diff * diff;
correction += diff;
variance += tmp;
tmp *= diff;
skewness += tmp;
tmp *= diff;
kurtosis += tmp;
decimal delta = xi - mean;
decimal scaleDelta = delta / ++n;
decimal scaleDeltaSQR = scaleDelta * scaleDelta;
decimal tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi; }
if (maximum < xi) { maximum = xi; }
n++;
}
Count = n;
Minimum = (double)minimum;
Maximum = (double)maximum;
Variance = (double)(variance - (correction * correction / n)) / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n / ((n - 1) * (n - 2)) * ((double)skewness / (Variance * StandardDeviation));
}
SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
* ((double)kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
}
/// <summary>
@ -331,10 +293,8 @@ namespace MathNet.Numerics.Statistics
/// <param name="data">A sequence of datapoints.</param>
private void ComputeHA(IEnumerable<double?> data)
{
Mean = data.Mean();
decimal mean = (decimal)Mean;
decimal mean = 0;
decimal variance = 0;
decimal correction = 0;
decimal skewness = 0;
decimal kurtosis = 0;
decimal minimum = Decimal.MaxValue;
@ -344,43 +304,66 @@ namespace MathNet.Numerics.Statistics
{
if (xi.HasValue)
{
decimal diff = xi.Value - mean;
decimal tmp = diff * diff;
correction += diff;
variance += tmp;
tmp *= diff;
skewness += tmp;
tmp *= diff;
kurtosis += tmp;
decimal delta = xi.Value - mean;
decimal scaleDelta = delta / ++n;
decimal scaleDeltaSQR = scaleDelta * scaleDelta;
decimal tmpDelta = delta * (n - 1);
mean += scaleDelta;
kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi.Value; }
if (maximum < xi) { maximum = xi.Value; }
n++;
}
}
SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
}
/// <summary>
/// Internal use. Method use for setting the statistics.
/// </summary>
/// <param name="mean">For setting Mean.</param>
/// <param name="variance">For setting Variance.</param>
/// <param name="skewness">For setting Skewness.</param>
/// <param name="kurtosis">For setting Kurtosis.</param>
/// <param name="minimum">For setting Minimum.</param>
/// <param name="maximum">For setting Maximum.</param>
/// <param name="n">For setting Count.</param>
private void SetStatistics(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, int n)
{
Mean = mean;
Count = n;
if (n > 0)
{
Minimum = (double) minimum;
Maximum = (double) maximum;
Variance = (double)(variance - (correction * correction / n)) / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
Minimum = minimum;
Maximum = maximum;
if (n > 1)
{
Variance = variance / (n - 1);
StandardDeviation = Math.Sqrt(Variance);
}
if (Variance != 0)
{
if (n > 2)
{
Skewness = (double)n / ((n - 1) * (n - 2)) * ((double)skewness / (Variance * StandardDeviation));
Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
}
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
* ((double)kurtosis / (Variance * Variance)))
* (kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
}
}
}
}

20
src/Numerics/Statistics/Statistics.cs

@ -32,6 +32,7 @@ namespace MathNet.Numerics.Statistics
{
using System;
using System.Collections.Generic;
using System.Linq;
using Properties;
/// <summary>
@ -455,7 +456,7 @@ namespace MathNet.Numerics.Statistics
if (dataArray.Count % 2 == 0)
{
double lower = OrderSelect(dataArray, 0, dataArray.Count - 1, index - 1);
double upper = OrderSelect(dataArray, 0, dataArray.Count - 1, index);
double upper = dataArray.Skip(index - 1).Minimum();
return (lower + upper) / 2.0;
}
@ -543,12 +544,23 @@ namespace MathNet.Numerics.Statistics
return samples[left];
}
// The pivot point.
// The pivot point. Choose median of left, right and center
//to be the pivot and arrange so that
//samples[left]<=samples[right]<=samples[center]
int center = (left + right) / 2;
if (samples[center] < samples[left])
Sorting.Swap(samples, left, center);
if (samples[center] < samples[right])
Sorting.Swap(samples, right, center);
if (samples[right] < samples[left])
Sorting.Swap(samples, right, left);
double pivot = samples[right];
// The partioning code.
int i = left - 1;
for (int j = left; j <= right - 1; j++)
int i = left;
for (int j = left+1; j <= right - 1; j++)
{
if (samples[j] <= pivot)
{

Loading…
Cancel
Save