diff --git a/src/Numerics/Statistics/Correlation.cs b/src/Numerics/Statistics/Correlation.cs
index bd31c401..67abb60a 100644
--- a/src/Numerics/Statistics/Correlation.cs
+++ b/src/Numerics/Statistics/Correlation.cs
@@ -32,6 +32,7 @@ namespace MathNet.Numerics.Statistics
{
using System;
using System.Collections.Generic;
+ using Properties;
///
/// A class with correlation measures between two datasets.
@@ -49,12 +50,10 @@ namespace MathNet.Numerics.Statistics
int n = 0;
double r = 0.0;
- // BUG: PERFORMANCE degraded due to tripple iteration over both IEnumerables
-
- double meanA = dataA.Mean();
- double meanB = dataB.Mean();
- double sdevA = dataA.StandardDeviation();
- double sdevB = dataB.StandardDeviation();
+ double meanA = 0;
+ double meanB = 0;
+ double varA = 0;
+ double varB = 0;
using (IEnumerator ieA = dataA.GetEnumerator())
using (IEnumerator ieB = dataB.GetEnumerator())
@@ -65,9 +64,21 @@ namespace MathNet.Numerics.Statistics
{
throw new ArgumentOutOfRangeException("dataB", "Datasets dataA and dataB need to have the same length. dataB is shorter.");
}
+ double Acurrent = ieA.Current;
+ double Bcurrent = ieB.Current;
- n++;
- r += (ieA.Current - meanA) * (ieB.Current - meanB) / (sdevA * sdevB);
+ double deltaA = Acurrent - meanA;
+ double scaleDeltaA = deltaA / ++n;
+
+ double deltaB = Bcurrent - meanB;
+ double scaleDeltaB = deltaB / n;
+
+ meanA += scaleDeltaA;
+ meanB += scaleDeltaB;
+
+ varA += scaleDeltaA * deltaA * (n - 1);
+ varB += scaleDeltaB * deltaB * (n - 1);
+ r += ((deltaA * deltaB * (n - 1)) / n);
}
if (ieB.MoveNext())
{
@@ -75,7 +86,7 @@ namespace MathNet.Numerics.Statistics
}
}
- return r / (n - 1);
+ return r / Math.Sqrt(varA * varB);
}
}
}
diff --git a/src/Numerics/Statistics/DescriptiveStatistics.cs b/src/Numerics/Statistics/DescriptiveStatistics.cs
index 5d34f72f..609bf066 100644
--- a/src/Numerics/Statistics/DescriptiveStatistics.cs
+++ b/src/Numerics/Statistics/DescriptiveStatistics.cs
@@ -45,7 +45,8 @@ namespace MathNet.Numerics.Statistics
/// Initializes a new instance of the class.
///
/// The sample data.
- public DescriptiveStatistics(IEnumerable data) : this(data, false)
+ public DescriptiveStatistics(IEnumerable data)
+ : this(data, false)
{
}
@@ -53,7 +54,8 @@ namespace MathNet.Numerics.Statistics
/// Initializes a new instance of the class.
///
/// The sample data.
- public DescriptiveStatistics(IEnumerable data) : this(data, false)
+ public DescriptiveStatistics(IEnumerable data)
+ : this(data, false)
{
}
@@ -71,6 +73,11 @@ namespace MathNet.Numerics.Statistics
///
public DescriptiveStatistics(IEnumerable data, bool increasedAccuracy)
{
+ if (data == null)
+ {
+ throw new ArgumentNullException("data");
+ }
+
if (increasedAccuracy)
{
ComputeHA(data);
@@ -97,6 +104,12 @@ namespace MathNet.Numerics.Statistics
///
public DescriptiveStatistics(IEnumerable data, bool increasedAccuracy)
{
+ if (data == null)
+ {
+ throw new ArgumentNullException("data");
+ }
+
+
if (increasedAccuracy)
{
ComputeHA(data);
@@ -171,9 +184,8 @@ namespace MathNet.Numerics.Statistics
/// A sequence of datapoints.
private void Compute(IEnumerable data)
{
- Mean = data.Mean();
+ double mean = 0;
double variance = 0;
- double correction = 0;
double skewness = 0;
double kurtosis = 0;
double minimum = Double.PositiveInfinity;
@@ -181,40 +193,26 @@ namespace MathNet.Numerics.Statistics
int n = 0;
foreach (var xi in data)
{
- double diff = xi - Mean;
- correction += diff;
- double tmp = diff * diff;
- variance += tmp;
- tmp *= diff;
- skewness += tmp;
- tmp *= diff;
- kurtosis += tmp;
+ double delta = xi - mean;
+ double scaleDelta = delta / ++n;
+ double scaleDeltaSQR = scaleDelta * scaleDelta;
+ double tmpDelta = delta * (n - 1);
+
+ mean += scaleDelta;
+
+ kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ + 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
+
+ skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
+ variance += tmpDelta * scaleDelta;
+
if (minimum > xi) { minimum = xi; }
if (maximum < xi) { maximum = xi; }
- n++;
}
+ SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
+ }
- Count = n;
- Minimum = minimum;
- Maximum = maximum;
- Variance = (variance - (correction * correction / n)) / (n - 1);
- StandardDeviation = Math.Sqrt(Variance);
- if (Variance != 0)
- {
- if (n > 2)
- {
- Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
- }
- if (n > 3)
- {
- Kurtosis = (((double)n * (n + 1))
- / ((n - 1) * (n - 2) * (n - 3))
- * (kurtosis / (Variance * Variance)))
- - ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
- }
- }
- }
///
/// Computes descriptive statistics from a stream of nullable data values.
@@ -222,9 +220,8 @@ namespace MathNet.Numerics.Statistics
/// A sequence of datapoints.
private void Compute(IEnumerable data)
{
- Mean = data.Mean();
+ double mean = 0;
double variance = 0;
- double correction = 0;
double skewness = 0;
double kurtosis = 0;
double minimum = Double.PositiveInfinity;
@@ -234,43 +231,25 @@ namespace MathNet.Numerics.Statistics
{
if (xi.HasValue)
{
- double diff = xi.Value - Mean;
- double tmp = diff * diff;
- correction += diff;
- variance += tmp;
- tmp *= diff;
- skewness += tmp;
- tmp *= diff;
- kurtosis += tmp;
+ double delta = xi.Value - mean;
+ double scaleDelta = delta / ++n;
+ double scaleDeltaSQR = scaleDelta * scaleDelta;
+ double tmpDelta = delta * (n - 1);
+
+ mean += scaleDelta;
+
+ kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ + 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
+
+ skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
+ variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi.Value; }
if (maximum < xi) { maximum = xi.Value; }
- n++;
}
}
- Count = n;
- if (n > 0)
- {
- Minimum = minimum;
- Maximum = maximum;
- Variance = (variance - (correction * correction / n)) / (n - 1);
- StandardDeviation = Math.Sqrt(Variance);
- if (Variance != 0)
- {
- if (n > 2)
- {
- Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
- }
+ SetStatistics(mean, variance, skewness, kurtosis, minimum, maximum, n);
- if (n > 3)
- {
- Kurtosis = (((double)n * (n + 1))
- / ((n - 1) * (n - 2) * (n - 3))
- * (kurtosis / (Variance * Variance)))
- - ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
- }
- }
- }
}
///
@@ -279,10 +258,8 @@ namespace MathNet.Numerics.Statistics
/// A sequence of datapoints.
private void ComputeHA(IEnumerable data)
{
- Mean = data.Mean();
- decimal mean = (decimal)Mean;
+ decimal mean = 0;
decimal variance = 0;
- decimal correction = 0;
decimal skewness = 0;
decimal kurtosis = 0;
decimal minimum = Decimal.MaxValue;
@@ -290,39 +267,24 @@ namespace MathNet.Numerics.Statistics
int n = 0;
foreach (decimal xi in data)
{
- decimal diff = xi - mean;
- decimal tmp = diff * diff;
- correction += diff;
- variance += tmp;
- tmp *= diff;
- skewness += tmp;
- tmp *= diff;
- kurtosis += tmp;
+ decimal delta = xi - mean;
+ decimal scaleDelta = delta / ++n;
+ decimal scaleDeltaSQR = scaleDelta * scaleDelta;
+ decimal tmpDelta = delta * (n - 1);
+
+ mean += scaleDelta;
+
+ kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ + 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
+
+ skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
+ variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi; }
if (maximum < xi) { maximum = xi; }
- n++;
}
- Count = n;
- Minimum = (double)minimum;
- Maximum = (double)maximum;
- Variance = (double)(variance - (correction * correction / n)) / (n - 1);
- StandardDeviation = Math.Sqrt(Variance);
- if (Variance != 0)
- {
- if (n > 2)
- {
- Skewness = (double)n / ((n - 1) * (n - 2)) * ((double)skewness / (Variance * StandardDeviation));
- }
+ SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
- if (n > 3)
- {
- Kurtosis = (((double)n * (n + 1))
- / ((n - 1) * (n - 2) * (n - 3))
- * ((double)kurtosis / (Variance * Variance)))
- - ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
- }
- }
}
///
@@ -331,10 +293,8 @@ namespace MathNet.Numerics.Statistics
/// A sequence of datapoints.
private void ComputeHA(IEnumerable data)
{
- Mean = data.Mean();
- decimal mean = (decimal)Mean;
+ decimal mean = 0;
decimal variance = 0;
- decimal correction = 0;
decimal skewness = 0;
decimal kurtosis = 0;
decimal minimum = Decimal.MaxValue;
@@ -344,43 +304,66 @@ namespace MathNet.Numerics.Statistics
{
if (xi.HasValue)
{
- decimal diff = xi.Value - mean;
- decimal tmp = diff * diff;
- correction += diff;
- variance += tmp;
- tmp *= diff;
- skewness += tmp;
- tmp *= diff;
- kurtosis += tmp;
+ decimal delta = xi.Value - mean;
+ decimal scaleDelta = delta / ++n;
+ decimal scaleDeltaSQR = scaleDelta * scaleDelta;
+ decimal tmpDelta = delta * (n - 1);
+
+ mean += scaleDelta;
+
+ kurtosis += tmpDelta * scaleDelta * scaleDeltaSQR * (n * n - 3 * n + 3)
+ + 6 * scaleDeltaSQR * variance - 4 * scaleDelta * skewness;
+
+ skewness += tmpDelta * scaleDeltaSQR * (n - 2) - 3 * scaleDelta * variance;
+ variance += tmpDelta * scaleDelta;
if (minimum > xi) { minimum = xi.Value; }
if (maximum < xi) { maximum = xi.Value; }
- n++;
}
}
+ SetStatistics((double)mean, (double)variance, (double)skewness, (double)kurtosis, (double)minimum, (double)maximum, n);
+ }
+
+ ///
+ /// Internal use. Method use for setting the statistics.
+ ///
+ /// For setting Mean.
+ /// For setting Variance.
+ /// For setting Skewness.
+ /// For setting Kurtosis.
+ /// For setting Minimum.
+ /// For setting Maximum.
+ /// For setting Count.
+ private void SetStatistics(double mean, double variance, double skewness, double kurtosis, double minimum, double maximum, int n)
+ {
+ Mean = mean;
Count = n;
if (n > 0)
{
- Minimum = (double) minimum;
- Maximum = (double) maximum;
- Variance = (double)(variance - (correction * correction / n)) / (n - 1);
- StandardDeviation = Math.Sqrt(Variance);
+ Minimum = minimum;
+ Maximum = maximum;
+ if (n > 1)
+ {
+ Variance = variance / (n - 1);
+ StandardDeviation = Math.Sqrt(Variance);
+ }
if (Variance != 0)
{
if (n > 2)
{
- Skewness = (double)n / ((n - 1) * (n - 2)) * ((double)skewness / (Variance * StandardDeviation));
+ Skewness = (double)n / ((n - 1) * (n - 2)) * (skewness / (Variance * StandardDeviation));
}
if (n > 3)
{
Kurtosis = (((double)n * (n + 1))
/ ((n - 1) * (n - 2) * (n - 3))
- * ((double)kurtosis / (Variance * Variance)))
+ * (kurtosis / (Variance * Variance)))
- ((3.0 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3)));
}
}
}
}
}
+
}
diff --git a/src/Numerics/Statistics/Statistics.cs b/src/Numerics/Statistics/Statistics.cs
index a95c2732..bba51a97 100644
--- a/src/Numerics/Statistics/Statistics.cs
+++ b/src/Numerics/Statistics/Statistics.cs
@@ -32,6 +32,7 @@ namespace MathNet.Numerics.Statistics
{
using System;
using System.Collections.Generic;
+ using System.Linq;
using Properties;
///
@@ -455,7 +456,7 @@ namespace MathNet.Numerics.Statistics
if (dataArray.Count % 2 == 0)
{
double lower = OrderSelect(dataArray, 0, dataArray.Count - 1, index - 1);
- double upper = OrderSelect(dataArray, 0, dataArray.Count - 1, index);
+ double upper = dataArray.Skip(index - 1).Minimum();
return (lower + upper) / 2.0;
}
@@ -543,12 +544,23 @@ namespace MathNet.Numerics.Statistics
return samples[left];
}
- // The pivot point.
+
+ // The pivot point. Choose median of left, right and center
+ //to be the pivot and arrange so that
+ //samples[left]<=samples[right]<=samples[center]
+ int center = (left + right) / 2;
+ if (samples[center] < samples[left])
+ Sorting.Swap(samples, left, center);
+ if (samples[center] < samples[right])
+ Sorting.Swap(samples, right, center);
+ if (samples[right] < samples[left])
+ Sorting.Swap(samples, right, left);
+
double pivot = samples[right];
// The partioning code.
- int i = left - 1;
- for (int j = left; j <= right - 1; j++)
+ int i = left;
+ for (int j = left+1; j <= right - 1; j++)
{
if (samples[j] <= pivot)
{