Browse Source

fixed order statistics/median bug #5667 that caused a potential stackoverflow on platforms where tail-recursion is not optimized. Fixed by turning the recursion into a loop manually. Unit tests.

pull/36/head
Christoph Ruegg 15 years ago
parent
commit
2b84d8dd5c
  1. 51002
      data/Codeplex-5667.csv
  2. 64
      src/Numerics/Statistics/Statistics.cs
  3. 57
      src/UnitTests/StatisticsTests/StatisticsTests.cs
  4. 4
      src/UnitTests/UnitTests.csproj

51002
data/Codeplex-5667.csv

File diff suppressed because it is too large

64
src/Numerics/Statistics/Statistics.cs

@ -525,44 +525,50 @@ namespace MathNet.Numerics.Statistics
/// <returns>The <paramref name="order"/> order statistic.</returns>
private static double OrderSelect(IList<double> samples, int left, int right, int order)
{
System.Diagnostics.Debug.Assert(order > 0, "Order must always be positive.");
System.Diagnostics.Debug.Assert(left >= 0 && left <= right, "Left side must always be positive and smaller than right side.");
System.Diagnostics.Debug.Assert(right < samples.Count, "Right side must always be smaller than number of elements in list.");
System.Diagnostics.Debug.Assert(right - left + 1 >= order, "Make sure there are at least order items in the segment [left, right].");
if (left == right)
while (true)
{
return samples[left];
}
System.Diagnostics.Debug.Assert(order > 0, "Order must always be positive.");
System.Diagnostics.Debug.Assert(left >= 0 && left <= right, "Left side must always be positive and smaller than right side.");
System.Diagnostics.Debug.Assert(right < samples.Count, "Right side must always be smaller than number of elements in list.");
System.Diagnostics.Debug.Assert(right - left + 1 >= order, "Make sure there are at least order items in the segment [left, right].");
// The pivot point.
double pivot = samples[right];
if (left == right)
{
return samples[left];
}
// The partioning code.
int i = left - 1;
for (int j = left; j <= right - 1; j++)
{
if (samples[j] <= pivot)
// The pivot point.
double pivot = samples[right];
// The partioning code.
int i = left - 1;
for (int j = left; j <= right - 1; j++)
{
i++;
Sorting.Swap(samples, i, j);
if (samples[j] <= pivot)
{
i++;
Sorting.Swap(samples, i, j);
}
}
}
Sorting.Swap(samples, i + 1, right);
Sorting.Swap(samples, i + 1, right);
// Recursive order finding algorithm.
if (order == (i - left) + 2)
{
return pivot;
}
// Recursive order finding algorithm.
if (order == (i - left) + 2)
{
return pivot;
}
if (order < (i - left) + 2)
{
return OrderSelect(samples, left, i, order);
if (order < (i - left) + 2)
{
right = i;
}
else
{
order = order - i + left - 2;
left = i + 2;
}
}
return OrderSelect(samples, i + 2, right, order - i + left - 2);
}
}
}

57
src/UnitTests/StatisticsTests/StatisticsTests.cs

@ -28,6 +28,8 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using NUnit.Framework;
using Statistics;
@ -134,5 +136,60 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
double[] data = null;
Assert.Throws<ArgumentNullException>(() => data.StandardDeviation());
}
/// <summary>
/// Validate Min/Max on a short sequence.
/// </summary>
[Test]
public void ShortMinMax()
{
var samples = new[] { -1.0, 5, 0, -3, 10, -0.5, 4 };
Assert.That(samples.Minimum(), Is.EqualTo(-3), "Min");
Assert.That(samples.Maximum(), Is.EqualTo(10), "Max");
}
/// <summary>
/// Validate Order Statistics & Median on a short sequence.
/// </summary>
[Test]
public void ShortOrderMedian()
{
// -3 -1 -0.5 0 1 4 5 6 10
var samples = new[] { -1, 5, 0, -3, 10, -0.5, 4, 1, 6 };
Assert.That(samples.Median(), Is.EqualTo(1), "Median");
Assert.That(Statistics.OrderStatistic(samples, 1), Is.EqualTo(-3), "Order-1");
Assert.That(Statistics.OrderStatistic(samples, 3), Is.EqualTo(-0.5), "Order-3");
Assert.That(Statistics.OrderStatistic(samples, 7), Is.EqualTo(5), "Order-7");
Assert.That(Statistics.OrderStatistic(samples, 9), Is.EqualTo(10), "Order-9");
}
/// <summary>
/// Validate Median/Variance/StdDev on a longer fixed-random sequence of a,
/// large mean but only a very small variance, verifying the numerical stability.
/// Naive summation algorithms generally fail this test.
/// </summary>
[Test]
public void StabilityMeanVariance()
{
// Test around 10^9, potential stability issues
var gaussian = new Distributions.Normal(1e+9, 2)
{
RandomSource = new Numerics.Random.MersenneTwister(100)
};
AssertHelpers.AlmostEqual(1e+9, gaussian.Samples().Take(10000).Mean(), 11);
AssertHelpers.AlmostEqual(4d, gaussian.Samples().Take(10000).Variance(), 1);
AssertHelpers.AlmostEqual(2d, gaussian.Samples().Take(10000).StandardDeviation(), 2);
}
/// <summary>
/// http://mathnetnumerics.codeplex.com/workitem/5667
/// </summary>
[Test]
public void Median_CodeplexIssue5667()
{
var seq = File.ReadLines("./data/Codeplex-5667.csv").Select(s => double.Parse(s));
Assert.AreEqual(1.0, seq.Median());
}
}
}

4
src/UnitTests/UnitTests.csproj

@ -733,6 +733,10 @@
</ProjectReference>
</ItemGroup>
<ItemGroup>
<None Include="..\..\data\Codeplex-5667.csv">
<Link>data\Codeplex-5667.csv</Link>
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="..\..\data\Matlab\A.mat">
<Link>data\Matlab\A.mat</Link>
<CopyToOutputDirectory>Always</CopyToOutputDirectory>

Loading…
Cancel
Save