Browse Source

Merge pull request #309 from jneedhamspkt/smallValueHistogram

Small value histogram
pull/312/head
Christoph Ruegg 11 years ago
parent
commit
7d039ddf03
  1. 61
      src/Numerics/Statistics/Histogram.cs
  2. 71
      src/UnitTests/StatisticsTests/HistogramTests.cs

61
src/Numerics/Statistics/Histogram.cs

@ -60,7 +60,7 @@ namespace MathNet.Numerics.Statistics
/// <returns>-1 when the point is less than this bucket, 0 when it is in this bucket and 1 otherwise.</returns>
public int Compare(Bucket bkt1, Bucket bkt2)
{
return bkt2.Width == 0.0
return bkt2.IsSinglePoint
? -bkt1.Contains(bkt2.UpperBound)
: -bkt2.Contains(bkt1.UpperBound);
}
@ -81,6 +81,9 @@ namespace MathNet.Numerics.Statistics
/// <summary>
/// The number of datapoints in the bucket.
/// </summary>
/// <remarks>
/// Value may be NaN if this was constructed as a <see cref="PointComparer"/> argument.
/// </remarks>
public double Count { get; set; }
/// <summary>
@ -103,6 +106,18 @@ namespace MathNet.Numerics.Statistics
Count = count;
}
/// <summary>
/// Constructs a zero-width Bucket that can be used as the search key passed to
/// <see cref="PointComparer"/> when performing a binary search.
/// </summary>
/// <remarks>
/// <c>Count</c> is set to NaN; that NaN is what <c>IsSinglePoint</c> checks for.
/// </remarks>
/// <param name="targetValue">Value to look for; stored as both the lower and the upper bound.</param>
public Bucket(double targetValue)
{
    LowerBound = targetValue;
    UpperBound = targetValue;

    // NaN count marks this instance as a single-point search key.
    Count = double.NaN;
}
/// <summary>
/// Creates a copy of the Bucket with the lowerbound, upperbound and counts exactly equal.
/// </summary>
@ -120,6 +135,15 @@ namespace MathNet.Numerics.Statistics
get { return UpperBound - LowerBound; }
}
/// <summary>
/// Indicates whether this bucket is a single point argument for
/// <see cref="PointComparer"/>, as used when performing a binary search.
/// </summary>
/// <remarks>
/// The check is simply whether <c>Count</c> is NaN.
/// </remarks>
private bool IsSinglePoint
{
    get
    {
        var countIsNaN = double.IsNaN(Count);
        return countIsNaN;
    }
}
/// <summary>
/// Default comparer.
/// </summary>
@ -132,8 +156,10 @@ namespace MathNet.Numerics.Statistics
/// This method checks whether a point is contained within this bucket.
/// </summary>
/// <param name="x">The point to check.</param>
/// <returns>0 if the point falls within the bucket boundaries; -1 if the point is
/// smaller than the bucket, +1 if the point is larger than the bucket.</returns>
/// <returns>
/// 0 if the point falls within the bucket boundaries;
/// -1 if the point is smaller than the bucket,
/// +1 if the point is larger than the bucket.</returns>
public int Contains(double x)
{
if (LowerBound < x)
@ -152,6 +178,11 @@ namespace MathNet.Numerics.Statistics
/// <summary>
/// Comparison of two disjoint buckets. The buckets cannot be overlapping.
/// </summary>
/// <returns>
/// 0 if <c>UpperBound</c> and <c>LowerBound</c> are bit-for-bit equal
/// 1 if this bucket is lower than the compared bucket
/// -1 otherwise
/// </returns>
public int CompareTo(Bucket bucket)
{
if (UpperBound > bucket.LowerBound && LowerBound < bucket.LowerBound)
@ -159,8 +190,8 @@ namespace MathNet.Numerics.Statistics
throw new ArgumentException(Resources.PartialOrderException);
}
if (UpperBound.AlmostEqual(bucket.UpperBound)
&& LowerBound.AlmostEqual(bucket.LowerBound))
if (UpperBound.Equals(bucket.UpperBound)
&& LowerBound.Equals(bucket.LowerBound))
{
return 0;
}
@ -174,9 +205,12 @@ namespace MathNet.Numerics.Statistics
}
/// <summary>
/// Checks whether two Buckets are equal; this method tolerates a difference in lowerbound, upperbound
/// and count given by <seealso cref="Precision.AlmostEqual(double,double)"/>.
/// Checks whether two Buckets are equal.
/// </summary>
/// <remarks>
/// <c>UpperBound</c> and <c>LowerBound</c> are compared bit-for-bit, but this method tolerates a
/// difference in <c>Count</c> given by <seealso cref="Precision.AlmostEqual(double,double)"/>.
/// </remarks>
public override bool Equals(object obj)
{
if (!(obj is Bucket))
@ -185,8 +219,8 @@ namespace MathNet.Numerics.Statistics
}
var bucket = (Bucket) obj;
return LowerBound.AlmostEqual(bucket.LowerBound)
&& UpperBound.AlmostEqual(bucket.UpperBound)
return LowerBound.Equals(bucket.LowerBound)
&& UpperBound.Equals(bucket.UpperBound)
&& Count.AlmostEqual(bucket.Count);
}
@ -257,10 +291,13 @@ namespace MathNet.Numerics.Statistics
// Add buckets for each bin; the smallest bucket's lowerbound must be slightly smaller
// than the minimal element.
AddBucket(new Bucket(lower.Decrement(), lower + width));
double fNextLowerBound = lower + width;
AddBucket(new Bucket(lower.Decrement(), fNextLowerBound));
for (int n = 1; n < nbuckets; n++)
{
AddBucket(new Bucket(lower + n * width, lower + (n + 1) * width));
AddBucket(new Bucket(
fNextLowerBound,
fNextLowerBound = (lower + (n + 1) * width)));
}
AddData(data);
@ -380,7 +417,7 @@ namespace MathNet.Numerics.Statistics
LazySort();
// Binary search for the bucket index.
int index = _buckets.BinarySearch(new Bucket(v, v), Bucket.DefaultPointComparer);
int index = _buckets.BinarySearch(new Bucket(v), Bucket.DefaultPointComparer);
if (index < 0)
{

71
src/UnitTests/StatisticsTests/HistogramTests.cs

@ -28,6 +28,8 @@
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
using System.Linq;
namespace MathNet.Numerics.UnitTests.StatisticsTests
{
using System;
@ -46,6 +48,20 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
/// </summary>
readonly double[] _smallDataset = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5};
/// <summary>
/// Dataset array with small absolute values
/// </summary>
/// <remarks>
/// These values are chosen to precisely match the upper bounds of 9 buckets,
/// from 0.5e-22 to 9.5E-22
/// </remarks>
readonly double[] _smallValueDataset =
{
0.5e-22, 1.5E-22, 2.5E-22, 3.4999999999999996E-22, 4.4999999999999989E-22,
5.4999999999999983E-22, 6.4999999999999986E-22, 7.4999999999999988E-22,
8.4999999999999982E-22, 9.5E-22
};
/// <summary>
/// Can create empty bucket.
/// </summary>
@ -371,5 +387,60 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests
Assert.AreEqual(0.0, hist.LowerBound);
Assert.AreEqual(10.0, hist.UpperBound);
}
/// <summary>
/// Builds a nine-bucket histogram from the small-valued dataset without explicit bounds
/// and verifies the bucket counts and the resulting lower/upper bounds.
/// </summary>
[Test]
public void SmallValuesHistogramWithoutBounds()
{
    const int expectedBuckets = 9;
    var histogram = new Histogram(_smallValueDataset, expectedBuckets);

    Assert.AreEqual(expectedBuckets, histogram.BucketCount);

    // Every bucket after the first is expected to hold exactly one sample ...
    for (var bucket = 1; bucket < expectedBuckets; bucket++)
    {
        Assert.AreEqual(1.0, histogram[bucket].Count);
    }

    // ... while the first bucket is expected to hold two.
    Assert.AreEqual(2.0, histogram[0].Count);

    Assert.AreEqual(0.5e-22.Decrement(), histogram.LowerBound);
    Assert.AreEqual(9.5e-22, histogram.UpperBound);
}
/// <summary>
/// Builds a ten-bucket histogram from the small-valued dataset with explicit bounds
/// of 0.0 and 10e-22, and verifies that each bucket holds exactly one sample.
/// </summary>
[Test]
public void SmallValuesHistogramWithBounds()
{
    const int expectedBuckets = 10;
    const double lowerBound = 0.0;
    const double upperBound = 10e-22;

    var histogram = new Histogram(_smallValueDataset, expectedBuckets, lowerBound, upperBound);

    Assert.AreEqual(expectedBuckets, histogram.BucketCount);

    for (var bucket = 0; bucket < expectedBuckets; bucket++)
    {
        Assert.AreEqual(1.0, histogram[bucket].Count);
    }

    Assert.AreEqual(lowerBound, histogram.LowerBound);
    Assert.AreEqual(upperBound, histogram.UpperBound);
}
/// <summary>
/// Builds a histogram with 100 small-valued buckets between 0.0 and 10e-22 and
/// verifies the bucket count and the bounds.
/// </summary>
[Test]
public void SmallValuesManyBucketsHistogramWithBounds()
{
    const int expectedBuckets = 100;
    const double lowerBound = 0.0;
    const double upperBound = 10e-22;

    var histogram = new Histogram(_smallValueDataset, expectedBuckets, lowerBound, upperBound);

    Assert.AreEqual(expectedBuckets, histogram.BucketCount);
    Assert.AreEqual(lowerBound, histogram.LowerBound);
    Assert.AreEqual(upperBound, histogram.UpperBound);
}
}
}

Loading…
Cancel
Save