From 62300406df09839d3abba75bf13f3edbd28d1eeb Mon Sep 17 00:00:00 2001 From: Justin Needham Date: Wed, 6 May 2015 14:08:42 -0400 Subject: [PATCH 1/3] Added Unit tests that fail current bucket sorting algorithm --- .../StatisticsTests/HistogramTests.cs | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/UnitTests/StatisticsTests/HistogramTests.cs b/src/UnitTests/StatisticsTests/HistogramTests.cs index 3420d3ee..8d1e353f 100644 --- a/src/UnitTests/StatisticsTests/HistogramTests.cs +++ b/src/UnitTests/StatisticsTests/HistogramTests.cs @@ -28,6 +28,8 @@ // OTHER DEALINGS IN THE SOFTWARE. // +using System.Linq; + namespace MathNet.Numerics.UnitTests.StatisticsTests { using System; @@ -46,6 +48,16 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests /// readonly double[] _smallDataset = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5}; + /// + /// Datatset array with small absolute values + /// + /// + /// These values are chosen to precisely match the upper bounds of 9 buckets, + /// from 0.5e-22 to 9.5E-22 + /// + readonly double[] _smallValueDataset = { 0.5e-22, 1.5E-22, 2.5E-22, 3.4999999999999996E-22, 4.4999999999999989E-22, + 5.4999999999999983E-22, 6.4999999999999986E-22, 7.4999999999999988E-22, + 8.4999999999999982E-22, 9.5E-22}; /// /// Can create empty bucket. /// @@ -371,5 +383,60 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests Assert.AreEqual(0.0, hist.LowerBound); Assert.AreEqual(10.0, hist.UpperBound); } + + + + /// + /// Dataset of small values histogram without bounds. + /// + [Test] + public void SmallValuesHistogramWithoutBounds() + { + var hist = new Histogram(_smallValueDataset, 9); + + Assert.AreEqual(9, hist.BucketCount); + + for (var i = 1; i < 9; i++) + { + Assert.AreEqual(1.0, hist[i].Count); + } + + Assert.AreEqual(2.0, hist[0].Count); + + Assert.AreEqual(0.5e-22.Decrement(), hist.LowerBound); + Assert.AreEqual(9.5e-22, hist.UpperBound); + } + + /// + /// Dataset of small values histogram with bounds. + /// + [Test] + public void SmallValuesHistogramWithBounds() + { + var hist = new Histogram(_smallValueDataset, 10, 0.0, 10e-22); + + Assert.AreEqual(10, hist.BucketCount); + + for (var i = 0; i < 10; i++) + { + Assert.AreEqual(1.0, hist[i].Count); + } + + Assert.AreEqual(0.0, hist.LowerBound); + Assert.AreEqual(10.0e-22, hist.UpperBound); + } + + /// + /// Attempt to construct a dataset with small valued buckets + /// + [Test] + public void SmallValuesManyBucketsHistogramWithBounds() + { + var hist = new Histogram(_smallValueDataset, 100, 0.0, 10e-22); + + Assert.AreEqual(100, hist.BucketCount); + Assert.AreEqual(0.0, hist.LowerBound); + Assert.AreEqual(10.0e-22, hist.UpperBound); + } } } From cbc6c2cdb5f5615e4cb53b09f5a69a31cff68436 Mon Sep 17 00:00:00 2001 From: Justin Needham Date: Wed, 6 May 2015 16:40:57 -0400 Subject: [PATCH 2/3] Authored Proposed fix for handling small-width buckets --- src/Numerics/Statistics/Histogram.cs | 61 ++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/src/Numerics/Statistics/Histogram.cs b/src/Numerics/Statistics/Histogram.cs index 5d9de518..1772e2b2 100644 --- a/src/Numerics/Statistics/Histogram.cs +++ b/src/Numerics/Statistics/Histogram.cs @@ -60,7 +60,7 @@ namespace MathNet.Numerics.Statistics /// -1 when the point is less than this bucket, 0 when it is in this bucket and 1 otherwise. public int Compare(Bucket bkt1, Bucket bkt2) { - return bkt2.Width == 0.0 + return bkt2.IsSinglePoint ? -bkt1.Contains(bkt2.UpperBound) : -bkt2.Contains(bkt1.UpperBound); } @@ -81,6 +81,9 @@ namespace MathNet.Numerics.Statistics /// /// The number of datapoints in the bucket. /// + /// + /// Value may be NaN if this was constructed as a argument. + /// public double Count { get; set; } /// @@ -103,6 +106,18 @@ namespace MathNet.Numerics.Statistics Count = count; } + /// + /// Constructs a Bucket that can be used as an argument for + /// when performing a Binary search. + /// + /// Value to look for + public Bucket(double targetValue) + { + LowerBound = targetValue; + UpperBound = targetValue; + Count = double.NaN; + } + /// /// Creates a copy of the Bucket with the lowerbound, upperbound and counts exactly equal. /// @@ -120,6 +135,15 @@ namespace MathNet.Numerics.Statistics get { return UpperBound - LowerBound; } } + /// + /// True if this is a single point argument for + /// when performing a Binary search. + /// + private bool IsSinglePoint + { + get { return double.IsNaN(Count); } + } + /// /// Default comparer. /// @@ -132,8 +156,10 @@ namespace MathNet.Numerics.Statistics /// This method check whether a point is contained within this bucket. /// /// The point to check. - /// 0 if the point falls within the bucket boundaries; -1 if the point is - /// smaller than the bucket, +1 if the point is larger than the bucket. + /// + /// 0 if the point falls within the bucket boundaries; + /// -1 if the point is smaller than the bucket, + /// +1 if the point is larger than the bucket. public int Contains(double x) { if (LowerBound < x) @@ -152,6 +178,11 @@ namespace MathNet.Numerics.Statistics /// /// Comparison of two disjoint buckets. The buckets cannot be overlapping. /// + /// + /// 0 if UpperBound and LowerBound are bit-for-bit equal + /// 1 if This bucket is lower that the compared bucket + /// -1 otherwise + /// public int CompareTo(Bucket bucket) { if (UpperBound > bucket.LowerBound && LowerBound < bucket.LowerBound) @@ -159,8 +190,8 @@ namespace MathNet.Numerics.Statistics throw new ArgumentException(Resources.PartialOrderException); } - if (UpperBound.AlmostEqual(bucket.UpperBound) - && LowerBound.AlmostEqual(bucket.LowerBound)) + if (UpperBound.Equals(bucket.UpperBound) + && LowerBound.Equals(bucket.LowerBound)) { return 0; } @@ -174,9 +205,12 @@ namespace MathNet.Numerics.Statistics } /// - /// Checks whether two Buckets are equal; this method tolerates a difference in lowerbound, upperbound - /// and count given by . + /// Checks whether two Buckets are equal. /// + /// + /// UpperBound and LowerBound are compared bit-for-bit, but This method tolerates a + /// difference in Count given by . + /// public override bool Equals(object obj) { if (!(obj is Bucket)) @@ -185,8 +219,8 @@ namespace MathNet.Numerics.Statistics } var bucket = (Bucket) obj; - return LowerBound.AlmostEqual(bucket.LowerBound) - && UpperBound.AlmostEqual(bucket.UpperBound) + return LowerBound.Equals(bucket.LowerBound) + && UpperBound.Equals(bucket.UpperBound) && Count.AlmostEqual(bucket.Count); } @@ -257,10 +291,13 @@ namespace MathNet.Numerics.Statistics // Add buckets for each bin; the smallest bucket's lowerbound must be slightly smaller // than the minimal element. - AddBucket(new Bucket(lower.Decrement(), lower + width)); + double fNextLowerBound = lower + width; + AddBucket(new Bucket(lower.Decrement(), fNextLowerBound)); for (int n = 1; n < nbuckets; n++) { - AddBucket(new Bucket(lower + n * width, lower + (n + 1) * width)); + AddBucket(new Bucket( + fNextLowerBound, + fNextLowerBound = (lower + (n + 1) * width))); } AddData(data); @@ -380,7 +417,7 @@ namespace MathNet.Numerics.Statistics LazySort(); // Binary search for the bucket index. - int index = _buckets.BinarySearch(new Bucket(v, v), Bucket.DefaultPointComparer); + int index = _buckets.BinarySearch(new Bucket(v), Bucket.DefaultPointComparer); if (index < 0) { From 10abeebde6f784285047c2e668ae63428e9f532a Mon Sep 17 00:00:00 2001 From: Justin Needham Date: Wed, 6 May 2015 17:35:16 -0400 Subject: [PATCH 3/3] Better code formatting for test vector --- src/UnitTests/StatisticsTests/HistogramTests.cs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/UnitTests/StatisticsTests/HistogramTests.cs b/src/UnitTests/StatisticsTests/HistogramTests.cs index 8d1e353f..c5f79e5b 100644 --- a/src/UnitTests/StatisticsTests/HistogramTests.cs +++ b/src/UnitTests/StatisticsTests/HistogramTests.cs @@ -53,11 +53,15 @@ namespace MathNet.Numerics.UnitTests.StatisticsTests /// /// /// These values are chosen to precisely match the upper bounds of 9 buckets, - /// from 0.5e-22 to 9.5E-22 - /// - readonly double[] _smallValueDataset = { 0.5e-22, 1.5E-22, 2.5E-22, 3.4999999999999996E-22, 4.4999999999999989E-22, - 5.4999999999999983E-22, 6.4999999999999986E-22, 7.4999999999999988E-22, - 8.4999999999999982E-22, 9.5E-22}; + /// from 0.5e-22 to 9.5E-22 + /// + readonly double[] _smallValueDataset = + { + 0.5e-22, 1.5E-22, 2.5E-22, 3.4999999999999996E-22, 4.4999999999999989E-22, + 5.4999999999999983E-22, 6.4999999999999986E-22, 7.4999999999999988E-22, + 8.4999999999999982E-22, 9.5E-22 + }; + /// /// Can create empty bucket. ///