diff --git a/src/UnitTests/DistributionTests/CommonDistributionTests.cs b/src/UnitTests/DistributionTests/CommonDistributionTests.cs index 42021271..efa62174 100644 --- a/src/UnitTests/DistributionTests/CommonDistributionTests.cs +++ b/src/UnitTests/DistributionTests/CommonDistributionTests.cs @@ -59,11 +59,11 @@ namespace MathNet.Numerics.UnitTests.DistributionTests new Categorical(new[] { 0.7, 0.3 }), //new ConwayMaxwellPoisson(0.2, 0.4), new DiscreteUniform(1, 10), - //new Geometric(0.1), + //new Geometric(0.2), new Hypergeometric(20, 3, 5), //new NegativeBinomial(4, 0.6), //new Poisson(0.4), - new Zipf(0.6, 10), + new Zipf(3.0, 10), }; readonly List _continuousDistributions = @@ -134,55 +134,55 @@ namespace MathNet.Numerics.UnitTests.DistributionTests } } - /// - /// Test the method which samples only one variable at a time. - /// [Test] - public void SampleFollowsCorrectDistribution() + public void DiscreteSampleIsDistributedCorrectly() { - foreach (var dd in _discreteDistributions) + foreach (var dist in _discreteDistributions) { - dd.RandomSource = new SystemRandomSource(1, false); - var samples = new double[NumberOfTestSamples]; + dist.RandomSource = new SystemRandomSource(1, false); + var samples = new int[NumberOfTestSamples]; for (var i = 0; i < NumberOfTestSamples; i++) { - samples[i] = dd.Sample(); + samples[i] = dist.Sample(); } + DiscreteVapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, dist); + } + } - VapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, dd); + [Test] + public void DiscreteSampleSequenceIsDistributedCorrectly() + { + foreach (var dist in _discreteDistributions) + { + dist.RandomSource = new SystemRandomSource(1, false); + var samples = dist.Samples().Take(NumberOfTestSamples).ToArray(); + DiscreteVapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, dist); } + } - foreach (var cd in _continuousDistributions) + [Test] + public void ContinuousSampleIsDistributedCorrectly() + { + foreach (var dist in _continuousDistributions) { - cd.RandomSource = new SystemRandomSource(1, false); + dist.RandomSource = new SystemRandomSource(1, false); var samples = new double[NumberOfTestSamples]; for (var i = 0; i < NumberOfTestSamples; i++) { - samples[i] = cd.Sample(); + samples[i] = dist.Sample(); } - - VapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, cd); + ContinuousVapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, dist); } } - /// - /// Test the method which samples a sequence of variables. - /// [Test] - public void SamplesFollowsCorrectDistribution() + public void ContinuousSampleSequenceIsDistributedCorrectly() { - foreach (var dd in _discreteDistributions) + foreach (var dist in _continuousDistributions) { - dd.RandomSource = new SystemRandomSource(1, false); - var samples = dd.Samples().Select(x => (double)x).Take(NumberOfTestSamples).ToArray(); - VapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, dd); - } - - foreach (var cd in _continuousDistributions) - { - cd.RandomSource = new SystemRandomSource(1, false); - var samples = cd.Samples().Take(NumberOfTestSamples).ToArray(); - VapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, cd); + dist.RandomSource = new SystemRandomSource(1, false); + var samples = dist.Samples().Take(NumberOfTestSamples).ToArray(); + ContinuousVapnikChervonenkisTest(ErrorTolerance, ErrorProbability, samples, dist); } } @@ -193,10 +193,10 @@ namespace MathNet.Numerics.UnitTests.DistributionTests /// The error probability we are willing to tolerate. /// The samples to use for testing. /// The distribution we are testing. - public static void VapnikChervonenkisTest(double epsilon, double delta, double[] s, IUnivariateDistribution dist) + public static void ContinuousVapnikChervonenkisTest(double epsilon, double delta, double[] s, IContinuousDistribution dist) { // Using VC-dimension, we can bound the probability of making an error when estimating empirical probability - // distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics". + // distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics". // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false . // For intervals on the real line the VC-dimension is 2. Assert.Greater(s.Length, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon)); @@ -205,7 +205,39 @@ namespace MathNet.Numerics.UnitTests.DistributionTests for (var i = 0; i < NumberOfHistogramBuckets; i++) { var p = dist.CumulativeDistribution(histogram[i].UpperBound) - dist.CumulativeDistribution(histogram[i].LowerBound); - var pe = histogram[i].Count / s.Length; + var pe = histogram[i].Count/(double)s.Length; + Assert.Less(Math.Abs(p - pe), epsilon, dist.ToString()); + } + } + + /// + /// Vapnik Chervonenkis test. + /// + /// The error we are willing to tolerate. + /// The error probability we are willing to tolerate. + /// The samples to use for testing. + /// The distribution we are testing. + public static void DiscreteVapnikChervonenkisTest(double epsilon, double delta, int[] s, IDiscreteDistribution dist) + { + // Using VC-dimension, we can bound the probability of making an error when estimating empirical probability + // distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics". + // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false . + // For intervals on the real line the VC-dimension is 2. + Assert.Greater(s.Length, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon)); + + var min = s.Min(); + var max = s.Max(); + + var histogram = new int[max - min + 1]; + for (int i = 0; i < s.Length; i++) + { + histogram[s[i] - min]++; + } + + for (int i = 0; i < histogram.Length; i++) + { + var p = dist.CumulativeDistribution(i + min) - dist.CumulativeDistribution(i + min - 1.0); + var pe = histogram[i]/(double)s.Length; Assert.Less(Math.Abs(p - pe), epsilon, dist.ToString()); } } diff --git a/src/UnitTests/DistributionTests/Multivariate/NormalGammaTests.cs b/src/UnitTests/DistributionTests/Multivariate/NormalGammaTests.cs index 6e7462c8..c7503553 100644 --- a/src/UnitTests/DistributionTests/Multivariate/NormalGammaTests.cs +++ b/src/UnitTests/DistributionTests/Multivariate/NormalGammaTests.cs @@ -349,14 +349,14 @@ namespace MathNet.Numerics.UnitTests.DistributionTests.Multivariate var precMarginal = cd.PrecisionMarginal(); // Check the precision distribution. - CommonDistributionTests.VapnikChervonenkisTest( + CommonDistributionTests.ContinuousVapnikChervonenkisTest( CommonDistributionTests.ErrorTolerance, CommonDistributionTests.ErrorProbability, precs, precMarginal); // Check the mean distribution. - CommonDistributionTests.VapnikChervonenkisTest( + CommonDistributionTests.ContinuousVapnikChervonenkisTest( CommonDistributionTests.ErrorTolerance, CommonDistributionTests.ErrorProbability, means, @@ -381,14 +381,14 @@ namespace MathNet.Numerics.UnitTests.DistributionTests.Multivariate var precMarginal = cd.PrecisionMarginal(); // Check the precision distribution. - CommonDistributionTests.VapnikChervonenkisTest( + CommonDistributionTests.ContinuousVapnikChervonenkisTest( CommonDistributionTests.ErrorTolerance, CommonDistributionTests.ErrorProbability, precs, precMarginal); // Check the mean distribution. - CommonDistributionTests.VapnikChervonenkisTest( + CommonDistributionTests.ContinuousVapnikChervonenkisTest( CommonDistributionTests.ErrorTolerance, CommonDistributionTests.ErrorProbability, means,