From d285d2eb2626f49f8f9d0af6c62ca982f821a3ca Mon Sep 17 00:00:00 2001 From: Christoph Ruegg Date: Fri, 21 Mar 2014 20:05:45 +0100 Subject: [PATCH] Distributions: static PMF, PMFLn, CDF functions for discrete distributions --- src/FSharp/Distributions.fs | 8 +- src/Numerics/Distributions/Bernoulli.cs | 70 ++++++--- src/Numerics/Distributions/Binomial.cs | 69 +++++--- src/Numerics/Distributions/Categorical.cs | 147 ++++++++++++++---- .../Distributions/ConwayMaxwellPoisson.cs | 55 ++++++- src/Numerics/Distributions/DiscreteUniform.cs | 68 +++++--- src/Numerics/Distributions/Geometric.cs | 51 ++++-- src/Numerics/Distributions/Hypergeometric.cs | 63 +++++++- .../Distributions/NegativeBinomial.cs | 61 ++++++-- src/Numerics/Distributions/Poisson.cs | 37 +++++ src/Numerics/Distributions/Zipf.cs | 47 +++++- .../Discrete/CategoricalTests.cs | 4 +- .../Discrete/HypergeometricTests.cs | 4 +- 13 files changed, 544 insertions(+), 140 deletions(-) diff --git a/src/FSharp/Distributions.fs b/src/FSharp/Distributions.fs index 53aa354f..ac079b1f 100644 --- a/src/FSharp/Distributions.fs +++ b/src/FSharp/Distributions.fs @@ -4,7 +4,7 @@ // http://github.com/mathnet/mathnet-numerics // http://mathnetnumerics.codeplex.com // -// Copyright (c) 2009-2012 Math.NET +// Copyright (c) 2009-2014 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation @@ -56,8 +56,8 @@ module Sample = let binomialSeq p n rng = Binomial.Samples(rng, p, n) /// Categorical with an array of nonnegative ratios defining the relative probability mass (unnormalized). - let categorical probabilityMass rng = Categorical.SampleWithProbabilityMass(rng, probabilityMass) - let categoricalSeq probabilityMass rng = Categorical.SamplesWithProbabilityMass(rng, probabilityMass) + let categorical probabilityMass rng = Categorical.Sample(rng, probabilityMass) + let categoricalSeq probabilityMass rng = Categorical.Samples(rng, probabilityMass) /// Cauchy with location (x0) and scale (γ). let cauchy location scale rng = Cauchy.Sample(rng, location, scale) @@ -119,7 +119,7 @@ module Sample = let logNormal mu sigma rng = LogNormal.Sample(rng, mu, sigma) let logNormalSeq mu sigma rng = LogNormal.Samples(rng, mu, sigma) - /// Negative-Binomial with number of failures (r) until the experiment stoppedand probability (p) of a trial resulting in success. + /// Negative-Binomial with number of failures (r) until the experiment stopped and probability (p) of a trial resulting in success. let negativeBinomial r p rng = NegativeBinomial.Sample(rng, r, p) let negativeBinomialSeq r p rng = NegativeBinomial.Samples(rng, r, p) diff --git a/src/Numerics/Distributions/Bernoulli.cs b/src/Numerics/Distributions/Bernoulli.cs index bc124510..1a0fa11f 100644 --- a/src/Numerics/Distributions/Bernoulli.cs +++ b/src/Numerics/Distributions/Bernoulli.cs @@ -191,16 +191,8 @@ namespace MathNet.Numerics.Distributions /// the probability mass at location . public double Probability(int k) { - if (k == 0) - { - return 1.0 - _p; - } - - if (k == 1) - { - return _p; - } - + if (k == 0) return 1.0 - _p; + if (k == 1) return _p; return 0.0; } @@ -211,11 +203,7 @@ namespace MathNet.Numerics.Distributions /// the log probability mass at location . public double ProbabilityLn(int k) { - if (k == 0) - { - return Math.Log(1.0 - _p); - } - + if (k == 0) return Math.Log(1.0 - _p); return k == 1 ? Math.Log(_p) : Double.NegativeInfinity; } @@ -226,17 +214,51 @@ namespace MathNet.Numerics.Distributions /// the cumulative distribution at location . public double CumulativeDistribution(double x) { - if (x < 0.0) - { - return 0.0; - } + if (x < 0.0) return 0.0; + if (x >= 1.0) return 1.0; + return 1.0 - _p; + } - if (x < 1.0) - { - return 1.0 - _p; - } + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The probability (p) of generating one. Range: 0 ≤ p ≤ 1. + /// the probability mass at location . + public static double PMF(double p, int k) + { + if (!(p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (k == 0) return 1.0 - p; + if (k == 1) return p; + return 0.0; + } - return 1.0; + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The probability (p) of generating one. Range: 0 ≤ p ≤ 1. + /// the log probability mass at location . + public static double PMFLn(double p, int k) + { + if (!(p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (k == 0) return Math.Log(1.0 - p); + return k == 1 ? Math.Log(p) : Double.NegativeInfinity; + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The probability (p) of generating one. Range: 0 ≤ p ≤ 1. + /// the cumulative distribution at location . + /// + public static double CDF(double p, double x) + { + if (!(p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (x < 0.0) return 0.0; + if (x >= 1.0) return 1.0; + return 1.0 - p; } /// diff --git a/src/Numerics/Distributions/Binomial.cs b/src/Numerics/Distributions/Binomial.cs index a1f3f096..b6d3e75e 100644 --- a/src/Numerics/Distributions/Binomial.cs +++ b/src/Numerics/Distributions/Binomial.cs @@ -228,14 +228,7 @@ namespace MathNet.Numerics.Distributions /// the probability mass at location . public double Probability(int k) { - if (k < 0) return 0.0; - if (k > _trials) return 0.0; - if (_p == 0.0 && k == 0) return 1.0; - if (_p == 0.0) return 0.0; - if (_p == 1.0 && k == _trials) return 1.0; - if (_p == 1.0) return 0.0; - - return SpecialFunctions.Binomial(_trials, k)*Math.Pow(_p, k)*Math.Pow(1.0 - _p, _trials - k); + return PMF(_p, _trials, k); } /// @@ -245,14 +238,7 @@ namespace MathNet.Numerics.Distributions /// the log probability mass at location . public double ProbabilityLn(int k) { - if (k < 0) return Double.NegativeInfinity; - if (k > _trials) return Double.NegativeInfinity; - if (_p == 0.0 && k == 0) return 0.0; - if (_p == 0.0) return Double.NegativeInfinity; - if (_p == 1.0 && k == _trials) return 0.0; - if (_p == 1.0) return Double.NegativeInfinity; - - return SpecialFunctions.BinomialLn(_trials, k) + (k*Math.Log(_p)) + ((_trials - k)*Math.Log(1.0 - _p)); + return PMFLn(_p, _trials, k); } /// @@ -262,11 +248,56 @@ namespace MathNet.Numerics.Distributions /// the cumulative distribution at location . public double CumulativeDistribution(double x) { - if (x < 0.0) return 0.0; - if (x > _trials) return 1.0; + return CDF(_p, _trials, x); + } + + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The success probability (p) in each trial. Range: 0 ≤ p ≤ 1. + /// The number of trials (n). Range: n ≥ 0. + /// the probability mass at location . + public static double PMF(double p, int n, int k) + { + if (!(p >= 0.0 && p <= 1.0 && n >= 0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (k < 0 || k > n) return 0.0; + if (p == 0.0) return k == 0 ? 1.0 : 0.0; + if (p == 1.0) return k == n ? 1.0 : 0.0; + return Math.Exp(SpecialFunctions.BinomialLn(n, k) + (k*Math.Log(p)) + ((n - k)*Math.Log(1.0 - p))); + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The success probability (p) in each trial. Range: 0 ≤ p ≤ 1. + /// The number of trials (n). Range: n ≥ 0. + /// the log probability mass at location . + public static double PMFLn(double p, int n, int k) + { + if (!(p >= 0.0 && p <= 1.0 && n >= 0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (k < 0 || k > n) return Double.NegativeInfinity; + if (p == 0.0) return k == 0 ? 0.0 : Double.NegativeInfinity; + if (p == 1.0) return k == n ? 0.0 : Double.NegativeInfinity; + return SpecialFunctions.BinomialLn(n, k) + (k*Math.Log(p)) + ((n - k)*Math.Log(1.0 - p)); + } + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The success probability (p) in each trial. Range: 0 ≤ p ≤ 1. + /// The number of trials (n). Range: n ≥ 0. + /// the cumulative distribution at location . + /// + public static double CDF(double p, int n, double x) + { + if (!(p >= 0.0 && p <= 1.0 && n >= 0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (x < 0.0) return 0.0; + if (x > n) return 1.0; double k = Math.Floor(x); - return SpecialFunctions.BetaRegularized(_trials - k, k + 1, 1 - _p); + return SpecialFunctions.BetaRegularized(n - k, k + 1, 1 - p); } /// diff --git a/src/Numerics/Distributions/Categorical.cs b/src/Numerics/Distributions/Categorical.cs index 44335c5b..2bbf3f8f 100644 --- a/src/Numerics/Distributions/Categorical.cs +++ b/src/Numerics/Distributions/Categorical.cs @@ -4,7 +4,7 @@ // http://github.com/mathnet/mathnet-numerics // http://mathnetnumerics.codeplex.com // -// Copyright (c) 2009-2013 Math.NET +// Copyright (c) 2009-2014 Math.NET // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation @@ -365,6 +365,91 @@ namespace MathNet.Numerics.Distributions return idx; } + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// An array of nonnegative ratios: this array does not need to be normalized + /// as this is often impossible using floating point arithmetic. + /// the probability mass at location . + public static double PMF(double[] probabilityMass, int k) + { + if (Control.CheckDistributionParameters && !IsValidProbabilityMass(probabilityMass)) + { + throw new ArgumentException(Resources.InvalidDistributionParameters); + } + + if (k < 0) return 0.0; + if (k >= probabilityMass.Length) return 0.0; + + return probabilityMass[k]/probabilityMass.Sum(); + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// An array of nonnegative ratios: this array does not need to be normalized + /// as this is often impossible using floating point arithmetic. + /// the log probability mass at location . + public static double PMFLn(double[] probabilityMass, int k) + { + return Math.Log(PMF(probabilityMass, k)); + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// An array of nonnegative ratios: this array does not need to be normalized + /// as this is often impossible using floating point arithmetic. + /// the cumulative distribution at location . + /// + public static double CDF(double[] probabilityMass, double x) + { + if (Control.CheckDistributionParameters && !IsValidProbabilityMass(probabilityMass)) + { + throw new ArgumentException(Resources.InvalidDistributionParameters); + } + + if (x < 0.0) return 0.0; + if (x >= probabilityMass.Length) return 1.0; + + var cdfUnnormalized = ProbabilityMassToCumulativeDistribution(probabilityMass); + return cdfUnnormalized[(int)Math.Floor(x)]/cdfUnnormalized[cdfUnnormalized.Length - 1]; + } + + /// + /// Computes the inverse of the cumulative distribution function (InvCDF) for the distribution + /// at the given probability. + /// + /// An array of nonnegative ratios: this array does not need to be normalized + /// as this is often impossible using floating point arithmetic. + /// A real number between 0 and 1. + /// An integer between 0 and the size of the categorical (exclusive), that corresponds to the inverse CDF for the given probability. + public static int InvCDF(double[] probabilityMass, double probability) + { + if (Control.CheckDistributionParameters && !IsValidProbabilityMass(probabilityMass)) + { + throw new ArgumentException(Resources.InvalidDistributionParameters); + } + + if (probability < 0.0 || probability > 1.0 || Double.IsNaN(probability)) + { + throw new ArgumentOutOfRangeException("probability"); + } + + var cdfUnnormalized = ProbabilityMassToCumulativeDistribution(probabilityMass); + var denormalizedProbability = probability * cdfUnnormalized[cdfUnnormalized.Length - 1]; + int idx = Array.BinarySearch(cdfUnnormalized, denormalizedProbability); + if (idx < 0) + { + idx = ~idx; + } + + return idx; + } + /// /// Computes the inverse of the cumulative distribution function (InvCDF) for the distribution /// at the given probability. @@ -372,7 +457,7 @@ namespace MathNet.Numerics.Distributions /// An array corresponding to a CDF for a categorical distribution. Not assumed to be normalized. /// A real number between 0 and 1. /// An integer between 0 and the size of the categorical (exclusive), that corresponds to the inverse CDF for the given probability. - public static int InverseCumulativeDistribution(double[] cdfUnnormalized, double probability) + public static int InvCDFWithCumulativeDistribution(double[] cdfUnnormalized, double probability) { if (Control.CheckDistributionParameters && !IsValidCumulativeDistribution(cdfUnnormalized)) { @@ -398,16 +483,16 @@ namespace MathNet.Numerics.Distributions /// Computes the cumulative distribution function. This method performs no parameter checking. /// If the probability mass was normalized, the resulting cumulative distribution is normalized as well (up to numerical errors). /// - /// An array of nonnegative ratios: this array does not need to be normalized + /// An array of nonnegative ratios: this array does not need to be normalized /// as this is often impossible using floating point arithmetic. /// An array representing the unnormalized cumulative distribution function. - internal static double[] ProbabilityMassToCumulativeDistribution(double[] pmfUnnormalized) + internal static double[] ProbabilityMassToCumulativeDistribution(double[] probabilityMass) { - var cdfUnnormalized = new double[pmfUnnormalized.Length]; - cdfUnnormalized[0] = pmfUnnormalized[0]; - for (int i = 1; i < pmfUnnormalized.Length; i++) + var cdfUnnormalized = new double[probabilityMass.Length]; + cdfUnnormalized[0] = probabilityMass[0]; + for (int i = 1; i < probabilityMass.Length; i++) { - cdfUnnormalized[i] = cdfUnnormalized[i - 1] + pmfUnnormalized[i]; + cdfUnnormalized[i] = cdfUnnormalized[i - 1] + probabilityMass[i]; } return cdfUnnormalized; @@ -458,71 +543,71 @@ namespace MathNet.Numerics.Distributions /// Samples one categorical distributed random variable; also known as the Discrete distribution. /// /// The random number generator to use. - /// An array of the cumulative distribution. Not assumed to be normalized. + /// An array of nonnegative ratios. Not assumed to be normalized. /// One random integer between 0 and the size of the categorical (exclusive). - public static int SampleWithCumulativeDistribution(System.Random rnd, double[] cdfUnnormalized) + public static int Sample(System.Random rnd, double[] probabilityMass) { - if (Control.CheckDistributionParameters && !IsValidCumulativeDistribution(cdfUnnormalized)) + if (Control.CheckDistributionParameters && !IsValidProbabilityMass(probabilityMass)) { throw new ArgumentException(Resources.InvalidDistributionParameters); } - return SampleUnchecked(rnd, cdfUnnormalized); + var cdf = ProbabilityMassToCumulativeDistribution(probabilityMass); + return SampleUnchecked(rnd, cdf); } /// - /// Samples one categorical distributed random variable; also known as the Discrete distribution. + /// Samples a categorically distributed random variable. /// /// The random number generator to use. - /// An array of nonnegative ratios. Not assumed to be normalized. - /// One random integer between 0 and the size of the categorical (exclusive). - public static int SampleWithProbabilityMass(System.Random rnd, double[] pmfUnnormalized) + /// An array of nonnegative ratios. Not assumed to be normalized. + /// random integers between 0 and the size of the categorical (exclusive). + public static IEnumerable Samples(System.Random rnd, double[] probabilityMass) { - if (Control.CheckDistributionParameters && !IsValidProbabilityMass(pmfUnnormalized)) + if (Control.CheckDistributionParameters && !IsValidProbabilityMass(probabilityMass)) { throw new ArgumentException(Resources.InvalidDistributionParameters); } - var cdf = ProbabilityMassToCumulativeDistribution(pmfUnnormalized); - return SampleUnchecked(rnd, cdf); + var cdf = ProbabilityMassToCumulativeDistribution(probabilityMass); + while (true) + { + yield return SampleUnchecked(rnd, cdf); + } } /// - /// Samples a categorically distributed random variable. + /// Samples one categorical distributed random variable; also known as the Discrete distribution. /// /// The random number generator to use. /// An array of the cumulative distribution. Not assumed to be normalized. - /// random integers between 0 and the size of the categorical (exclusive). - public static IEnumerable SamplesWithCumulativeDistribution(System.Random rnd, double[] cdfUnnormalized) + /// One random integer between 0 and the size of the categorical (exclusive). + public static int SampleWithCumulativeDistribution(System.Random rnd, double[] cdfUnnormalized) { if (Control.CheckDistributionParameters && !IsValidCumulativeDistribution(cdfUnnormalized)) { throw new ArgumentException(Resources.InvalidDistributionParameters); } - while (true) - { - yield return SampleUnchecked(rnd, cdfUnnormalized); - } + return SampleUnchecked(rnd, cdfUnnormalized); } /// /// Samples a categorically distributed random variable. /// /// The random number generator to use. - /// An array of nonnegative ratios. Not assumed to be normalized. + /// An array of the cumulative distribution. Not assumed to be normalized. /// random integers between 0 and the size of the categorical (exclusive). - public static IEnumerable SamplesWithProbabilityMass(System.Random rnd, double[] pmfUnnormalized) + public static IEnumerable SamplesWithCumulativeDistribution(System.Random rnd, double[] cdfUnnormalized) { - if (Control.CheckDistributionParameters && !IsValidProbabilityMass(pmfUnnormalized)) + if (Control.CheckDistributionParameters && !IsValidCumulativeDistribution(cdfUnnormalized)) { throw new ArgumentException(Resources.InvalidDistributionParameters); } - var cdf = ProbabilityMassToCumulativeDistribution(pmfUnnormalized); while (true) { - yield return SampleUnchecked(rnd, cdf); + yield return SampleUnchecked(rnd, cdfUnnormalized); } } } diff --git a/src/Numerics/Distributions/ConwayMaxwellPoisson.cs b/src/Numerics/Distributions/ConwayMaxwellPoisson.cs index b1e8f468..977439cc 100644 --- a/src/Numerics/Distributions/ConwayMaxwellPoisson.cs +++ b/src/Numerics/Distributions/ConwayMaxwellPoisson.cs @@ -347,7 +347,7 @@ namespace MathNet.Numerics.Distributions /// the log probability mass at location . public double ProbabilityLn(int k) { - return Math.Log(Probability(k)); + return k*Math.Log(_lambda) - _nu*SpecialFunctions.FactorialLn(k) - Math.Log(Z); } /// @@ -357,12 +357,63 @@ namespace MathNet.Numerics.Distributions /// the cumulative distribution at location . public double CumulativeDistribution(double x) { + var z = Z; double sum = 0; for (var i = 0; i < x + 1; i++) { - sum += Probability(i); + sum += Math.Pow(_lambda, i)/Math.Pow(SpecialFunctions.Factorial(i), _nu)/z; } + return sum; + } + + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The lambda (λ) parameter. Range: λ > 0. + /// The rate of decay (ν) parameter. Range: ν ≥ 0. + /// the probability mass at location . + public static double PMF(double lambda, double nu, int k) + { + if (!(lambda > 0.0 && nu >= 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + var z = Normalization(lambda, nu); + return Math.Pow(lambda, k)/Math.Pow(SpecialFunctions.Factorial(k), nu)/z; + } + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The lambda (λ) parameter. Range: λ > 0. + /// The rate of decay (ν) parameter. Range: ν ≥ 0. + /// the log probability mass at location . + public static double PMFLn(double lambda, double nu, int k) + { + if (!(lambda > 0.0 && nu >= 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + var z = Normalization(lambda, nu); + return k*Math.Log(lambda) - nu*SpecialFunctions.FactorialLn(k) - Math.Log(z); + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The lambda (λ) parameter. Range: λ > 0. + /// The rate of decay (ν) parameter. Range: ν ≥ 0. + /// the cumulative distribution at location . + /// + public static double CDF(double lambda, double nu, double x) + { + if (!(lambda > 0.0 && nu >= 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + var z = Normalization(lambda, nu); + double sum = 0; + for (var i = 0; i < x + 1; i++) + { + sum += Math.Pow(lambda, i)/Math.Pow(SpecialFunctions.Factorial(i), nu)/z; + } return sum; } diff --git a/src/Numerics/Distributions/DiscreteUniform.cs b/src/Numerics/Distributions/DiscreteUniform.cs index 7ae65eff..cbafa02c 100644 --- a/src/Numerics/Distributions/DiscreteUniform.cs +++ b/src/Numerics/Distributions/DiscreteUniform.cs @@ -205,12 +205,7 @@ namespace MathNet.Numerics.Distributions /// the probability mass at location . public double Probability(int k) { - if (k >= _lower && k <= _upper) - { - return 1.0/(_upper - _lower + 1); - } - - return 0.0; + return PMF(_lower, _upper, k); } /// @@ -220,12 +215,7 @@ namespace MathNet.Numerics.Distributions /// the log probability mass at location . public double ProbabilityLn(int k) { - if (k >= _lower && k <= _upper) - { - return -Math.Log(_upper - _lower + 1); - } - - return Double.NegativeInfinity; + return PMFLn(_lower, _upper, k); } /// @@ -235,17 +225,53 @@ namespace MathNet.Numerics.Distributions /// the cumulative distribution at location . public double CumulativeDistribution(double x) { - if (x < _lower) - { - return 0.0; - } + return CDF(_lower, _upper, x); + } - if (x >= _upper) - { - return 1.0; - } + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// Lower bound. Range: lower ≤ upper. + /// Upper bound. Range: lower ≤ upper. + /// the probability mass at location . + public static double PMF(int lower, int upper, int k) + { + if (!(lower <= upper)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + return k >= lower && k <= upper ? 1.0/(upper - lower + 1) : 0.0; + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// Lower bound. Range: lower ≤ upper. + /// Upper bound. Range: lower ≤ upper. + /// the log probability mass at location . + public static double PMFLn(int lower, int upper, int k) + { + if (!(lower <= upper)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + return k >= lower && k <= upper ? -Math.Log(upper - lower + 1) : Double.NegativeInfinity; + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// Lower bound. Range: lower ≤ upper. + /// Upper bound. Range: lower ≤ upper. + /// the cumulative distribution at location . + /// + public static double CDF(int lower, int upper, double x) + { + if (!(lower <= upper)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + if (x < lower) return 0.0; + if (x >= upper) return 1.0; - return Math.Min(1.0, (Math.Floor(x) - _lower + 1)/(_upper - _lower + 1)); + return Math.Min(1.0, (Math.Floor(x) - lower + 1)/(upper - lower + 1)); } /// diff --git a/src/Numerics/Distributions/Geometric.cs b/src/Numerics/Distributions/Geometric.cs index 0d65d6d8..f402ec4d 100644 --- a/src/Numerics/Distributions/Geometric.cs +++ b/src/Numerics/Distributions/Geometric.cs @@ -190,11 +190,7 @@ namespace MathNet.Numerics.Distributions /// the probability mass at location . public double Probability(int k) { - if (k <= 0) - { - return 0.0; - } - + if (k <= 0) return 0.0; return Math.Pow(1.0 - _p, k - 1)*_p; } @@ -205,11 +201,7 @@ namespace MathNet.Numerics.Distributions /// the log probability mass at location . public double ProbabilityLn(int k) { - if (k <= 0) - { - return Double.NegativeInfinity; - } - + if (k <= 0) return Double.NegativeInfinity; return ((k - 1)*Math.Log(1.0 - _p)) + Math.Log(_p); } @@ -223,6 +215,45 @@ namespace MathNet.Numerics.Distributions return 1.0 - Math.Pow(1.0 - _p, x); } + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The probability (p) of generating one. Range: 0 ≤ p ≤ 1. + /// the probability mass at location . + public static double PMF(double p, int k) + { + if (!(p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (k <= 0) return 0.0; + return Math.Pow(1.0 - p, k - 1)*p; + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The probability (p) of generating one. Range: 0 ≤ p ≤ 1. + /// the log probability mass at location . + public static double PMFLn(double p, int k) + { + if (!(p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (k <= 0) return Double.NegativeInfinity; + return ((k - 1)*Math.Log(1.0 - p)) + Math.Log(p); + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The probability (p) of generating one. Range: 0 ≤ p ≤ 1. + /// the cumulative distribution at location . + /// + public static double CDF(double p, double x) + { + if (!(p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + return 1.0 - Math.Pow(1.0 - p, x); + } + /// /// Returns one sample from the distribution. /// diff --git a/src/Numerics/Distributions/Hypergeometric.cs b/src/Numerics/Distributions/Hypergeometric.cs index cc7532f1..895b2055 100644 --- a/src/Numerics/Distributions/Hypergeometric.cs +++ b/src/Numerics/Distributions/Hypergeometric.cs @@ -260,7 +260,7 @@ namespace MathNet.Numerics.Distributions /// the log probability mass at location . public double ProbabilityLn(int k) { - return Math.Log(Probability(k)); + return SpecialFunctions.BinomialLn(_success, k) + SpecialFunctions.BinomialLn(_population - _success, _draws - k) - SpecialFunctions.BinomialLn(_population, _draws); } /// @@ -270,21 +270,70 @@ namespace MathNet.Numerics.Distributions /// the cumulative distribution at location . public double CumulativeDistribution(double x) { - if (x < Minimum) + return CDF(_population, _success, _draws, x); + } + + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The size of the population (N). + /// The number successes within the population (K, M). + /// The number of draws without replacement (n). + /// the probability mass at location . + public static double PMF(int population, int success, int draws, int k) + { + if (!(population >= 0 && success >= 0 && draws >= 0 && success <= population && draws <= population)) + { + throw new ArgumentException(Resources.InvalidDistributionParameters); + } + + return SpecialFunctions.Binomial(success, k)*SpecialFunctions.Binomial(population - success, draws - k)/SpecialFunctions.Binomial(population, draws); + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The size of the population (N). + /// The number successes within the population (K, M). + /// The number of draws without replacement (n). + /// the log probability mass at location . + public static double PMFLn(int population, int success, int draws, int k) + { + if (!(population >= 0 && success >= 0 && draws >= 0 && success <= population && draws <= population)) { - return 0.0; + throw new ArgumentException(Resources.InvalidDistributionParameters); } - if (x >= Maximum) + + return SpecialFunctions.BinomialLn(success, k) + SpecialFunctions.BinomialLn(population - success, draws - k) - SpecialFunctions.BinomialLn(population, draws); + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The size of the population (N). + /// The number successes within the population (K, M). + /// The number of draws without replacement (n). + /// the cumulative distribution at location . + /// + public static double CDF(int population, int success, int draws, double x) + { + if (!(population >= 0 && success >= 0 && draws >= 0 && success <= population && draws <= population)) { - return 1.0; + throw new ArgumentException(Resources.InvalidDistributionParameters); } + if (x < Math.Max(0, draws + success - population)) return 0.0; + if (x >= Math.Min(success, draws)) return 1.0; + var k = (int)Math.Floor(x); - var denominatorLn = SpecialFunctions.BinomialLn(_population, _draws); + var denominatorLn = SpecialFunctions.BinomialLn(population, draws); var sum = 0.0; for (var i = 0; i <= k; i++) { - sum += Math.Exp(SpecialFunctions.BinomialLn(_success, i) + SpecialFunctions.BinomialLn(_population - _success, _draws - i) - denominatorLn); + sum += Math.Exp(SpecialFunctions.BinomialLn(success, i) + SpecialFunctions.BinomialLn(population - success, draws - i) - denominatorLn); } return sum; } diff --git a/src/Numerics/Distributions/NegativeBinomial.cs b/src/Numerics/Distributions/NegativeBinomial.cs index 43a148a1..1ef6fa22 100644 --- a/src/Numerics/Distributions/NegativeBinomial.cs +++ b/src/Numerics/Distributions/NegativeBinomial.cs @@ -206,12 +206,7 @@ namespace MathNet.Numerics.Distributions /// the probability mass at location . public double Probability(int k) { - var ln = SpecialFunctions.GammaLn(_trials + k) - - SpecialFunctions.GammaLn(_trials) - - SpecialFunctions.GammaLn(k + 1.0) - + (_trials*Math.Log(_p)) - + (k*Math.Log(1.0 - _p)); - return Math.Exp(ln); + return PMF(_trials, _p, k); } /// @@ -221,12 +216,7 @@ namespace MathNet.Numerics.Distributions /// the log probability mass at location . public double ProbabilityLn(int k) { - var ln = SpecialFunctions.GammaLn(_trials + k) - - SpecialFunctions.GammaLn(_trials) - - SpecialFunctions.GammaLn(k + 1.0) - + (_trials*Math.Log(_p)) - + (k*Math.Log(1.0 - _p)); - return ln; + return PMFLn(_trials, _p, k); } /// @@ -236,7 +226,52 @@ namespace MathNet.Numerics.Distributions /// the cumulative distribution at location . public double CumulativeDistribution(double x) { - return 1 - SpecialFunctions.BetaRegularized(x + 1, _trials, 1 - _p); + return CDF(_trials, _p, x); + } + + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The number of failures (r) until the experiment stopped. Range: r ≥ 0. + /// The probability (p) of a trial resulting in success. Range: 0 ≤ p ≤ 1. + /// the probability mass at location . + public static double PMF(double r, double p, int k) + { + return Math.Exp(PMFLn(r, p, k)); + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The number of failures (r) until the experiment stopped. Range: r ≥ 0. + /// The probability (p) of a trial resulting in success. Range: 0 ≤ p ≤ 1. + /// the log probability mass at location . + public static double PMFLn(double r, double p, int k) + { + if (!(r >= 0.0 && p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + return SpecialFunctions.GammaLn(r + k) + - SpecialFunctions.GammaLn(r) + - SpecialFunctions.GammaLn(k + 1.0) + + (r*Math.Log(p)) + + (k*Math.Log(1.0 - p)); + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The number of failures (r) until the experiment stopped. Range: r ≥ 0. + /// The probability (p) of a trial resulting in success. Range: 0 ≤ p ≤ 1. + /// the cumulative distribution at location . + /// + public static double CDF(double r, double p, double x) + { + if (!(r >= 0.0 && p >= 0.0 && p <= 1.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + + return 1 - SpecialFunctions.BetaRegularized(x + 1, r, 1 - p); } /// diff --git a/src/Numerics/Distributions/Poisson.cs b/src/Numerics/Distributions/Poisson.cs index ba261f2b..4bbca97d 100644 --- a/src/Numerics/Distributions/Poisson.cs +++ b/src/Numerics/Distributions/Poisson.cs @@ -220,6 +220,43 @@ namespace MathNet.Numerics.Distributions return 1.0 - SpecialFunctions.GammaLowerRegularized(x + 1, _lambda); } + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The lambda (λ) parameter of the Poisson distribution. Range: λ > 0. + /// the probability mass at location . + public static double PMF(double lambda, int k) + { + if (!(lambda > 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + return Math.Exp(-lambda + (k*Math.Log(lambda)) - SpecialFunctions.FactorialLn(k)); + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The lambda (λ) parameter of the Poisson distribution. Range: λ > 0. + /// the log probability mass at location . + public static double PMFLn(double lambda, int k) + { + if (!(lambda > 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + return -lambda + (k*Math.Log(lambda)) - SpecialFunctions.FactorialLn(k); + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The lambda (λ) parameter of the Poisson distribution. Range: λ > 0. + /// the cumulative distribution at location . + /// + public static double CDF(double lambda, double x) + { + if (!(lambda > 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + return 1.0 - SpecialFunctions.GammaLowerRegularized(x + 1, lambda); + } + /// /// Generates one sample from the Poisson distribution. /// diff --git a/src/Numerics/Distributions/Zipf.cs b/src/Numerics/Distributions/Zipf.cs index e37ea257..41991f22 100644 --- a/src/Numerics/Distributions/Zipf.cs +++ b/src/Numerics/Distributions/Zipf.cs @@ -259,14 +259,51 @@ namespace MathNet.Numerics.Distributions /// the cumulative distribution at location . public double CumulativeDistribution(double x) { - if (x < 1) - { - return 0.0; - } - + if (x < 1) return 0.0; return SpecialFunctions.GeneralHarmonic((int)x, _s)/SpecialFunctions.GeneralHarmonic(_n, _s); } + /// + /// Computes the probability mass (PMF) at k, i.e. P(X = k). + /// + /// The location in the domain where we want to evaluate the probability mass function. + /// The s parameter of the distribution. + /// The n parameter of the distribution. + /// the probability mass at location . + public static double PMF(double s, int n, int k) + { + if (!(n > 0 && s > 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + return (1.0/Math.Pow(k, s))/SpecialFunctions.GeneralHarmonic(n, s); + } + + /// + /// Computes the log probability mass (lnPMF) at k, i.e. ln(P(X = k)). + /// + /// The location in the domain where we want to evaluate the log probability mass function. + /// The s parameter of the distribution. + /// The n parameter of the distribution. + /// the log probability mass at location . + public static double PMFLn(double s, int n, int k) + { + if (!(n > 0 && s > 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + return Math.Log(PMF(s, n, k)); + } + + /// + /// Computes the cumulative distribution (CDF) of the distribution at x, i.e. P(X ≤ x). + /// + /// The location at which to compute the cumulative distribution function. + /// The s parameter of the distribution. + /// The n parameter of the distribution. + /// the cumulative distribution at location . + /// + public static double CDF(double s, int n, double x) + { + if (!(n > 0 && s > 0.0)) throw new ArgumentException(Resources.InvalidDistributionParameters); + if (x < 1) return 0.0; + return SpecialFunctions.GeneralHarmonic((int)x, s)/SpecialFunctions.GeneralHarmonic(n, s); + } + /// /// Generates a sample from the Zipf distribution without doing parameter checking. /// diff --git a/src/UnitTests/DistributionTests/Discrete/CategoricalTests.cs b/src/UnitTests/DistributionTests/Discrete/CategoricalTests.cs index a72f5d4a..9f38c792 100644 --- a/src/UnitTests/DistributionTests/Discrete/CategoricalTests.cs +++ b/src/UnitTests/DistributionTests/Discrete/CategoricalTests.cs @@ -165,7 +165,7 @@ namespace MathNet.Numerics.UnitTests.DistributionTests.Discrete [Test] public void CanSampleStatic() { - Categorical.SampleWithProbabilityMass(new Random(0), _largeP); + Categorical.Sample(new Random(0), _largeP); } /// @@ -174,7 +174,7 @@ namespace MathNet.Numerics.UnitTests.DistributionTests.Discrete [Test] public void FailSampleStatic() { - Assert.That(() => Categorical.SampleWithProbabilityMass(new Random(0), _badP), Throws.ArgumentException); + Assert.That(() => Categorical.Sample(new Random(0), _badP), Throws.ArgumentException); } /// diff --git a/src/UnitTests/DistributionTests/Discrete/HypergeometricTests.cs b/src/UnitTests/DistributionTests/Discrete/HypergeometricTests.cs index a7bd0d4a..f7bda643 100644 --- a/src/UnitTests/DistributionTests/Discrete/HypergeometricTests.cs +++ b/src/UnitTests/DistributionTests/Discrete/HypergeometricTests.cs @@ -280,7 +280,7 @@ namespace MathNet.Numerics.UnitTests.DistributionTests.Discrete public void ValidateProbability(int population, int success, int draws, int x) { var d = new Hypergeometric(population, success, draws); - Assert.AreEqual(SpecialFunctions.Binomial(success, x)*SpecialFunctions.Binomial(population - success, draws - x)/SpecialFunctions.Binomial(population, draws), d.Probability(x)); + Assert.That(d.Probability(x), Is.EqualTo(SpecialFunctions.Binomial(success, x)*SpecialFunctions.Binomial(population - success, draws - x)/SpecialFunctions.Binomial(population, draws))); } /// @@ -302,7 +302,7 @@ namespace MathNet.Numerics.UnitTests.DistributionTests.Discrete public void ValidateProbabilityLn(int population, int success, int draws, int x) { var d = new Hypergeometric(population, success, draws); - Assert.AreEqual(Math.Log(d.Probability(x)), d.ProbabilityLn(x)); + Assert.That(d.ProbabilityLn(x), Is.EqualTo(Math.Log(d.Probability(x))).Within(1e-14)); } ///