From 9fd4dc8dfcd5ba732cca81e5db0a2c91fdd752be Mon Sep 17 00:00:00 2001 From: Christoph Ruegg Date: Thu, 23 Jan 2014 23:00:50 +0100 Subject: [PATCH] Docs: random numbers, probability distributions --- docs/content/RandomAndDistributions.fsx | 236 ++++++++++++++++------ docs/tools/build-docs.fsx | 4 +- src/Numerics/Random/MersenneTwister.cs | 21 +- src/Numerics/Random/SystemRandomSource.cs | 2 - 4 files changed, 190 insertions(+), 73 deletions(-) diff --git a/docs/content/RandomAndDistributions.fsx b/docs/content/RandomAndDistributions.fsx index d0acd13b..70ad1647 100644 --- a/docs/content/RandomAndDistributions.fsx +++ b/docs/content/RandomAndDistributions.fsx @@ -2,25 +2,28 @@ #I "../../out/lib/net40" #r "MathNet.Numerics.dll" #r "MathNet.Numerics.FSharp.dll" +open MathNet.Numerics.Random +open MathNet.Numerics.Distributions (** Random Numbers and Probability Distributions ============================================ -The .Net Framework base class library includes a pseudo-random number generator +The .Net Framework base class library (BCL) includes a pseudo-random number generator for non-cryptography use in the form of the `System.Random` class. Math.NET Numerics provides a few alternatives with different characteristics -in randomness, bias, sequence length and performance. All these classes +in randomness, bias, sequence length, performance and thread-safety. All these classes inherit from `System.Random` so you can use them as a drop-in replacement even in third-party code. -All random number generators (RNG) generate numbers in a more-or-less uniform +All random number generators (RNG) generate numbers in a uniform distribution. In practice you often need to sample random numbers with a different distribution, like a Gaussian or Poisson. You can do that with one of our probability distribution classes, or in F# also using the `Sample` module. Once parametrized, the distribution classes also provide a variety of other functionality around probability distributions, like evaluating statistical distribution properties or functions. + Initialization -------------- @@ -31,51 +34,125 @@ random numbers and probability distributions: using MathNet.Numerics.Random; using MathNet.Numerics.Distributions; -Or in F#: + +Generating Random Numbers +------------------------- + +Let's sample a few random values from a uniform distributed variable +$X\sim\mathcal{U}(0,1)$, such that $0 \le x < 1$: + + [lang=csharp] + // create an array with 1000 random values + double[] samples = SystemRandomSource.Doubles(1000); + + // now overwrite the existing array with new random values + SystemRandomSource.Doubles(samples); + + // we can also create an infinite sequence of random values: + IEnumerable sampleSeq = SystemRandomSource.DoubleSequence(); + + // take a single random value + System.Random rng = SystemRandomSource.Default; + double sample = rng.NextDouble(); + decimal sampled = rng.NextDecimal(); + +In F# we can do exactly the same, or alternatively use the `Random` module: *) -open MathNet.Numerics.Random -open MathNet.Numerics.Distributions +let samples = Random.doubles 1000 + +// overwrite the whole array with new random values +Random.doubleFill samples + +// create an infinite sequence: +let sampleSeq = Random.doubleSeq () + +// take a single random value +let rng = Random.shared +let sample = rng.NextDouble() +let sampled = rng.NextDecimal() (** -Random Number Generators ------------------------- +If you have used the .Net BCL random number generators before, you have likely +noticed a few differences: we used special routines to create a full array or +sequence in one go, we were able to sample a decimal number, an we used static functions +and a shared default instance instead of creating our own instance. -Let's sample a few uniform random values using Mersenne Twister in C#: +Math.NET Numerics provides a few alternative random number generators in their own types. +For example, `MersenneTwister` implements the very popular mersenne twister algorithm. All these types +inherit from `System.Random`, are fully compatible to it and can also be used exactly the same way: [lang=csharp] - var rng = new MersenneTwister(42); - int randomInt = rng.Next(); - double randomDouble = rng.NextDouble(); + System.Random random = new SystemRandomSource(); + var sample = random.NextDouble(); -In F# you can use the constructor as well, or alternatively use the `Random` module. -In case of the latter, all objects will be cast to their common base type `System.Random`: -*) +However, unlike System.Random they can be made thread safe, use much more reasonable +default seeds and have some convinient extra routines. The `SystemRandomSource` class that +was used above uses System.Random to generate random numbers internally - but with all the extras. -let rng = MersenneTwister(42) -let rngEx = Random.mersenneTwisterSeed 42 -let randomInt = rng.Next() -(** -If you have used `System.Random` before, you may remember that it only offers `Next` methods -to sample integers, and `NextDouble` for floating point numbers in the [0,1) interval. +Full Range Integers and Decimal +------------------------------- + +Out of the box, `System.Random` only provides `Next` methods to sample integers +in the [0, Int.MaxValue) range and `NextDouble` for floating point numbers in the [0,1) interval. Did you ever have a need to generate numbers of the full integer range including negative numbers, or a `System.Decimal`? Extending discrete random numbers to different ranges or types is non-trivial if the distribution should still be uniform over the chosen range. That's why we've added a few extensions -methods which are available on all RNGs (including `System.Random` itself): +methods which are available on all RNGs (including System.Random itself): + +* NextInt64: generates a 64 bit integer, uniform in the range [0, Long.MaxValue) +* NextFullRangeInt32: generates a 32 bit integer, uniform in the range [Int.MinValue, Int.MaxValue] +* NextFullRangeInt64: generates a 64 bit integer, uniform in the range [Long.MinValue, Long.MaxValue] +* NextDecimal: generates a `System.Decimal`, uniform in the range [0.0, 1.0) + + +Seeds +----- + +All RNGs can be initialized with a custom seed number. The same seed causes +the same number sequence to be generated, which can be very useful if you need results +to be reproducible, e.g. in testing/verification. The exception is cryptography, +where reproducible random number sequences would be a fatal security flaw, +so our crypto random source does not accept a seed. + +In the code samples above we did not provide a seed, so a default seed was used. +If no seed is provided, `System.Random` uses a time based seed equivalent to the +one below. This means that all instances created within a short timeframe +(which typically spans about a thousand CPU clock cycles) will generate +exactly the same sequence. This can happen easily e.g. in parallel computing +and is often unwanted. That's why all Math.NET Numerics RNGs are by default +initialized with a robust seed taken from the `CryptoRandomSource` if available, +or else a combination of a random number from a shared RNG, the time and a Guid +(which are supposed to be generated uniquely, worldwide). *) -let values = - ( rng.Next(), // built-in: int32 in the range [0, Int.MaxValue) - rng.NextInt64(), // int64 in the range [0, Long.MaxValue) - rng.NextFullRangeInt32(), // int32 in the range [Int.MinValue, Int.MaxValue] - rng.NextFullRangeInt64(), // int64 in the range [Long.MinValue, Long.MaxValue] - rng.NextDouble(), // built-in: double in the range [0.0, 1.0) - rng.NextDecimal() ) // decimal in then range [0.0, 1.0) +let someTimeSeed = RandomSeed.Time() // not recommended +let someGuidSeed = RandomSeed.Guid() +let someRobustSeed = RandomSeed.Robust() // recommended, used by default + +(** Let's generate random numbers like before, but this time with custom seed 42: *) + +let samplesSeeded = Random.doublesSeed 42 1000 +Random.doubleFillSeed 42 samplesSeeded +let samplesSeqSeeded = Random.doubleSeqSeed 42 (** -The following custom RNGs are currently available in Math.NET Numerics: +Or without the F# Random module, e.g. in C#: + + [lang=csharp] + double[] samplesSeeded = SystemRandomSource.Doubles(1000, 42); + SystemRandomSource.Doubles(samplesSeeded, 42); + IEnumerable sampleSeqSeeded = SystemRandomSource.DoubleSequence(42); + +Uniform Random Number Generators +-------------------------------- + +Up to now we've used only `SystemRandomSource`, but there's much more: + +* `SystemRandomSource`: Wraps the .NET BCL System.Random to provide thread-safety +* `CryptoRandomSource`: Wraps the .NET BCL RNGCryptoServiceProvider. *Not available in portable builds.* * `MersenneTwister`: Mersenne Twister 19937 generator * `Xorshift`: Multiply-with-carry XOR-shift generator * `Mcg31m1`: Multiplicative congruental generator using a modulus of 2^31-1 and a multiplier of 1132489760 @@ -84,53 +161,82 @@ The following custom RNGs are currently available in Math.NET Numerics: * `WH2006`: Wichmann-Hill's 2006 combined multiplicative congruental generator * `Mrg32k3a`: 32-bit combined multiple recursive generator with 2 components of order 3 * `Palf`: Parallel Additive Lagged Fibonacci generator -* `SystemCryptoRandomNumberGenerator`: Using the RNGCryptoServiceProvider of the .Net Framework. *Not available in portable builds.* -Seeds and Thread Safety ------------------------ +Let's sample a few uniform random values using Mersenne Twister in C#: -Other than for cryptographic random numbers where you'd never want to provide -a seed, all other RNGs can be initialized with a custom seed. In the code sample -above we've used `42` as seed. The same seed causes the same number sequence -to be generated, which can be very useful if you need results to be reproducible, -e.g. in testing/verification. + [lang=csharp] + // Typical way with an instance: + var random = new MersenneTwister(42); // seed 42 + int sampleInt = random.Next(); + double sampleDouble = random.NextDouble(); + decimal sampleDecimal = random.NextDecimal(); + double[] samples = random.NextDoubles(1000); + IEnumerable sampleSeq = random.NextDoubleSequence(); + + // Simpler and faster if you need a large sequence, only once: + double[] samples = MersenneTwister.Doubles(1000, 42) // 1000 numbers, seed 42 + IEnumerable sampleSeq = MersenneTwister.DoubleSequence(42); // seed 42 -If no seed is provided, `System.Random` uses a time based seed equivalent to the -one below. This means that all instances created within a short timeframe -(which typically spans about a thousand CPU clock cycles) will generate -exactly the same sequence. This can happen easily e.g. in parallel computing -and is often unwanted. That's why all number generators created using -Math.NET Numerics routines are by default initialized with a seed that combines -the time with a Guid (which are supposed to be generated uniquely, worldwide). +In F# you can use the constructor as well, or alternatively use the `Random` module. +In case of the latter, all objects will be cast to their common base type `System.Random`: *) -let someTimeSeed = RandomSeed.Time() -let someGuidSeed = RandomSeed.Guid() +// By using the normal constructor (random1 has type MersenneTwister) +let random1 = MersenneTwister() +let random1b = MersenneTwister(42) // with seed + +// By using the Random module (random2 has type System.Random) +let random2 = Random.mersenneTwister () +let random2b = Random.mersenneTwisterSeed 42 // with seed +let random2c = Random.mersenneTwisterWith 42 false // opt-out of thread-safety + +// Using some other algorithms: +let random3 = Random.crypto () +let random4 = Random.xorshift () +let random5 = Random.wh2006 () (** -Note that the generators should be reused when generating multiple numbers. -If you'd create a new generator each time, the numbers it generates would be -exactly as random as your seed - and thus not very random at all. -However, generators are not automatically thread-safe in .Net. They *are* thread-safe -when created using Math.NET Numerics by default, but that can be controlled either by a -boolean argument at creation or by setting `Control.ThreadSafeRandomNumberGenerators`. +Shared Instances and Thread Safety +---------------------------------- + +Generators make certain claims about how many random numbers they can generate +until the whole sequence repeats itself. However, this only applies if you +continue to sample from the same instance and its internal state. +The generator instances should therefore be reused within an application if long +random sequences are needed. If you'd create a new instance each time, the numbers +it generates would be exactly as random as your seed - and thus not very random at all. + +Another reason to share instances: most generators run an initialization routine before +they can start generating numbers which can be expensive. Some of them also maintain +their internal state in large memory blocks, which can quickly add up when creating multiple +instances. + +Unfortunately the two generators provided by .NET are not thread-safe and thus cannot be +shared between threads without manual locking. But all the RNGs provided by Math.NET Numerics, +including the `SystemRandomSource` and `CryptoRandomSource` wrappers, *are* thread-safe by default, +unless explicitly disabled by a constructor argument or by setting `Control.ThreadSafeRandomNumberGenerators` +(which is used if the constructor argument is omitted). + +For convenience a few generators provide a thread-safe shared instance + + [lang=csharp] + let a = SystemRandomSource.Default; + let b = MersenneTwister.Default; + +Or with the F# module: *) -let a = Random.system () -let b = Random.systemSeed (RandomSeed.Guid()) -let c = Random.crypto () -let d = Random.mersenneTwister () -let e = Random.mersenneTwisterWith 1000 true (* thread-safe *) -let f = Random.xorshift () -let g = Random.xorshiftCustom someTimeSeed false 916905990L 13579L 362436069L 77465321L -let h = Random.wh2006 () -let i = Random.palf () +let a = Random.systemShared +let b = Random.mersenneTwisterShared + +// or if you don't care, simply +let c = Random.shared; (** Probability Distributions ------------------------- -For non-uniform random number generation you can use one the wide range of probability +For non-uniform random number generation you can use the wide range of probability distributions in the `MathNet.Numerics.Distributions` namespace. There are many ways to parametrize a distribution in the literature. When using the @@ -148,7 +254,7 @@ as last argument. A few more examples, this time in F#: *) // some probability distributions -let normal = Normal.WithMeanVariance(3.0, 1.5, g) +let normal = Normal.WithMeanVariance(3.0, 1.5, a) let exponential = Exponential(2.4) let gamma = Gamma(2.0, 1.5, Random.crypto()) let cauchy = Cauchy(0.0, 1.0, Random.mrg32k3aWith 10 false) @@ -170,7 +276,7 @@ let discrete = let u = Normal.Sample(Random.system(), 2.0, 4.0) let v = Laplace.Samples(Random.mersenneTwister(), 1.0, 3.0) |> Seq.take 100 |> List.ofSeq let w = Rayleigh.Sample(c, 1.5) -let x = Hypergeometric.Sample(h, 100, 20, 5) +let x = Hypergeometric.Sample(b, 100, 20, 5) (** Distribution Functions and Properties diff --git a/docs/tools/build-docs.fsx b/docs/tools/build-docs.fsx index 2048f30d..1f8d64a8 100644 --- a/docs/tools/build-docs.fsx +++ b/docs/tools/build-docs.fsx @@ -72,7 +72,9 @@ let buildReference () = for lib in referenceBinaries do MetadataFormat.Generate ( bin @@ lib, output @@ "reference", layoutRoots, - parameters = ("root", root)::info ) + parameters = ("root", root)::info, + sourceRepo = "https://github.com/mathnet/mathnet-numerics/tree/master/src", + sourceFolder = @"..\..\src", publicOnly = true) // Build documentation from `fsx` and `md` files in `docs/content` let buildDocumentation () = diff --git a/src/Numerics/Random/MersenneTwister.cs b/src/Numerics/Random/MersenneTwister.cs index a26477b2..dca8f693 100644 --- a/src/Numerics/Random/MersenneTwister.cs +++ b/src/Numerics/Random/MersenneTwister.cs @@ -71,6 +71,7 @@ using System.Collections.Generic; #if PORTABLE using System; #else +using System.Runtime; using System.Threading; #endif @@ -337,10 +338,10 @@ namespace MathNet.Numerics.Random }*/ /// - /// Returns an array of random numbers greater than or equal to 0.0 and less than 1.0. + /// Fills an array with random numbers greater than or equal to 0.0 and less than 1.0. /// /// Supports being called in parallel from multiple threads. - public static double[] Doubles(int length, int seed) + public static void Doubles(double[] values, int seed) { uint[] t = new uint[624]; int k; @@ -353,8 +354,7 @@ namespace MathNet.Numerics.Random t[k] &= 0xffffffff; } - var data = new double[length]; - for (int i = 0; i < data.Length; i++) + for (int i = 0; i < values.Length; i++) { uint y; @@ -385,8 +385,19 @@ namespace MathNet.Numerics.Random y ^= (y << 15) & 0xefc60000; y ^= (y >> 18); - data[i] = y*Reciprocal; + values[i] = y*Reciprocal; } + } + + /// + /// Returns an array of random numbers greater than or equal to 0.0 and less than 1.0. + /// + /// Supports being called in parallel from multiple threads. + [TargetedPatchingOptOut("Performance critical to inline this type of method across NGen image boundaries")] + public static double[] Doubles(int length, int seed) + { + var data = new double[length]; + Doubles(data, seed); return data; } diff --git a/src/Numerics/Random/SystemRandomSource.cs b/src/Numerics/Random/SystemRandomSource.cs index fb9c9804..e86b915d 100644 --- a/src/Numerics/Random/SystemRandomSource.cs +++ b/src/Numerics/Random/SystemRandomSource.cs @@ -182,7 +182,6 @@ namespace MathNet.Numerics.Random public static void Doubles(double[] values, int seed) { var rnd = new System.Random(seed); - for (int i = 0; i < values.Length; i++) { values[i] = rnd.NextDouble(); @@ -208,7 +207,6 @@ namespace MathNet.Numerics.Random public static IEnumerable DoubleSequence(int seed) { var rnd = new System.Random(seed); - while (true) { yield return rnd.NextDouble();