From cd3c824df36b557889fb1fd903351b60b629afbd Mon Sep 17 00:00:00 2001 From: Richard Kistruck Date: Tue, 20 Jan 2009 18:23:53 +0000 Subject: Mps br/timing rnd(): Use a = 48271. Use fast implementation. Much improved verification by rnd_verify(). Copied from Perforce Change: 167182 ServerID: perforce.ravenbrook.com --- mps/code/testlib.c | 201 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 149 insertions(+), 52 deletions(-) (limited to 'mps/code/testlib.c') diff --git a/mps/code/testlib.c b/mps/code/testlib.c index 5c92969d2e0..947ead5fb82 100644 --- a/mps/code/testlib.c +++ b/mps/code/testlib.c @@ -34,72 +34,167 @@ struct itimerspec; /* stop complaints from time.h */ /* rnd -- a random number generator * - * I nabbed it from "ML for the Working Programmer", originally from: - * Stephen K Park & Keith W Miller (1988). Random number generators: - * good ones are hard to find. Communications of the ACM, 31:1192-1201. + * We use the (Multiplicative) Linear Congruential Generator + * Xn = a * Xn-1 mod m + * with: m = 2147483647 (2^31 - 1, a Mersenne prime), and a = 48271. + * This is a 'full-period' generator: all values from [1..(mod-1)], + * or 0x00000001 to 0x7ffffffe inclusive, are returned once, and then + * the cycle begins again. (The value 0 is not part of the cycle and + * is never returned). So the period = mod-1, ie. 2147483646. * - * This is a 'full-period' generator: all values from [1..(mod-1)] - * are returned once, and then the generator cycles round again. - * (The value 0 is not part of the cycle and is never returned). That - * means the period = mod-1, ie. 2147483646. + * This generator is extremely simple and has been very well studied. + * It is free of major vices we might care about for this application. + * In particular, as m is prime, low order bits are random. Therefore + * to roll an N-sided die (N << m), "rnd() % N" is acceptable, giving + * a value in [0..N-1]. * - * This "x 16807 mod 2^31-1" generator has been very well studied. - * It is not the 'best' (most random) simple generator, but it is - * free of all vices we might care about for this application, so - * we'll keep it simple and stick with this, thanks. + * It was popularised by the much-cited Park & Miller paper: + * Stephen K Park & Keith W Miller (1988). Random number generators: + * good ones are hard to find. Communications of the ACM, + * 31:1192-1201. + * The recommended multiplier a was later updated from 16807 to 48271: + * Stephen K Park, Keith W Miller, Paul K. Stockmeyer (1993). + * Technical Correspondence. Communications of the ACM, 36:105-110. * - * There are faster implementations of this generator, summarised - * briefly here: - * - L. Schrage (1979 & 1983) showed how to do all the calculations - * within 32-bit integers. See also Numerical recipes in C. - * - David Carta (1990) showed how to also eliminate division. + * (Many more elaborate generators have been invented. The next simple + * step would be to combine with the MLCG m = 2147483399 a = 40692, to + * make the period "about 74 quadrillion". See the summary of chapter + * 3 in Knuth's "The Art of Computer Programming".) + * + * This (fast) implementation uses the identity: + * 0x80000000 == 0x7FFFFFFF + 0x00000001 + * noted by David Carta (1990), where 0x7FFFFFFF == 2^31-1 == m, which + * means that bits above the first 31 can simply be shifted >> 31 and + * added, preserving Xn mod m. To remain within 32-bit unsigned + * arithmetic when multiplying the previous seed (31 bits) by a (16 + * bits), the seed is split into bottom and top halves; bits above + * the first 31 are simply "top >> 16". (Code by RHSK, inspired by + * Robin Whittle's article at "http://www.firstpr.com.au/dsp/rand31/"). + * + * Slower implementations, used for verification: + * rnd_verify_schrage uses the method of L. Schrage (1979 & 1983), + * namely splitting the seed by q, where q = m div a. + * rnd_verify_float simply uses floating point arithmetic. */ static unsigned long seed = 1; -/* 2^31 - 1 == (1UL<<31) - 1 == 2147483647 */ -#define RND_MOD_2_31_1 2147483647 -#define RND_MOD_2_31_1_FLOAT 2147483647.0 +#define R_m 2147483647UL +#define R_a 48271UL unsigned long rnd(void) { - double s; - - s = seed; - s *= 16807.0; - s = fmod(s, RND_MOD_2_31_1_FLOAT); /* 2^31 - 1 */ - seed = (unsigned long)s; + /* requires m == 2^31-1, a < 2^16 */ + unsigned long bot = R_a * (seed & 0x7FFF); + unsigned long top = R_a * (seed >> 15); + seed = bot + ((top & 0xFFFF) << 15) + (top >> 16); + if(seed > R_m) + seed -= R_m; return seed; - /* Have you modified this code? Run rnd_verify() please! RHSK */ + /* Have you modified this code? Run rnd_verify(3) please! RHSK */ } -/* rnd_verify -- verify that rnd() returns the correct results */ -static void rnd_verify(void) +static unsigned long seed_verify_schrage = 1; +#define R_q (R_m / R_a) +#define R_r (R_m % R_a) +static unsigned long rnd_verify_schrage(void) +{ + /* requires m < 2^31, q > r; see Park & Miller (1988) */ + unsigned long alpha = R_a * (seed_verify_schrage % R_q); /* < m */ + unsigned long beta = R_r * (seed_verify_schrage / R_q); /* < m */ + seed_verify_schrage = alpha - beta; + if(alpha < beta) + seed_verify_schrage += R_m; + return seed_verify_schrage; +} + +static unsigned long seed_verify_float = 1; +#define R_m_float 2147483647.0 +#define R_a_float 48271.0 +static unsigned long rnd_verify_float(void) +{ + double s; + s = seed_verify_float; + s *= R_a_float; + s = fmod(s, R_m_float); + seed_verify_float = (unsigned long)s; + return seed_verify_float; +} + +/* rnd_verify -- verify that rnd() returns the correct results + * + * depth = how much time to spend verifying + * 0: very quick -- just verify the next rnd() value + * 1: quick -- verify the first 10000 calls from seed = 1 + * 2: slow (< 5 minutes) -- run the fast generator for a full cycle + * 3: very slow (several hours) -- verify a full cycle + */ +void rnd_verify(int depth) { unsigned long orig_seed = seed; unsigned long i; + unsigned long r; + + /* 0: the next value from rnd() matches rnd_verify_*() */ + if(depth >= 0) { + seed_verify_schrage = seed; + seed_verify_float = seed; + r = rnd(); + Insist(rnd_verify_schrage() == r); + Insist(rnd_verify_float() == r); + } - /* This test is from: - * Stephen K Park & Keith W Miller (1988). Random number generators: - * good ones are hard to find. Communications of the ACM, 31:1192-1201. - * Note: The Park-Miller paper uses 1-based indices. - */ - i = 1; - seed = 1; - for(i = 2; i < 10001; i += 1) { - (void)rnd(); + /* 1: first 10000 (from Park & Miller, note: 1-based indexing!) */ + if(depth >= 1) { + i = 1; + seed = 1; + seed_verify_schrage = 1; + seed_verify_float = 1; + for(i = 2; i <= 10001; i += 1) { + r = rnd(); + Insist(rnd_verify_schrage() == r); + Insist(rnd_verify_float() == r); + } + printf("%lu\n", r); + /* Insist(r == 1043618065UL); -- correct value for a = 16807 */ + Insist(r == 399268537UL); /* correct for a = 48271 */ } - Insist(i == 10001); - Insist(rnd() == 1043618065); - /* from Robin Whittle's compendium at - * http://www.firstpr.com.au/dsp/rand31/ - * Observe wrap-around after 'full-period' (which excludes 0). - * Note: uses 0-based indices, ie. rnd_0 = 1, rnd_1 = 16807, ... - */ - i = 2147483645; - seed = 1407677000; - i += 1; - Insist(i == (1UL<<31) - 2 ); /* 'full-period' excludes 0 */ - Insist(rnd() == 1); /* wrap-around */ + /* 1: observe wrap-around (note: 0-based indexing) */ + if(depth >= 1) { + i = 2147483645UL; + /* seed = 1407677000UL; -- correct value for a = 16807 */ + seed = 1899818559UL; /* correct for a = 48271 */ + seed_verify_schrage = 1899818559UL; + seed_verify_float = 1899818559UL; + i += 1; + Insist(i == (1UL<<31) - 2 ); /* 'full-period' excludes 0 */ + Insist(rnd() == 1); /* wrap-around */ + Insist(rnd_verify_schrage() == 1); + Insist(rnd_verify_float() == 1); + } + + /* 2 & 3: Full cycle (3 => verifying each value) */ + if(depth >= 2) { + unsigned long r1; + i = 0; + seed = 1; + seed_verify_schrage = 1; + seed_verify_float = 1; + while(1) { + i += 1; + r = rnd(); + if(depth >= 3) { + Insist(rnd_verify_schrage() == r); + Insist(rnd_verify_float() == r); + } + if(r == 1) { + printf("Wrap at i=%lu, r=%lu, r(i-1)=%lu.\n", + i, r, r1); + break; + } else { + r1 = r; + } + } + } seed = orig_seed; } @@ -135,18 +230,20 @@ void randomize(int argc, char **argv) n = sscanf(argv[1], "%lu", &seed0); Insist(n == 1); printf("randomize(): resetting initial seed to: %lu.\n", seed0); - rnd_verify(); /* good to verify occasionally: slip it in here */ } else { /* time_t uses an arbitrary encoding, but hopefully the low order */ /* 31 bits will have at least one bit changed from run to run. */ - seed0 = 1 + time(NULL) % (RND_MOD_2_31_1 - 1); + seed0 = 1 + time(NULL) % (R_m - 1); printf("randomize(): choosing initial seed: %lu.\n", seed0); } - Insist(seed0 < RND_MOD_2_31_1); /* 2^31 - 1 */ + Insist(seed0 < R_m); Insist(seed0 != 0); seed = seed0; + rnd_verify(0); + Insist(seed == seed0); + /* The 'random' seed is taken from time(), which may simply be a * count of seconds: therefore successive runs may start with * nearby seeds, possibly differing only by 1. So the first value -- cgit v1.2.1