ECCE @ EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RandPoissonQ.cc
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file RandPoissonQ.cc
1 // -*- C++ -*-
2 //
3 // -----------------------------------------------------------------------
4 // HEP Random
5 // --- RandPoissonQ ---
6 // class implementation file
7 // -----------------------------------------------------------------------
8 
9 // =======================================================================
10 // M. Fischler - Implemented new, much faster table-driven algorithm
11 // applicable for mu < 100
12 // - Implemented "quick()" methods, shich are the same as the
13 // new methods for mu < 100 and are a skew-corrected gaussian
14 // approximation for large mu.
15 // M. Fischler - Removed mean=100 from the table-driven set, since it
16 // uses a value just off the end of the table. (April 2004)
17 // M Fischler - put and get to/from streams 12/15/04
18 // M Fischler - Utilize RandGaussQ rather than RandGauss, as clearly
19 // intended by the inclusion of RandGaussQ.h. Using RandGauss
20 // introduces a subtle trap in that the state of RandPoissonQ
21 // can never be properly captured without also saveing the
22 // state of RandGauss! RandGaussQ is, on the other hand,
23 // stateless except for the engine used.
24 // M Fisculer - Modified use of wrong engine when shoot (anEngine, mean)
25 // is called. This flaw was preventing any hope of proper
26 // saving and restoring in the instance cases.
27 // M Fischler - fireArray using defaultMean 2/10/05
28 // M Fischler - put/get to/from streams uses pairs of ulongs when
29 // + storing doubles avoid problems with precision
30 // 4/14/05
31 // M Fisculer - Modified use of shoot (mean) instead of
32 // shoot(getLocalEngine(), mean) when fire(mean) is called.
33 // This flaw was causing bad "cross-talk" between modules
34 // in CMS, where one used its own engine, and the other
35 // used the static generator. 10/18/07
36 //
37 // =======================================================================
38 
41 #include "CLHEP/Random/DoubConv.h"
42 #include "CLHEP/Random/Stat.h"
44 #include <cmath> // for std::pow()
45 
46 namespace CLHEP {
47 
48 std::string RandPoissonQ::name() const {return "RandPoissonQ";}
50 
51 // Initialization of static data: Note that this is all const static data,
52 // so that saveEngineStatus properly saves all needed information.
53 
54  // The following MUST MATCH the corresponding values used (in
55  // poissonTables.cc) when poissonTables.cdat was created.
56 
57 const double RandPoissonQ::FIRST_MU = 10;// lowest mu value in table
58 const double RandPoissonQ::LAST_MU = 95;// highest mu value
59 const double RandPoissonQ::S = 5; // Spacing between mu values
60 const int RandPoissonQ::BELOW = 30; // Starting point for N is at mu - BELOW
61 const int RandPoissonQ::ENTRIES = 51; // Number of entries in each mu row
62 
63 const double RandPoissonQ::MAXIMUM_POISSON_DEVIATE = 2.0E9;
64  // Careful -- this is NOT the maximum number that can be held in
65  // a long. It actually should be some large number of sigma below
66  // that.
67 
68  // Here comes the big (9K bytes) table, kept in a file of
69  // ENTRIES * (FIRST_MU - LAST_MU + 1)/S doubles
70 
71 static const double poissonTables [ 51 * ( (95-10)/5 + 1 ) ] = {
72 #include "CLHEP/Random/poissonTables.cdat"
73 };
74 
75 //
76 // Constructors and destructors:
77 //
78 
80 }
81 
83 
84  // The following are useful for quick approximation, for large mu
85 
86  double sig2 = defaultMean * (.9998654 - .08346/defaultMean);
87  sigma = std::sqrt(sig2);
88  // sigma for the Guassian which approximates the Poisson -- naively
89  // sqrt (defaultMean).
90  //
91  // The multiplier corrects for fact that discretization of the form
92  // [gaussian+.5] increases the second moment by a small amount.
93 
94  double t = 1./(sig2);
95 
96  a2 = t/6 + t*t/324;
97  a1 = std::sqrt (1-2*a2*a2*sig2);
98  a0 = defaultMean + .5 - sig2 * a2;
99 
100  // The formula will be a0 + a1*x + a2*x*x where x has 2nd moment of sigma.
101  // The coeffeicients are chosen to match the first THREE moments of the
102  // true Poisson distribution.
103  //
104  // Actually, if the correction for discretization were not needed, then
105  // a2 could be taken one order higher by adding t*t*t/5832. However,
106  // the discretization correction is not perfect, leading to inaccuracy
107  // on the order to 1/mu**2, so adding a third term is overkill.
108 
109 } // setupForDefaultMu()
110 
111 
112 //
113 // fire, quick, operator(), and shoot methods:
114 //
115 
116 long RandPoissonQ::shoot(double xm) {
117  return shoot(getTheEngine(), xm);
118 }
119 
121  return (double) fire();
122 }
123 
124 double RandPoissonQ::operator()( double mean ) {
125  return (double) fire(mean);
126 }
127 
128 long RandPoissonQ::fire(double mean) {
129  return shoot(getLocalEngine(), mean);
130 }
131 
133  if ( defaultMean < LAST_MU + S ) {
135  } else {
136  return poissonDeviateQuick ( getLocalEngine(), a0, a1, a2, sigma );
137  }
138 } // fire()
139 
140 long RandPoissonQ::shoot(HepRandomEngine* anEngine, double mean) {
141 
142  // The following variables, static to this method, apply to the
143  // last time a large mean was supplied; they obviate certain calculations
144  // if consecutive calls use the same mean.
145 
146  static CLHEP_THREAD_LOCAL double lastLargeMean = -1.; // Mean from previous shoot
147  // requiring poissonDeviateQuick()
148  static CLHEP_THREAD_LOCAL double lastA0;
149  static CLHEP_THREAD_LOCAL double lastA1;
150  static CLHEP_THREAD_LOCAL double lastA2;
151  static CLHEP_THREAD_LOCAL double lastSigma;
152 
153  if ( mean < LAST_MU + S ) {
154  return poissonDeviateSmall ( anEngine, mean );
155  } else {
156  if ( mean != lastLargeMean ) {
157  // Compute the coefficients defining the quadratic transformation from a
158  // Gaussian to a Poisson for this mean. Also save these for next time.
159  double sig2 = mean * (.9998654 - .08346/mean);
160  lastSigma = std::sqrt(sig2);
161  double t = 1./sig2;
162  lastA2 = t*(1./6.) + t*t*(1./324.);
163  lastA1 = std::sqrt (1-2*lastA2*lastA2*sig2);
164  lastA0 = mean + .5 - sig2 * lastA2;
165  }
166  return poissonDeviateQuick ( anEngine, lastA0, lastA1, lastA2, lastSigma );
167  }
168 
169 } // shoot (anEngine, mean)
170 
171 void RandPoissonQ::shootArray(const int size, long* vect, double m) {
172  for( long* v = vect; v != vect + size; ++v )
173  *v = shoot(m);
174  // Note: We could test for m > 100, and if it is, precompute a0, a1, a2,
175  // and sigma and call the appropriate form of poissonDeviateQuick.
176  // But since those are cached anyway, not much time would be saved.
177 }
178 
179 void RandPoissonQ::fireArray(const int size, long* vect, double m) {
180  for( long* v = vect; v != vect + size; ++v )
181  *v = fire( m );
182 }
183 
184 void RandPoissonQ::fireArray(const int size, long* vect) {
185  for( long* v = vect; v != vect + size; ++v )
186  *v = fire( defaultMean );
187 }
188 
189 
190 // Quick Poisson deviate algorithm used by quick for large mu:
191 
193 
194  // Compute the coefficients defining the quadratic transformation from a
195  // Gaussian to a Poisson:
196 
197  double sig2 = mu * (.9998654 - .08346/mu);
198  double sig = std::sqrt(sig2);
199  // The multiplier corrects for fact that discretization of the form
200  // [gaussian+.5] increases the second moment by a small amount.
201 
202  double t = 1./sig2;
203 
204  double sa2 = t*(1./6.) + t*t*(1./324.);
205  double sa1 = std::sqrt (1-2*sa2*sa2*sig2);
206  double sa0 = mu + .5 - sig2 * sa2;
207 
208  // The formula will be sa0 + sa1*x + sa2*x*x where x has sigma of sq.
209  // The coeffeicients are chosen to match the first THREE moments of the
210  // true Poisson distribution.
211 
212  return poissonDeviateQuick ( e, sa0, sa1, sa2, sig );
213 }
214 
215 
217  double A0, double A1, double A2, double sig) {
218 //
219 // Quick Poisson deviate algorithm used by quick for large mu:
220 //
221 // The principle: For very large mu, a poisson distribution can be approximated
222 // by a gaussian: return the integer part of mu + .5 + g where g is a unit
223 // normal. However, this yelds a miserable approximation at values as
224 // "large" as 100. The primary problem is that the poisson distribution is
225 // supposed to have a skew of 1/mu**2, and the zero skew of the Guassian
226 // leads to errors of order as big as 1/mu**2.
227 //
228 // We substitute for the gaussian a quadratic function of that gaussian random.
229 // The expression looks very nearly like mu + .5 - 1/6 + g + g**2/(6*mu).
230 // The small positive quadratic term causes the resulting variate to have
231 // a positive skew; the -1/6 constant term is there to correct for this bias
232 // in the mean. By adjusting these two and the linear term, we can match the
233 // first three moments to high accuracy in 1/mu.
234 //
235 // The sigma used is not precisely sqrt(mu) since a rounded-off Gaussian
236 // has a second moment which is slightly larger than that of the Gaussian.
237 // To compensate, sig is multiplied by a factor which is slightly less than 1.
238 
239  // double g = RandGauss::shootQuick( e ); // TEMPORARY MOD:
240  double g = RandGaussQ::shoot( e ); // Unit normal
241  g *= sig;
242  double p = A2*g*g + A1*g + A0;
243  if ( p < 0 ) return 0; // Shouldn't ever possibly happen since
244  // mean should not be less than 100, but
245  // we check due to paranoia.
247 
248  return long(p);
249 
250 } // poissonDeviateQuick ()
251 
252 
253 
255  long N1;
256  long N2;
257  // The following are for later use to form a secondary random s:
258  double rRange; // This will hold the interval between cdf for the
259  // computed N1 and cdf for N1+1.
260  double rRemainder = 0; // This will hold the length into that interval.
261 
262  // Coming in, mean should not be more than LAST_MU + S. However, we will
263  // be paranoid and test for this:
264 
265  if ( mean > LAST_MU + S ) {
266  return RandPoisson::shoot(e, mean);
267  }
268 
269  if (mean <= 0) {
270  return 0; // Perhaps we ought to balk harder here!
271  }
272 
273  // >>> 1 <<<
274  // Generate the first random, which we always will need.
275 
276  double r = e->flat();
277 
278  // >>> 2 <<<
279  // For small mean, below the start of the tables,
280  // do the series for cdf directly.
281 
282  // In this case, since we know the series will terminate relatively quickly,
283  // almost alwaye use a precomputed 1/N array without fear of overrunning it.
284 
285  static const double oneOverN[50] =
286  { 0, 1., 1/2., 1/3., 1/4., 1/5., 1/6., 1/7., 1/8., 1/9.,
287  1/10., 1/11., 1/12., 1/13., 1/14., 1/15., 1/16., 1/17., 1/18., 1/19.,
288  1/20., 1/21., 1/22., 1/23., 1/24., 1/25., 1/26., 1/27., 1/28., 1/29.,
289  1/30., 1/31., 1/32., 1/33., 1/34., 1/35., 1/36., 1/37., 1/38., 1/39.,
290  1/40., 1/41., 1/42., 1/43., 1/44., 1/45., 1/46., 1/47., 1/48., 1/49. };
291 
292 
293  if ( mean < FIRST_MU ) {
294 
295  long N = 0;
296  double term = std::exp(-mean);
297  double cdf = term;
298 
299  if ( r < (1 - 1.0E-9) ) {
300  //
301  // **** This is a normal path: ****
302  //
303  // Except when r is very close to 1, it is certain that we will exceed r
304  // before the 30-th term in the series, so a simple while loop is OK.
305  const double* oneOverNptr = oneOverN;
306  while( cdf <= r ) {
307  N++ ;
308  oneOverNptr++;
309  term *= ( mean * (*oneOverNptr) );
310  cdf += term;
311  }
312  return N;
313  //
314  // **** ****
315  //
316  } else { // r is almost 1...
317  // For r very near to 1 we would have to check that we don't fall
318  // off the end of the table of 1/N. Since this is very rare, we just
319  // ignore the table and do the identical while loop, using explicit
320  // division.
321  double cdf0;
322  while ( cdf <= r ) {
323  N++ ;
324  term *= ( mean / N );
325  cdf0 = cdf;
326  cdf += term;
327  if (cdf == cdf0) break; // Can't happen, but just in case...
328  }
329  return N;
330  } // end of if ( r compared to (1 - 1.0E-9) )
331 
332  } // End of the code for mean < FIRST_MU
333 
334  // >>> 3 <<<
335  // Find the row of the tables corresponding to the highest tabulated mu
336  // which is no greater than our actual mean.
337 
338  int rowNumber = int((mean - FIRST_MU)/S);
339  const double * cdfs = &poissonTables [rowNumber*ENTRIES];
340  double mu = FIRST_MU + rowNumber*S;
341  double deltaMu = mean - mu;
342  int Nmin = int(mu - BELOW);
343  if (Nmin < 1) Nmin = 1;
344  int Nmax = Nmin + (ENTRIES - 1);
345 
346 
347  // >>> 4 <<<
348  // If r is less that the smallest entry in the row, then
349  // generate the deviate directly from the series.
350 
351  if ( r < cdfs[0] ) {
352 
353  // In this case, we are tempted to use the actual mean, and not
354  // generate a second deviate to account for the leftover part mean - mu.
355  // That would be an error, generating a distribution with enough excess
356  // at Nmin + (mean-mu)/2 to be detectable in 4,000,000 trials.
357 
358  // Since this case is very rare (never more than .2% of the r values)
359  // and can happen where N will be large (up to 65 for the mu=95 row)
360  // we use explicit division so as to avoid having to worry about running
361  // out of oneOverN table.
362 
363  long N = 0;
364  double term = std::exp(-mu);
365  double cdf = term;
366  double cdf0;
367 
368  while(cdf <= r) {
369  N++ ;
370  term *= ( mu / N );
371  cdf0 = cdf;
372  cdf += term;
373  if (cdf == cdf0) break; // Can't happen, but just in case...
374  }
375  N1 = N;
376  // std::cout << r << " " << N << " ";
377  // DBG_small = true;
378  rRange = 0; // In this case there is always a second r needed
379 
380  } // end of small-r case
381 
382 
383  // >>> 5 <<<
384  // Assuming r lies within the scope of the row for this mu, find the
385  // largest entry not greater than r. N1 is the N corresponding to the
386  // index a.
387 
388  else if ( r < cdfs[ENTRIES-1] ) { // r is also >= cdfs[0]
389 
390  //
391  // **** This is the normal code path ****
392  //
393 
394  int a = 0; // Highest value of index such that cdfs[a]
395  // is known NOT to be greater than r.
396  int b = ENTRIES - 1; // Lowest value of index such that cdfs[b] is
397  // known to exeed r.
398 
399  while (b != (a+1) ) {
400  int c = (a+b+1)>>1;
401  if (r > cdfs[c]) {
402  a = c;
403  } else {
404  b = c;
405  }
406  }
407 
408  N1 = Nmin + a;
409  rRange = cdfs[a+1] - cdfs[a];
410  rRemainder = r - cdfs[a];
411 
412  //
413  // **** ****
414  //
415 
416  } // end of medium-r (normal) case
417 
418 
419  // >>> 6 <<<
420  // If r exceeds the greatest entry in the table for this mu, then start
421  // from that cdf, and use the series to compute from there until r is
422  // exceeded.
423 
424  else { // if ( r >= cdfs[ENTRIES-1] ) {
425 
426  // Here, division must be done explicitly, and we must also protect against
427  // roundoff preventing termination.
428 
429  //
430  //+++ cdfs[ENTRIES-1] is exp(-mu) sum (mu**m/m! , m=0 to Nmax)
431  //+++ (where Nmax = mu - BELOW + ENTRIES - 1)
432  //+++ cdfs[ENTRIES-1]-cdfs[ENTRIES-2] is exp(-mu) mu**(Nmax)/(Nmax)!
433  //+++ If the sum up to k-1 <= r < sum up to k, then N = k-1
434  //+++ Consider k = Nmax in the above statement:
435  //+++ If cdfs[ENTRIES-2] <= r < cdfs[ENTRIES-1], N would be Nmax-1
436  //+++ But here r >= cdfs[ENTRIES-1] so N >= Nmax
437  //
438 
439  // Erroneous:
440  //+++ cdfs[ENTRIES-1] is exp(-mu) sum (mu**m/m! , m=0 to Nmax-1)
441  //+++ cdfs[ENTRIES-1]-cdfs[ENTRIES-2] is exp(-mu) mu**(Nmax-1)/(Nmax-1)!
442  //+++ If a sum up to k-1 <= r < sum up to k, then N = k-1
443  //+++ So if cdfs[ENTRIES-1] were > r, N would be Nmax-1 (or less)
444  //+++ But here r >= cdfs[ENTRIES-1] so N >= Nmax
445  //
446 
447  // std::cout << "r = " << r << " mu = " << mu << "\n";
448  long N = Nmax -1;
449  double cdf = cdfs[ENTRIES-1];
450  double term = cdf - cdfs[ENTRIES-2];
451  double cdf0;
452  while(cdf <= r) {
453  N++ ;
454  // std::cout << " N " << N << " term " <<
455  // term << " cdf " << cdf << "\n";
456  term *= ( mu / N );
457  cdf0 = cdf;
458  cdf += term;
459  if (cdf == cdf0) break; // If term gets so small cdf stops increasing,
460  // terminate using that value of N since we
461  // would never reach r.
462  }
463  N1 = N;
464  rRange = 0; // We can't validly omit the second true random
465 
466  // N = Nmax -1;
467  // cdf = cdfs[ENTRIES-1];
468  // term = cdf - cdfs[ENTRIES-2];
469  // for (int isxz=0; isxz < 100; isxz++) {
470  // N++ ;
471  // term *= ( mu / N );
472  // cdf0 = cdf;
473  // cdf += term;
474  // }
475  // std::cout.precision(20);
476  // std::cout << "Final sum is " << cdf << "\n";
477 
478  } // end of large-r case
479 
480 
481 
482  // >>> 7 <<<
483  // Form a second random, s, based on the position of r within the range
484  // of this table entry to the next entry.
485 
486  // However, if this range is very small, then we lose too many bits of
487  // randomness. In that situation, we generate a second random for s.
488 
489  double s;
490 
491  static const double MINRANGE = .01; // Sacrifice up to two digits of
492  // randomness when using r to produce
493  // a second random s. Leads to up to
494  // .09 extra randoms each time.
495 
496  if ( rRange > MINRANGE ) {
497  //
498  // **** This path taken 90% of the time ****
499  //
500  s = rRemainder / rRange;
501  } else {
502  s = e->flat(); // extra true random needed about one time in 10.
503  }
504 
505  // >>> 8 <<<
506  // Use the direct summation method to form a second poisson deviate N2
507  // from deltaMu and s.
508 
509  N2 = 0;
510  double term = std::exp(-deltaMu);
511  double cdf = term;
512 
513  if ( s < (1 - 1.0E-10) ) {
514  //
515  // This is the normal path:
516  //
517  const double* oneOverNptr = oneOverN;
518  while( cdf <= s ) {
519  N2++ ;
520  oneOverNptr++;
521  term *= ( deltaMu * (*oneOverNptr) );
522  cdf += term;
523  }
524  } else { // s is almost 1...
525  while( cdf <= s ) {
526  N2++ ;
527  term *= ( deltaMu / N2 );
528  cdf += term;
529  }
530  } // end of if ( s compared to (1 - 1.0E-10) )
531 
532  // >>> 9 <<<
533  // The result is the sum of those two deviates
534 
535  // if (DBG_small) {
536  // std::cout << N2 << " " << N1+N2 << "\n";
537  // DBG_small = false;
538  // }
539 
540  return N1 + N2;
541 
542 } // poissonDeviate()
543 
544 std::ostream & RandPoissonQ::put ( std::ostream & os ) const {
545  int pr=os.precision(20);
546  std::vector<unsigned long> t(2);
547  os << " " << name() << "\n";
548  os << "Uvec" << "\n";
549  t = DoubConv::dto2longs(a0);
550  os << a0 << " " << t[0] << " " << t[1] << "\n";
551  t = DoubConv::dto2longs(a1);
552  os << a1 << " " << t[0] << " " << t[1] << "\n";
553  t = DoubConv::dto2longs(a2);
554  os << a2 << " " << t[0] << " " << t[1] << "\n";
556  os << sigma << " " << t[0] << " " << t[1] << "\n";
557  RandPoisson::put(os);
558  os.precision(pr);
559  return os;
560 #ifdef REMOVED
561  int pr=os.precision(20);
562  os << " " << name() << "\n";
563  os << a0 << " " << a1 << " " << a2 << "\n";
564  os << sigma << "\n";
565  RandPoisson::put(os);
566  os.precision(pr);
567  return os;
568 #endif
569 }
570 
571 std::istream & RandPoissonQ::get ( std::istream & is ) {
572  std::string inName;
573  is >> inName;
574  if (inName != name()) {
575  is.clear(std::ios::badbit | is.rdstate());
576  std::cerr << "Mismatch when expecting to read state of a "
577  << name() << " distribution\n"
578  << "Name found was " << inName
579  << "\nistream is left in the badbit state\n";
580  return is;
581  }
582  if (possibleKeywordInput(is, "Uvec", a0)) {
583  std::vector<unsigned long> t(2);
584  is >> a0 >> t[0] >> t[1]; a0 = DoubConv::longs2double(t);
585  is >> a1 >> t[0] >> t[1]; a1 = DoubConv::longs2double(t);
586  is >> a2 >> t[0] >> t[1]; a2 = DoubConv::longs2double(t);
587  is >> sigma >> t[0] >> t[1]; sigma = DoubConv::longs2double(t);
588  RandPoisson::get(is);
589  return is;
590  }
591  // is >> a0 encompassed by possibleKeywordInput
592  is >> a1 >> a2 >> sigma;
593  RandPoisson::get(is);
594  return is;
595 }
596 
597 } // namespace CLHEP
598