/* probability spread calculations */ /* * Copyright (C) 1999 Roger Willcocks * rogerw@centipede.co.uk * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ /* * given a set of numbers (look at them as the number of coins in each of * a set of buckets) calculate the probability of that distribution occurring. * The numbers involved get very large very rapidly. * * Compare the exact result (for coins < 170) with an approximation. */ #include #include #include #define PI 3.14159265359 /* * Stirling's approximation - correct to 7 figures for n > 15 * n! = sqrt(2*pi*n) * n^n * e^(-n + n/12 + ~1/(n^2)) */ double logFactorial[1000]; double cc = log(2 * PI) / 2.0; double trueFactorial[1000]; /* exact to 171! */ void generateFactorial() { double ff = 1.0; trueFactorial[0] = 1.0; logFactorial[0] = 0.0; for (int i = 1; i < 1000; i++) { double ii = (double)i; double jj = 1.0 / (12.0 * ii); logFactorial[i] = cc + log(ii) * (ii + 0.5) - ii + jj; trueFactorial[i] = (ff *= ii); } } /* __int64 factorial(int i, __int64 j, double d) { double ii = i; double jj = 1.0 / (12.0 * ii); // double xx = log(2 * PI * ii) / 2.0 + log(ii) * ii - ii + jj; double xx = printf("%d: %I64d %10.10g %10.10g %10.10g\n", i, j, d, exp(xx), exp(xx) / d); if (i >= 200) return 0.0; dd[i] = xx; factorial(i+1, j*(i+1), d * (i+1)); return d; } */ extern "C" sortFunc(const void *a, const void *b) { if (*(int *)a > *(int *)b) return -1; if (*(int *)a < *(int *)b) return 1; return 0; } int bucket[100]; int count[100]; int value[100]; int main(int argc, char *argv[]) { setbuf(stdout, NULL); generateFactorial(); int total = 0; argc--; for (int i = 0; i < argc; i++) { bucket[i] = atoi(argv[i+1]); total += bucket[i]; } qsort(&bucket[0], argc, sizeof(bucket[0]), &sortFunc); int cp = 0; for (i = 0; i < argc; i++) { if (i && bucket[i-1] != bucket[i]) cp++; count[cp]++; value[cp] = bucket[i]; } cp++; for (i = 0; i < cp; i++) printf("%d * %d\n", count[i], value[i]); printf("\n"); printf("%d coins in %d buckets\n", total, argc); double p = trueFactorial[total]; for (i = 0; i < cp; i++) p /= (trueFactorial[count[i]]); for (i = 0; i < argc; i++) p /= (trueFactorial[bucket[i]]); p *= trueFactorial[argc]; double q = exp(log((double)argc) * (double)total); if (p > q) printf("WARNING: more matches than arrangements\n"); printf("exact probability = %g matches / %g arrangements = %g; significance = %g\n", p, q, p / q, q / p); p = logFactorial[total] + logFactorial[argc]; for (i = 0; i < cp; i++) p -= (logFactorial[count[i]]); for (i = 0; i < argc; i++) p -= (logFactorial[bucket[i]]); printf("approximate significance = %g\n", exp(log((double)argc) * (double)total - p)); return 0; }