-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_64_bit.c
More file actions
127 lines (112 loc) · 4.73 KB
/
test_64_bit.c
File metadata and controls
127 lines (112 loc) · 4.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#define WORD_SIZE_BITS 64
#include "Lemire_RNG.h"
#include "dev_urandom_utils.h" // for the random seed
#include <assert.h>
#include <math.h> // sqrt()
#include <stdio.h>
#include <stdlib.h> // rand()
#include <sys/param.h> // MAX()
#include <time.h>
// This value should be "large", at least one million, but making it much larger
// slows down the test. This is a good sweetspot, I think.
#define NUM_OF_SAMPLES_PER_TEST ((word_t)(1 << 24))
#define PRINT_EVERY_N_WORDS (1000000)
#define NUM_BITS_IN_CHAR 8
uint8_t get_random_char()
{
return rand() % (1 << NUM_BITS_IN_CHAR);
}
// TODO: This should be a truly random word, from a deterministic RNG with a
// random seed, because the Lemire RNG calls this.
// I wanted to find an AES-CTR implementation or similar,
// But I don't know where to find one with an Apache license.
word_t get_random_word(__attribute__((unused)) void *dummy)
{
word_t res = 0;
for (int i = 0; i < (WORD_SIZE_BITS / NUM_BITS_IN_CHAR); i++) {
res <<= NUM_BITS_IN_CHAR;
res += get_random_char();
}
return res;
}
void test_num_samples_below_cutoff(word_t random_upper_bound,
word_t cutoff_value, word_t random_seed)
{
assert(cutoff_value < random_upper_bound);
uint64_t num_samples_below_cutoff = 0;
printf("Testing using %lu samples\n", NUM_OF_SAMPLES_PER_TEST);
// We generate n samples, then check if they are below the cutoff point.
// Each check is a random Bernoulli variable, with the following mean:
double bernoulli_variable_mean = (double)cutoff_value / random_upper_bound;
for (uint64_t i = 0; i < NUM_OF_SAMPLES_PER_TEST; i++) {
word_t sample =
Lemire_RNG__get_random_int(random_upper_bound, &get_random_word, NULL);
if (sample < cutoff_value) {
num_samples_below_cutoff++;
}
if (i % PRINT_EVERY_N_WORDS == 0) {
printf("%d%% done.\n", (int)((float)i / NUM_OF_SAMPLES_PER_TEST * 100));
}
}
// Central Limit Theorem:
// The following value is normally distributed with mean 0 and the same
// variance as the Bernoulli variables.
double sqrt_n = sqrt(NUM_OF_SAMPLES_PER_TEST);
double value_from_normal_distribution =
(num_samples_below_cutoff / sqrt_n) - (bernoulli_variable_mean * sqrt_n);
double bernoulli_variable_variance =
bernoulli_variable_mean * (1 - bernoulli_variable_mean);
double bernoulli_variable_stdev = sqrt(bernoulli_variable_variance);
int num_of_sigmas_to_fail_test = 8;
// If we have a value from a normal distribution that is 8 sigmas or more
// far from the mean, the probability this happened is one in 8*10^14.
// Even if one thousand different CI systems run this test every day for ten
// years, the probability it will fail is one in 8*10^14 / 10^3 / 3650 = 10^8.
// So this test should NEVER fail. Even though the test is probabilistic, if
// it failed there is almost certainly an issue with the RNG.
double deviation_from_mean_in_sigmas =
fabs(value_from_normal_distribution) / bernoulli_variable_stdev;
printf(
"FYI the number from the normal distribution is %f sigmas away from "
"the mean (the smaller, the more reassuring).\n",
deviation_from_mean_in_sigmas);
if (deviation_from_mean_in_sigmas > num_of_sigmas_to_fail_test) {
printf(
"Error in statistical test - "
"there is almost certainly an issue with the RNG.\n"
"This test was run with random seed %lu\n."
"You can reproduce this run by passing the seed as the first "
"argument.\n",
random_seed);
exit(1);
} else {
printf("Looks OK, test considered successful.\n");
}
}
int main(int argc, char *argv[])
{
word_t random_seed;
if (argc > 2) {
random_seed = atoi(argv[1]);
} else {
random_seed = dev_urandom_utils__get_random_word();
}
srand(random_seed);
printf(
"Welcome to the Lemire RNG 64-bit test tool! "
"We hope your stay with us will be enjoyable.\n"
"The tool generates %lu words from the RNG, "
"selects a random cutoff value, and tests that the expected proportion "
"of words from the RNG are below the cutoff value.\n"
"It does so twice, once with WORD_MAX_VALUE (%lu) as the random upper "
"bound, and once with a randomly-selected upper bound.\n",
NUM_OF_SAMPLES_PER_TEST, WORD_MAX_VALUE);
printf("FYI, your random seed for today is %lu.\n", random_seed);
test_num_samples_below_cutoff(WORD_MAX_VALUE, get_random_word(NULL),
random_seed);
word_t random_word1 = get_random_word(NULL);
word_t random_word2 = get_random_word(NULL);
word_t random_upper_bound = MAX(random_word1, random_word2);
word_t cutoff_value = MIN(random_word1, random_word2);
test_num_samples_below_cutoff(random_upper_bound, cutoff_value, random_seed);
}