/home/liu/actions-runner/_work/ccv/ccv/lib/3rdparty/sfmt/SFMT.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * @file SFMT.c |
3 | | * @brief SIMD oriented Fast Mersenne Twister(SFMT) |
4 | | * |
5 | | * @author Mutsuo Saito (Hiroshima University) |
6 | | * @author Makoto Matsumoto (Hiroshima University) |
7 | | * |
8 | | * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima |
9 | | * University. |
10 | | * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima |
11 | | * University and The University of Tokyo. |
12 | | * All rights reserved. |
13 | | * |
14 | | * The 3-clause BSD License is applied to this software, see |
15 | | * LICENSE.txt |
16 | | */ |
17 | | |
18 | | #if defined(__cplusplus) |
19 | | extern "C" { |
20 | | #endif |
21 | | |
22 | | #include <string.h> |
23 | | #include <assert.h> |
24 | | #include "SFMT.h" |
25 | | #include "SFMT-params.h" |
26 | | #include "SFMT-common.h" |
27 | | |
28 | | #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) |
29 | | #define BIG_ENDIAN64 1 |
30 | | #endif |
31 | | #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) |
32 | | #define BIG_ENDIAN64 1 |
33 | | #endif |
34 | | #if defined(ONLY64) && !defined(BIG_ENDIAN64) |
35 | | #if defined(__GNUC__) |
36 | | #error "-DONLY64 must be specified with -DBIG_ENDIAN64" |
37 | | #endif |
38 | | #undef ONLY64 |
39 | | #endif |
40 | | |
41 | | /** |
42 | | * parameters used by sse2. |
43 | | */ |
44 | | #ifdef HAVE_SSE2 |
45 | | static const w128_t sse2_param_mask = {{SFMT_MSK1, SFMT_MSK2, |
46 | | SFMT_MSK3, SFMT_MSK4}}; |
47 | | #endif |
48 | | /*---------------- |
49 | | STATIC FUNCTIONS |
50 | | ----------------*/ |
51 | | inline static int idxof(int i); |
52 | | inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size); |
53 | | inline static uint32_t func1(uint32_t x); |
54 | | inline static uint32_t func2(uint32_t x); |
55 | | static void period_certification(sfmt_t * sfmt); |
56 | | #if defined(BIG_ENDIAN64) && !defined(ONLY64) |
57 | | inline static void swap(w128_t *array, int size); |
58 | | #endif |
59 | | |
60 | | #if defined(HAVE_ALTIVEC) |
61 | | #include "SFMT-alti.h" |
62 | | #elif defined(HAVE_SSE2) |
63 | | #include "SFMT-sse2.h" |
64 | | #endif |
65 | | |
66 | | /** |
67 | | * This function simulate a 64-bit index of LITTLE ENDIAN |
68 | | * in BIG ENDIAN machine. |
69 | | */ |
70 | | #ifdef ONLY64 |
71 | | inline static int idxof(int i) { |
72 | | return i ^ 1; |
73 | | } |
74 | | #else |
75 | 281M | inline static int idxof(int i) { |
76 | 281M | return i; |
77 | 281M | } |
78 | | #endif |
79 | | |
80 | | #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) |
81 | | /** |
82 | | * This function fills the user-specified array with pseudorandom |
83 | | * integers. |
84 | | * |
85 | | * @param sfmt SFMT internal state |
86 | | * @param array an 128-bit array to be filled by pseudorandom numbers. |
87 | | * @param size number of 128-bit pseudorandom numbers to be generated. |
88 | | */ |
89 | | inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size) { |
90 | | int i, j; |
91 | | w128_t *r1, *r2; |
92 | | |
93 | | r1 = &sfmt->state[SFMT_N - 2]; |
94 | | r2 = &sfmt->state[SFMT_N - 1]; |
95 | | for (i = 0; i < SFMT_N - SFMT_POS1; i++) { |
96 | | do_recursion(&array[i], &sfmt->state[i], &sfmt->state[i + SFMT_POS1], r1, r2); |
97 | | r1 = r2; |
98 | | r2 = &array[i]; |
99 | | } |
100 | | for (; i < SFMT_N; i++) { |
101 | | do_recursion(&array[i], &sfmt->state[i], |
102 | | &array[i + SFMT_POS1 - SFMT_N], r1, r2); |
103 | | r1 = r2; |
104 | | r2 = &array[i]; |
105 | | } |
106 | | for (; i < size - SFMT_N; i++) { |
107 | | do_recursion(&array[i], &array[i - SFMT_N], |
108 | | &array[i + SFMT_POS1 - SFMT_N], r1, r2); |
109 | | r1 = r2; |
110 | | r2 = &array[i]; |
111 | | } |
112 | | for (j = 0; j < 2 * SFMT_N - size; j++) { |
113 | | sfmt->state[j] = array[j + size - SFMT_N]; |
114 | | } |
115 | | for (; i < size; i++, j++) { |
116 | | do_recursion(&array[i], &array[i - SFMT_N], |
117 | | &array[i + SFMT_POS1 - SFMT_N], r1, r2); |
118 | | r1 = r2; |
119 | | r2 = &array[i]; |
120 | | sfmt->state[j] = array[i]; |
121 | | } |
122 | | } |
123 | | #endif |
124 | | |
125 | | #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) |
126 | | inline static void swap(w128_t *array, int size) { |
127 | | int i; |
128 | | uint32_t x, y; |
129 | | |
130 | | for (i = 0; i < size; i++) { |
131 | | x = array[i].u[0]; |
132 | | y = array[i].u[2]; |
133 | | array[i].u[0] = array[i].u[1]; |
134 | | array[i].u[2] = array[i].u[3]; |
135 | | array[i].u[1] = x; |
136 | | array[i].u[3] = y; |
137 | | } |
138 | | } |
139 | | #endif |
140 | | /** |
141 | | * This function represents a function used in the initialization |
142 | | * by init_by_array |
143 | | * @param x 32-bit integer |
144 | | * @return 32-bit integer |
145 | | */ |
146 | 0 | static uint32_t func1(uint32_t x) { |
147 | 0 | return (x ^ (x >> 27)) * (uint32_t)1664525UL; |
148 | 0 | } |
149 | | |
150 | | /** |
151 | | * This function represents a function used in the initialization |
152 | | * by init_by_array |
153 | | * @param x 32-bit integer |
154 | | * @return 32-bit integer |
155 | | */ |
156 | 0 | static uint32_t func2(uint32_t x) { |
157 | 0 | return (x ^ (x >> 27)) * (uint32_t)1566083941UL; |
158 | 0 | } |
159 | | |
160 | | /** |
161 | | * This function certificate the period of 2^{MEXP} |
162 | | * @param sfmt SFMT internal state |
163 | | */ |
164 | 150k | static void period_certification(sfmt_t * sfmt) { |
165 | 150k | int inner = 0; |
166 | 150k | int i, j; |
167 | 150k | uint32_t work; |
168 | 150k | uint32_t *psfmt32 = &sfmt->state[0].u[0]; |
169 | 150k | const uint32_t parity[4] = {SFMT_PARITY1, SFMT_PARITY2, |
170 | 150k | SFMT_PARITY3, SFMT_PARITY4}; |
171 | | |
172 | 750k | for (i = 0; i < 4; i++600k ) |
173 | 600k | inner ^= psfmt32[idxof(i)] & parity[i]; |
174 | 900k | for (i = 16; i > 0; i >>= 1750k ) |
175 | 750k | inner ^= inner >> i; |
176 | 150k | inner &= 1; |
177 | | /* check OK */ |
178 | 150k | if (inner == 1) { |
179 | 74.9k | return; |
180 | 74.9k | } |
181 | | /* check NG, and modification */ |
182 | 75.1k | for (i = 0; i < 4; i++0 ) { |
183 | 75.1k | work = 1; |
184 | 75.1k | for (j = 0; j < 32; j++0 ) { |
185 | 75.1k | if ((work & parity[i]) != 0) { |
186 | 75.1k | psfmt32[idxof(i)] ^= work; |
187 | 75.1k | return; |
188 | 75.1k | } |
189 | 0 | work = work << 1; |
190 | 0 | } |
191 | 75.1k | } |
192 | 75.1k | } |
193 | | |
194 | | /*---------------- |
195 | | PUBLIC FUNCTIONS |
196 | | ----------------*/ |
197 | 0 | #define UNUSED_VARIABLE(x) (void)(x) |
198 | | /** |
199 | | * This function returns the identification string. |
200 | | * The string shows the word size, the Mersenne exponent, |
201 | | * and all parameters of this generator. |
202 | | * @param sfmt SFMT internal state |
203 | | */ |
204 | 0 | const char *sfmt_get_idstring(sfmt_t * sfmt) { |
205 | 0 | UNUSED_VARIABLE(sfmt); |
206 | 0 | return SFMT_IDSTR; |
207 | 0 | } |
208 | | |
209 | | /** |
210 | | * This function returns the minimum size of array used for \b |
211 | | * fill_array32() function. |
212 | | * @param sfmt SFMT internal state |
213 | | * @return minimum size of array used for fill_array32() function. |
214 | | */ |
215 | 0 | int sfmt_get_min_array_size32(sfmt_t * sfmt) { |
216 | 0 | UNUSED_VARIABLE(sfmt); |
217 | 0 | return SFMT_N32; |
218 | 0 | } |
219 | | |
220 | | /** |
221 | | * This function returns the minimum size of array used for \b |
222 | | * fill_array64() function. |
223 | | * @param sfmt SFMT internal state |
224 | | * @return minimum size of array used for fill_array64() function. |
225 | | */ |
226 | 0 | int sfmt_get_min_array_size64(sfmt_t * sfmt) { |
227 | 0 | UNUSED_VARIABLE(sfmt); |
228 | 0 | return SFMT_N64; |
229 | 0 | } |
230 | | |
231 | | #if !defined(HAVE_SSE2) && !defined(HAVE_ALTIVEC) |
232 | | /** |
233 | | * This function fills the internal state array with pseudorandom |
234 | | * integers. |
235 | | * @param sfmt SFMT internal state |
236 | | */ |
237 | | void sfmt_gen_rand_all(sfmt_t * sfmt) { |
238 | | int i; |
239 | | w128_t *r1, *r2; |
240 | | |
241 | | r1 = &sfmt->state[SFMT_N - 2]; |
242 | | r2 = &sfmt->state[SFMT_N - 1]; |
243 | | for (i = 0; i < SFMT_N - SFMT_POS1; i++) { |
244 | | do_recursion(&sfmt->state[i], &sfmt->state[i], |
245 | | &sfmt->state[i + SFMT_POS1], r1, r2); |
246 | | r1 = r2; |
247 | | r2 = &sfmt->state[i]; |
248 | | } |
249 | | for (; i < SFMT_N; i++) { |
250 | | do_recursion(&sfmt->state[i], &sfmt->state[i], |
251 | | &sfmt->state[i + SFMT_POS1 - SFMT_N], r1, r2); |
252 | | r1 = r2; |
253 | | r2 = &sfmt->state[i]; |
254 | | } |
255 | | } |
256 | | #endif |
257 | | |
258 | | #ifndef ONLY64 |
259 | | /** |
260 | | * This function generates pseudorandom 32-bit integers in the |
261 | | * specified array[] by one call. The number of pseudorandom integers |
262 | | * is specified by the argument size, which must be at least 624 and a |
263 | | * multiple of four. The generation by this function is much faster |
264 | | * than the following gen_rand function. |
265 | | * |
266 | | * For initialization, init_gen_rand or init_by_array must be called |
267 | | * before the first call of this function. This function can not be |
268 | | * used after calling gen_rand function, without initialization. |
269 | | * |
270 | | * @param sfmt SFMT internal state |
271 | | * @param array an array where pseudorandom 32-bit integers are filled |
272 | | * by this function. The pointer to the array must be \b "aligned" |
273 | | * (namely, must be a multiple of 16) in the SIMD version, since it |
274 | | * refers to the address of a 128-bit integer. In the standard C |
275 | | * version, the pointer is arbitrary. |
276 | | * |
277 | | * @param size the number of 32-bit pseudorandom integers to be |
278 | | * generated. size must be a multiple of 4, and greater than or equal |
279 | | * to (MEXP / 128 + 1) * 4. |
280 | | * |
281 | | * @note \b memalign or \b posix_memalign is available to get aligned |
282 | | * memory. Mac OSX doesn't have these functions, but \b malloc of OSX |
283 | | * returns the pointer to the aligned memory block. |
284 | | */ |
285 | 0 | void sfmt_fill_array32(sfmt_t * sfmt, uint32_t *array, int size) { |
286 | 0 | assert(sfmt->idx == SFMT_N32); |
287 | 0 | assert(size % 4 == 0); |
288 | 0 | assert(size >= SFMT_N32); |
289 | | |
290 | 0 | gen_rand_array(sfmt, (w128_t *)array, size / 4); |
291 | 0 | sfmt->idx = SFMT_N32; |
292 | 0 | } |
293 | | #endif |
294 | | |
295 | | /** |
296 | | * This function generates pseudorandom 64-bit integers in the |
297 | | * specified array[] by one call. The number of pseudorandom integers |
298 | | * is specified by the argument size, which must be at least 312 and a |
299 | | * multiple of two. The generation by this function is much faster |
300 | | * than the following gen_rand function. |
301 | | * |
302 | | * @param sfmt SFMT internal state |
303 | | * For initialization, init_gen_rand or init_by_array must be called |
304 | | * before the first call of this function. This function can not be |
305 | | * used after calling gen_rand function, without initialization. |
306 | | * |
307 | | * @param array an array where pseudorandom 64-bit integers are filled |
308 | | * by this function. The pointer to the array must be "aligned" |
309 | | * (namely, must be a multiple of 16) in the SIMD version, since it |
310 | | * refers to the address of a 128-bit integer. In the standard C |
311 | | * version, the pointer is arbitrary. |
312 | | * |
313 | | * @param size the number of 64-bit pseudorandom integers to be |
314 | | * generated. size must be a multiple of 2, and greater than or equal |
315 | | * to (MEXP / 128 + 1) * 2 |
316 | | * |
317 | | * @note \b memalign or \b posix_memalign is available to get aligned |
318 | | * memory. Mac OSX doesn't have these functions, but \b malloc of OSX |
319 | | * returns the pointer to the aligned memory block. |
320 | | */ |
321 | 0 | void sfmt_fill_array64(sfmt_t * sfmt, uint64_t *array, int size) { |
322 | 0 | assert(sfmt->idx == SFMT_N32); |
323 | 0 | assert(size % 2 == 0); |
324 | 0 | assert(size >= SFMT_N64); |
325 | | |
326 | 0 | gen_rand_array(sfmt, (w128_t *)array, size / 2); |
327 | 0 | sfmt->idx = SFMT_N32; |
328 | |
|
329 | | #if defined(BIG_ENDIAN64) && !defined(ONLY64) |
330 | | swap((w128_t *)array, size /2); |
331 | | #endif |
332 | 0 | } |
333 | | /** |
334 | | * This function randomly shuffle the given array |
335 | | * |
336 | | * @param sfmt SFMT internal state |
337 | | * @param array the array that is going to be shuffled |
338 | | * @param size the size of the array |
339 | | * @param rsize the size of each record in the array |
340 | | */ |
341 | | #if !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__NetBSD__) |
342 | | #include <alloca.h> |
343 | | #endif |
344 | | |
345 | 150k | void sfmt_genrand_shuffle(sfmt_t * sfmt, void *array, int size, int rsize) { |
346 | 150k | int i, j; |
347 | 150k | char *t = (char *)alloca(rsize); |
348 | 150k | char *ptr = (char *)array; |
349 | 150k | char *ptri = ptr + (size - 1) * rsize; |
350 | 750k | for (i = size - 1; i >= 0; i--600k ) { |
351 | 600k | j = sfmt_genrand_uint32(sfmt) % (i + 1); |
352 | 600k | if (i != j) |
353 | 288k | { |
354 | 288k | char *ptrj = ptr + j * rsize; |
355 | 288k | memcpy(t, ptri, rsize); |
356 | 288k | memcpy(ptri, ptrj, rsize); |
357 | 288k | memcpy(ptrj, t, rsize); |
358 | 288k | } |
359 | 600k | ptri -= rsize; |
360 | 600k | } |
361 | 150k | } |
362 | | |
363 | | /** |
364 | | * This function initializes the internal state array with a 32-bit |
365 | | * integer seed. |
366 | | * |
367 | | * @param sfmt SFMT internal state |
368 | | * @param seed a 32-bit integer used as the seed. |
369 | | */ |
370 | 150k | void sfmt_init_gen_rand(sfmt_t * sfmt, uint32_t seed) { |
371 | 150k | int i; |
372 | | |
373 | 150k | uint32_t *psfmt32 = &sfmt->state[0].u[0]; |
374 | | |
375 | 150k | psfmt32[idxof(0)] = seed; |
376 | 93.6M | for (i = 1; i < SFMT_N32; i++93.4M ) { |
377 | 93.4M | psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] |
378 | 93.4M | ^ (psfmt32[idxof(i - 1)] >> 30)) |
379 | 93.4M | + i; |
380 | 93.4M | } |
381 | 150k | sfmt->idx = SFMT_N32; |
382 | 150k | period_certification(sfmt); |
383 | 150k | } |
384 | | |
385 | | /** |
386 | | * This function initializes the internal state array, |
387 | | * with an array of 32-bit integers used as the seeds |
388 | | * @param sfmt SFMT internal state |
389 | | * @param init_key the array of 32-bit integers, used as a seed. |
390 | | * @param key_length the length of init_key. |
391 | | */ |
392 | 0 | void sfmt_init_by_array(sfmt_t * sfmt, uint32_t *init_key, int key_length) { |
393 | 0 | int i, j, count; |
394 | 0 | uint32_t r; |
395 | 0 | int lag; |
396 | 0 | int mid; |
397 | 0 | int size = SFMT_N * 4; |
398 | 0 | uint32_t *psfmt32 = &sfmt->state[0].u[0]; |
399 | |
|
400 | 0 | if (size >= 623) { |
401 | 0 | lag = 11; |
402 | 0 | } else if (size >= 68) { |
403 | 0 | lag = 7; |
404 | 0 | } else if (size >= 39) { |
405 | 0 | lag = 5; |
406 | 0 | } else { |
407 | 0 | lag = 3; |
408 | 0 | } |
409 | 0 | mid = (size - lag) / 2; |
410 | |
|
411 | 0 | memset(sfmt, 0x8b, sizeof(sfmt_t)); |
412 | 0 | if (key_length + 1 > SFMT_N32) { |
413 | 0 | count = key_length + 1; |
414 | 0 | } else { |
415 | 0 | count = SFMT_N32; |
416 | 0 | } |
417 | 0 | r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] |
418 | 0 | ^ psfmt32[idxof(SFMT_N32 - 1)]); |
419 | 0 | psfmt32[idxof(mid)] += r; |
420 | 0 | r += key_length; |
421 | 0 | psfmt32[idxof(mid + lag)] += r; |
422 | 0 | psfmt32[idxof(0)] = r; |
423 | |
|
424 | 0 | count--; |
425 | 0 | for (i = 1, j = 0; (j < count) && (j < key_length); j++) { |
426 | 0 | r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)] |
427 | 0 | ^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]); |
428 | 0 | psfmt32[idxof((i + mid) % SFMT_N32)] += r; |
429 | 0 | r += init_key[j] + i; |
430 | 0 | psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r; |
431 | 0 | psfmt32[idxof(i)] = r; |
432 | 0 | i = (i + 1) % SFMT_N32; |
433 | 0 | } |
434 | 0 | for (; j < count; j++) { |
435 | 0 | r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)] |
436 | 0 | ^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]); |
437 | 0 | psfmt32[idxof((i + mid) % SFMT_N32)] += r; |
438 | 0 | r += i; |
439 | 0 | psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r; |
440 | 0 | psfmt32[idxof(i)] = r; |
441 | 0 | i = (i + 1) % SFMT_N32; |
442 | 0 | } |
443 | 0 | for (j = 0; j < SFMT_N32; j++) { |
444 | 0 | r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % SFMT_N32)] |
445 | 0 | + psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]); |
446 | 0 | psfmt32[idxof((i + mid) % SFMT_N32)] ^= r; |
447 | 0 | r -= i; |
448 | 0 | psfmt32[idxof((i + mid + lag) % SFMT_N32)] ^= r; |
449 | 0 | psfmt32[idxof(i)] = r; |
450 | 0 | i = (i + 1) % SFMT_N32; |
451 | 0 | } |
452 | |
|
453 | 0 | sfmt->idx = SFMT_N32; |
454 | 0 | period_certification(sfmt); |
455 | 0 | } |
456 | | #if defined(__cplusplus) |
457 | | } |
458 | | #endif |