/home/liu/actions-runner/_work/ccv/ccv/lib/3rdparty/dsfmt/dSFMT-common.h
Line | Count | Source |
1 | | #pragma once |
2 | | /** |
3 | | * @file dSFMT-common.h |
4 | | * |
5 | | * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom |
6 | | * number generator with jump function. This file includes common functions |
7 | | * used in random number generation and jump. |
8 | | * |
9 | | * @author Mutsuo Saito (Hiroshima University) |
10 | | * @author Makoto Matsumoto (The University of Tokyo) |
11 | | * |
12 | | * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima |
13 | | * University. |
14 | | * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima |
15 | | * University and The University of Tokyo. |
16 | | * All rights reserved. |
17 | | * |
18 | | * The 3-clause BSD License is applied to this software, see |
19 | | * LICENSE.txt |
20 | | */ |
21 | | #ifndef DSFMT_COMMON_H |
22 | | #define DSFMT_COMMON_H |
23 | | |
24 | | #include "dSFMT.h" |
25 | | |
26 | | #if defined(HAVE_SSE2) |
27 | | # include <emmintrin.h> |
28 | | union X128I_T { |
29 | | uint64_t u[2]; |
30 | | __m128i i128; |
31 | | }; |
32 | | union X128D_T { |
33 | | double d[2]; |
34 | | __m128d d128; |
35 | | }; |
36 | | /** mask data for sse2 */ |
37 | | static const union X128I_T sse2_param_mask = {{DSFMT_MSK1, DSFMT_MSK2}}; |
38 | | #endif |
39 | | |
40 | | #if defined(HAVE_ALTIVEC) |
41 | | inline static void do_recursion(dw128_t *r, dw128_t *a, dw128_t * b, |
42 | | dw128_t *lung) { |
43 | | const vector unsigned char sl1 = ALTI_SL1; |
44 | | const vector unsigned char sl1_perm = ALTI_SL1_PERM; |
45 | | const vector unsigned int sl1_msk = ALTI_SL1_MSK; |
46 | | const vector unsigned char sr1 = ALTI_SR; |
47 | | const vector unsigned char sr1_perm = ALTI_SR_PERM; |
48 | | const vector unsigned int sr1_msk = ALTI_SR_MSK; |
49 | | const vector unsigned char perm = ALTI_PERM; |
50 | | const vector unsigned int msk1 = ALTI_MSK; |
51 | | vector unsigned int w, x, y, z; |
52 | | |
53 | | z = a->s; |
54 | | w = lung->s; |
55 | | x = vec_perm(w, (vector unsigned int)perm, perm); |
56 | | y = vec_perm(z, sl1_perm, sl1_perm); |
57 | | y = vec_sll(y, sl1); |
58 | | y = vec_and(y, sl1_msk); |
59 | | w = vec_xor(x, b->s); |
60 | | w = vec_xor(w, y); |
61 | | x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); |
62 | | x = vec_srl(x, sr1); |
63 | | x = vec_and(x, sr1_msk); |
64 | | y = vec_and(w, msk1); |
65 | | z = vec_xor(z, y); |
66 | | r->s = vec_xor(z, x); |
67 | | lung->s = w; |
68 | | } |
69 | | #elif defined(HAVE_SSE2) |
70 | | /** |
71 | | * This function represents the recursion formula. |
72 | | * @param r output 128-bit |
73 | | * @param a a 128-bit part of the internal state array |
74 | | * @param b a 128-bit part of the internal state array |
75 | | * @param d a 128-bit part of the internal state array (I/O) |
76 | | */ |
77 | 300M | inline static void do_recursion(dw128_t *r, dw128_t *a, dw128_t *b, dw128_t *u) { |
78 | 300M | __m128i v, w, x, y, z; |
79 | | |
80 | 300M | x = a->si; |
81 | 300M | z = _mm_slli_epi64(x, DSFMT_SL1); |
82 | 300M | y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); |
83 | 300M | z = _mm_xor_si128(z, b->si); |
84 | 300M | y = _mm_xor_si128(y, z); |
85 | | |
86 | 300M | v = _mm_srli_epi64(y, DSFMT_SR); |
87 | 300M | w = _mm_and_si128(y, sse2_param_mask.i128); |
88 | 300M | v = _mm_xor_si128(v, x); |
89 | 300M | v = _mm_xor_si128(v, w); |
90 | 300M | r->si = v; |
91 | 300M | u->si = y; |
92 | 300M | } |
93 | | #else |
94 | | /** |
95 | | * This function represents the recursion formula. |
96 | | * @param r output 128-bit |
97 | | * @param a a 128-bit part of the internal state array |
98 | | * @param b a 128-bit part of the internal state array |
99 | | * @param lung a 128-bit part of the internal state array (I/O) |
100 | | */ |
101 | | inline static void do_recursion(dw128_t *r, dw128_t *a, dw128_t * b, |
102 | | dw128_t *lung) { |
103 | | uint64_t t0, t1, L0, L1; |
104 | | |
105 | | t0 = a->u[0]; |
106 | | t1 = a->u[1]; |
107 | | L0 = lung->u[0]; |
108 | | L1 = lung->u[1]; |
109 | | lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; |
110 | | lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; |
111 | | r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; |
112 | | r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; |
113 | | } |
114 | | #endif |
115 | | #endif |