/home/liu/actions-runner/_work/ccv/ccv/lib/3rdparty/sfmt/SFMT-sse2.h
Line | Count | Source (jump to first uncovered line) |
1 | | #pragma once |
2 | | /** |
3 | | * @file SFMT-sse2.h |
4 | | * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2 |
5 | | * |
6 | | * @author Mutsuo Saito (Hiroshima University) |
7 | | * @author Makoto Matsumoto (Hiroshima University) |
8 | | * |
9 | | * @note We assume LITTLE ENDIAN in this file |
10 | | * |
11 | | * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima |
12 | | * University. All rights reserved. |
13 | | * |
14 | | * The new BSD License is applied to this software, see LICENSE.txt |
15 | | */ |
16 | | |
17 | | #ifndef SFMT_SSE2_H |
18 | | #define SFMT_SSE2_H |
19 | | |
20 | | inline static void mm_recursion(__m128i * r, __m128i a, __m128i b, |
21 | | __m128i c, __m128i d); |
22 | | |
23 | | /** |
24 | | * This function represents the recursion formula. |
25 | | * @param r an output |
26 | | * @param a a 128-bit part of the internal state array |
27 | | * @param b a 128-bit part of the internal state array |
28 | | * @param c a 128-bit part of the internal state array |
29 | | * @param d a 128-bit part of the internal state array |
30 | | */ |
31 | | inline static void mm_recursion(__m128i * r, __m128i a, __m128i b, |
32 | | __m128i c, __m128i d) |
33 | 23.4M | { |
34 | 23.4M | __m128i v, x, y, z; |
35 | | |
36 | 23.4M | y = _mm_srli_epi32(b, SFMT_SR1); |
37 | 23.4M | z = _mm_srli_si128(c, SFMT_SR2); |
38 | 23.4M | v = _mm_slli_epi32(d, SFMT_SL1); |
39 | 23.4M | z = _mm_xor_si128(z, a); |
40 | 23.4M | z = _mm_xor_si128(z, v); |
41 | 23.4M | x = _mm_slli_si128(a, SFMT_SL2); |
42 | 23.4M | y = _mm_and_si128(y, sse2_param_mask.si); |
43 | 23.4M | z = _mm_xor_si128(z, x); |
44 | 23.4M | z = _mm_xor_si128(z, y); |
45 | 23.4M | *r = z; |
46 | 23.4M | } |
47 | | |
48 | | /** |
49 | | * This function fills the internal state array with pseudorandom |
50 | | * integers. |
51 | | * @param sfmt SFMT internal state |
52 | | */ |
53 | 150k | void sfmt_gen_rand_all(sfmt_t * sfmt) { |
54 | 150k | int i; |
55 | 150k | __m128i r1, r2; |
56 | 150k | w128_t * pstate = sfmt->state; |
57 | | |
58 | 150k | r1 = pstate[SFMT_N - 2].si; |
59 | 150k | r2 = pstate[SFMT_N - 1].si; |
60 | 5.25M | for (i = 0; i < SFMT_N - SFMT_POS1; i++) { |
61 | 5.10M | mm_recursion(&pstate[i].si, pstate[i].si, |
62 | 5.10M | pstate[i + SFMT_POS1].si, r1, r2); |
63 | 5.10M | r1 = r2; |
64 | 5.10M | r2 = pstate[i].si; |
65 | 5.10M | } |
66 | 18.4M | for (; i < SFMT_N; i++) { |
67 | 18.3M | mm_recursion(&pstate[i].si, pstate[i].si, |
68 | 18.3M | pstate[i + SFMT_POS1 - SFMT_N].si, |
69 | 18.3M | r1, r2); |
70 | 18.3M | r1 = r2; |
71 | 18.3M | r2 = pstate[i].si; |
72 | 18.3M | } |
73 | 150k | } |
74 | | |
75 | | /** |
76 | | * This function fills the user-specified array with pseudorandom |
77 | | * integers. |
78 | | * @param sfmt SFMT internal state. |
79 | | * @param array a 128-bit array to be filled by pseudorandom numbers. |
80 | | * @param size number of 128-bit pseudorandom numbers to be generated. |
81 | | */ |
82 | | static void gen_rand_array(sfmt_t * sfmt, w128_t * array, int size) |
83 | 0 | { |
84 | 0 | int i, j; |
85 | 0 | __m128i r1, r2; |
86 | 0 | w128_t * pstate = sfmt->state; |
87 | | |
88 | 0 | r1 = pstate[SFMT_N - 2].si; |
89 | 0 | r2 = pstate[SFMT_N - 1].si; |
90 | 0 | for (i = 0; i < SFMT_N - SFMT_POS1; i++) { |
91 | 0 | mm_recursion(&array[i].si, pstate[i].si, |
92 | 0 | pstate[i + SFMT_POS1].si, r1, r2); |
93 | 0 | r1 = r2; |
94 | 0 | r2 = array[i].si; |
95 | 0 | } |
96 | 0 | for (; i < SFMT_N; i++) { |
97 | 0 | mm_recursion(&array[i].si, pstate[i].si, |
98 | 0 | array[i + SFMT_POS1 - SFMT_N].si, r1, r2); |
99 | 0 | r1 = r2; |
100 | 0 | r2 = array[i].si; |
101 | 0 | } |
102 | 0 | for (; i < size - SFMT_N; i++) { |
103 | 0 | mm_recursion(&array[i].si, array[i - SFMT_N].si, |
104 | 0 | array[i + SFMT_POS1 - SFMT_N].si, r1, r2); |
105 | 0 | r1 = r2; |
106 | 0 | r2 = array[i].si; |
107 | 0 | } |
108 | 0 | for (j = 0; j < 2 * SFMT_N - size; j++) { |
109 | 0 | pstate[j] = array[j + size - SFMT_N]; |
110 | 0 | } |
111 | 0 | for (; i < size; i++, j++) { |
112 | 0 | mm_recursion(&array[i].si, array[i - SFMT_N].si, |
113 | 0 | array[i + SFMT_POS1 - SFMT_N].si, r1, r2); |
114 | 0 | r1 = r2; |
115 | 0 | r2 = array[i].si; |
116 | 0 | pstate[j] = array[i]; |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | | |
121 | | #endif |