/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/forward.tests.c
Line | Count | Source |
1 | | #include "case.h" |
2 | | #include "ccv_case.h" |
3 | | #include "ccv_nnc_case.h" |
4 | | #include <ccv.h> |
5 | | #include <nnc/ccv_nnc.h> |
6 | | #include <nnc/ccv_nnc_easy.h> |
7 | | #include "3rdparty/dsfmt/dSFMT.h" |
8 | | |
9 | | TEST_SETUP() |
10 | | { |
11 | | ccv_nnc_init(); |
12 | | } |
13 | | |
14 | | TEST_CASE("convolutional network of 11x11 on 225x185 with uniform weights") |
15 | 1 | { |
16 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 225, 185, 3), 0); |
17 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 45, 4), 0); |
18 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 11, 11, 3); |
19 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
20 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 11, 11, 3), 0); |
21 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0); |
22 | | // configure the inlets. |
23 | 1 | int i; |
24 | 1.45k | for (i = 0; i < 11 * 11 * 3 * 4; i++1.45k ) |
25 | 1.45k | w->data.f32[i] = 1; |
26 | 124k | for (i = 0; i < 225 * 185 * 3; i++124k ) |
27 | 124k | a->data.f32[i] = 1; |
28 | 5 | for (i = 0; i < 4; i++4 ) |
29 | 4 | bias->data.f32[i] = 0; |
30 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
31 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(55, 45, CCV_32F | 4, 0, 0); |
32 | 1 | int x, y; |
33 | 56 | for (y = 0; y < 55; y++55 ) |
34 | 2.53k | for (x = 0; 55 x < 45; x++2.47k ) |
35 | 12.3k | for (i = 0; 2.47k i < 4; i++9.90k ) |
36 | 9.90k | c->data.f32[(y * 45 + x) * 4 + i] = ((x == 0 && y == 0220 ) || (9.89k x == 09.89k && y == 54216 ) || (9.89k x == 449.89k && y == 0220 ) || (9.88k x == 449.88k && y == 54216 )) ? 30016 : (9.88k (9.88k x == 09.88k || y == 09.67k || x == 449.50k || y == 549.28k ) ? 330768 : 3639.11k ); |
37 | 1 | REQUIRE_MATRIX_EQ(b, c, "55x45 matrix should be exactly a matrix fill 363, with 300 on the corner and 330 on the border"); |
38 | 1 | ccv_matrix_free(c); |
39 | 1 | ccv_nnc_tensor_free(bias); |
40 | 1 | ccv_nnc_tensor_free(w); |
41 | 1 | ccv_nnc_tensor_free(b); |
42 | 1 | ccv_nnc_tensor_free(a); |
43 | 1 | } |
44 | | |
45 | | TEST_CASE("convolutional network of 5x3 on 17x27 with uniform weights") |
46 | 1 | { |
47 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 17, 27, 1), 0); |
48 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 17, 27, 4), 0); |
49 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 5, 3, 1); |
50 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
51 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 5, 3, 1), 0); |
52 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0); |
53 | | // configure the inlets. |
54 | 1 | int i; |
55 | 61 | for (i = 0; i < 5 * 3 * 4; i++60 ) |
56 | 60 | w->data.f32[i] = 1; |
57 | 460 | for (i = 0; i < 17 * 27; i++459 ) |
58 | 459 | a->data.f32[i] = 1; |
59 | 5 | for (i = 0; i < 4; i++4 ) |
60 | 4 | bias->data.f32[i] = 0; |
61 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
62 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(17, 27, CCV_32F | 4, 0, 0); |
63 | 1 | int x, y; |
64 | 18 | for (y = 0; y < 17; y++17 ) |
65 | 476 | for (x = 0; 17 x < 27; x++459 ) |
66 | 2.29k | for (i = 0; 459 i < 4; i++1.83k ) |
67 | 1.83k | { |
68 | 1.83k | if ((x == 0 && y == 068 ) || (1.83k x == 01.83k && y == 1664 ) || (1.82k x == 261.82k && y == 068 ) || (1.82k x == 261.82k && y == 1664 )) |
69 | 16 | c->data.f32[(y * 27 + x) * 4 + i] = 6; |
70 | 1.82k | else if ((x == 0 && y == 160 ) || (1.81k x == 261.81k && y == 160 ) || (1.81k x == 01.81k && y == 1556 ) || (1.80k x == 261.80k && y == 1556 )) |
71 | 16 | c->data.f32[(y * 27 + x) * 4 + i] = 8; |
72 | 1.80k | else if (y == 0 || y == 161.70k ) |
73 | 200 | c->data.f32[(y * 27 + x) * 4 + i] = 9; |
74 | 1.60k | else if (x == 0 || x == 261.55k ) |
75 | 104 | c->data.f32[(y * 27 + x) * 4 + i] = 10; |
76 | 1.50k | else if (y == 1 || y == 151.40k ) |
77 | 200 | c->data.f32[(y * 27 + x) * 4 + i] = 12; |
78 | 1.30k | else |
79 | 1.30k | c->data.f32[(y * 27 + x) * 4 + i] = 15; |
80 | 1.83k | } |
81 | 1 | REQUIRE_MATRIX_EQ(b, c, "17x27 matrix should be exactly a matrix fill 15, with 6, 8 on the corner and 9, 10, 12 on the border"); |
82 | 1 | ccv_matrix_free(c); |
83 | 1 | ccv_nnc_tensor_free(bias); |
84 | 1 | ccv_nnc_tensor_free(w); |
85 | 1 | ccv_nnc_tensor_free(b); |
86 | 1 | ccv_nnc_tensor_free(a); |
87 | 1 | } |
88 | | |
89 | | TEST_CASE("convolutional network of 11x11 on 225x185 with non-uniform weights") |
90 | 1 | { |
91 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 225, 185, 1), 0); |
92 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 45, 4), 0); |
93 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 11, 11, 1); |
94 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
95 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 11, 11, 1), 0); |
96 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0); |
97 | | // configure the inlets. |
98 | 1 | int i, x, y; |
99 | 5 | for (x = 0; x < 4; x++4 ) |
100 | 488 | for (i = 0; 4 i < 11 * 11; i++484 ) |
101 | 484 | w->data.f32[x * 11 * 11 + i] = i + 1; |
102 | 41.6k | for (i = 0; i < 225 * 185; i++41.6k ) |
103 | 41.6k | a->data.f32[i] = i + 1; |
104 | 5 | for (i = 0; i < 4; i++4 ) |
105 | 4 | bias->data.f32[i] = 0; |
106 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
107 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(55, 45, CCV_32F | 4, 0, 0); |
108 | 1 | float sum = 0; |
109 | | // first column |
110 | 11 | for (y = 0; y < 10; y++10 ) |
111 | 110 | for (x = 0; 10 x < 10; x++100 ) |
112 | 100 | sum += ((y + 1) * 11 + x + 2) * (y * 185 + x + 1); |
113 | 5 | for (i = 0; i < 4; i++4 ) |
114 | 4 | c->data.f32[i] = sum; |
115 | 1 | sum = 0; |
116 | 11 | for (y = 0; y < 10; y++10 ) |
117 | 120 | for (x = 0; 10 x < 11; x++110 ) |
118 | 110 | sum += ((y + 1) * 11 + x + 1) * (y * 185 + (x + 3) + 1); |
119 | 44 | for (x = 1; x < 44; x++43 ) |
120 | 215 | for (i = 0; 43 i < 4; i++172 ) |
121 | 172 | c->data.f32[x * 4 + i] = sum + (x - 1) * 4 * (11 * 11 + 12) * 11 * 10 / 2; |
122 | 1 | sum = 0; |
123 | 11 | for (y = 0; y < 10; y++10 ) |
124 | 110 | for (x = 0; 10 x < 10; x++100 ) |
125 | 100 | sum += ((y + 1) * 11 + x + 1) * (y * 185 + (x + 175) + 1); |
126 | 5 | for (i = 0; i < 4; i++4 ) |
127 | 4 | c->data.f32[44 * 4 + i] = sum; |
128 | | // last column |
129 | 1 | sum = 0; |
130 | 11 | for (y = 0; y < 10; y++10 ) |
131 | 110 | for (x = 0; 10 x < 10; x++100 ) |
132 | 100 | sum += (y * 11 + x + 2) * ((y + 215) * 185 + x + 1); |
133 | 5 | for (i = 0; i < 4; i++4 ) |
134 | 4 | c->data.f32[54 * 45 * 4 + i] = sum; |
135 | 1 | sum = 0; |
136 | 11 | for (y = 0; y < 10; y++10 ) |
137 | 120 | for (x = 0; 10 x < 11; x++110 ) |
138 | 110 | sum += (y * 11 + x + 1) * ((y + 215) * 185 + (x + 3) + 1); |
139 | 44 | for (x = 1; x < 44; x++43 ) |
140 | 215 | for (i = 0; 43 i < 4; i++172 ) |
141 | 172 | c->data.f32[(54 * 45 + x) * 4 + i] = sum + (x - 1) * 4 * (10 * 11 + 1) * 11 * 10 / 2; |
142 | 1 | sum = 0; |
143 | 11 | for (y = 0; y < 10; y++10 ) |
144 | 110 | for (x = 0; 10 x < 10; x++100 ) |
145 | 100 | sum += (y * 11 + x + 1) * ((y + 215) * 185 + (x + 175) + 1); |
146 | 5 | for (i = 0; i < 4; i++4 ) |
147 | 4 | c->data.f32[(54 * 45 + 44) * 4 + i] = sum; |
148 | 1 | float border[] = { |
149 | 1 | 0, 0 |
150 | 1 | }; |
151 | 12 | for (y = 0; y < 11; y++11 ) |
152 | 121 | for (x = 0; 11 x < 10; x++110 ) |
153 | 110 | border[0] += (y * 11 + x + 2) * ((y + 3) * 185 + x + 1); |
154 | 12 | for (y = 0; y < 11; y++11 ) |
155 | 121 | for (x = 0; 11 x < 10; x++110 ) |
156 | 110 | border[1] += (y * 11 + x + 1) * ((y + 3) * 185 + (x + 175) + 1); |
157 | 1 | sum = 0; |
158 | 12 | for (y = 0; y < 11; y++11 ) |
159 | 132 | for (x = 0; 11 x < 11; x++121 ) |
160 | 121 | sum += (y * 11 + x + 1) * ((y + 3) * 185 + (x + 3) + 1); |
161 | 54 | for (y = 1; y < 54; y++53 ) |
162 | 53 | { |
163 | 265 | for (i = 0; i < 4; i++212 ) |
164 | 212 | c->data.f32[y * 45 * 4 + i] = border[0]; |
165 | 2.33k | for (x = 1; x < 44; x++2.27k ) |
166 | 11.3k | for (i = 0; 2.27k i < 4; i++9.11k ) |
167 | 9.11k | c->data.f32[(y * 45 + x) * 4 + i] = sum + (x - 1) * 4 * (11 * 11 + 1) * 11 * 11 / 2; |
168 | 265 | for (i = 0; i < 4; i++212 ) |
169 | 212 | c->data.f32[(y * 45 + 44) * 4 + i] = border[1]; |
170 | 53 | sum += 185 * 4 * (11 * 11 + 1) * 11 * 11 / 2; |
171 | 53 | border[0] += 185 * 4 * ((11 * 11 + 1) * 11 * 11 / 2 - (10 * 11 + 1 + 1) * 11 / 2); |
172 | 53 | border[1] += 185 * 4 * ((11 * 11 + 1) * 11 * 11 / 2 - (11 * 11 + 11) * 11 / 2); |
173 | 53 | } |
174 | | // regularize the output so it is within the tolerance |
175 | 9.90k | for (i = 0; i < 55 * 45 * 4; i++9.90k ) |
176 | 9.90k | c->data.f32[i] = c->data.f32[i] * 1e-7, b->data.f32[i] = b->data.f32[i] * 1e-7; |
177 | 1 | REQUIRE_MATRIX_EQ(b, c, "55x55 matrix should be exactly the same"); |
178 | 1 | ccv_matrix_free(c); |
179 | 1 | ccv_nnc_tensor_free(bias); |
180 | 1 | ccv_nnc_tensor_free(w); |
181 | 1 | ccv_nnc_tensor_free(b); |
182 | 1 | ccv_nnc_tensor_free(a); |
183 | 1 | } |
184 | | |
185 | | TEST_CASE("convolutional network of 3x5 on 27x27 with non-uniform weights") |
186 | 1 | { |
187 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0); |
188 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0); |
189 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 3, 5, 1); |
190 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
191 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3, 5, 1), 0); |
192 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0); |
193 | | // configure the inlets. |
194 | 1 | int i, x, y; |
195 | 5 | for (x = 0; x < 4; x++4 ) |
196 | 64 | for (i = 0; 4 i < 3 * 5; i++60 ) |
197 | 60 | w->data.f32[x * 3 * 5 + i] = i + 1; |
198 | 730 | for (i = 0; i < 27 * 27; i++729 ) |
199 | 729 | a->data.f32[i] = i + 1; |
200 | 5 | for (i = 0; i < 4; i++4 ) |
201 | 4 | bias->data.f32[i] = 0; |
202 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
203 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | 4, 0, 0); |
204 | | // the first column |
205 | 1 | float sum = 0; |
206 | 3 | for (y = 0; y < 2; y++2 ) |
207 | 8 | for (x = 0; 2 x < 3; x++6 ) |
208 | 6 | sum += ((y + 1) * 5 + x + 3) * (y * 27 + x + 1); |
209 | 5 | for (i = 0; i < 4; i++4 ) |
210 | 4 | c->data.f32[i] = sum; |
211 | 1 | sum = 0; |
212 | 3 | for (y = 0; y < 2; y++2 ) |
213 | 10 | for (x = 0; 2 x < 4; x++8 ) |
214 | 8 | sum += ((y + 1) * 5 + x + 2) * (y * 27 + x + 1); |
215 | 5 | for (i = 0; i < 4; i++4 ) |
216 | 4 | c->data.f32[4 + i] = sum; |
217 | 1 | sum = 0; |
218 | 3 | for (y = 0; y < 2; y++2 ) |
219 | 12 | for (x = 0; 2 x < 5; x++10 ) |
220 | 10 | sum += ((y + 1) * 5 + x + 1) * (y * 27 + x + 1); |
221 | 24 | for (x = 2; x < 25; x++23 ) |
222 | 115 | for (i = 0; 23 i < 4; i++92 ) |
223 | 92 | c->data.f32[x * 4 + i] = sum + (x - 2) * 21 * 10 / 2; |
224 | 1 | sum = 0; |
225 | 3 | for (y = 0; y < 2; y++2 ) |
226 | 10 | for (x = 0; 2 x < 4; x++8 ) |
227 | 8 | sum += ((y + 1) * 5 + x + 1) * (y * 27 + x + 24); |
228 | 5 | for (i = 0; i < 4; i++4 ) |
229 | 4 | c->data.f32[25 * 4 + i] = sum; |
230 | 1 | sum = 0; |
231 | 3 | for (y = 0; y < 2; y++2 ) |
232 | 8 | for (x = 0; 2 x < 3; x++6 ) |
233 | 6 | sum += ((y + 1) * 5 + x + 1) * (y * 27 + x + 25); |
234 | 5 | for (i = 0; i < 4; i++4 ) |
235 | 4 | c->data.f32[26 * 4 + i] = sum; |
236 | | // the last column |
237 | 1 | sum = 0; |
238 | 3 | for (y = 0; y < 2; y++2 ) |
239 | 8 | for (x = 0; 2 x < 3; x++6 ) |
240 | 6 | sum += (y * 5 + x + 3) * ((y + 25) * 27 + x + 1); |
241 | 5 | for (i = 0; i < 4; i++4 ) |
242 | 4 | c->data.f32[27 * 26 * 4 + i] = sum; |
243 | 1 | sum = 0; |
244 | 3 | for (y = 0; y < 2; y++2 ) |
245 | 10 | for (x = 0; 2 x < 4; x++8 ) |
246 | 8 | sum += (y * 5 + x + 2) * ((y + 25) * 27 + x + 1); |
247 | 5 | for (i = 0; i < 4; i++4 ) |
248 | 4 | c->data.f32[(27 * 26 + 1) * 4 + i] = sum; |
249 | 1 | sum = 0; |
250 | 3 | for (y = 0; y < 2; y++2 ) |
251 | 12 | for (x = 0; 2 x < 5; x++10 ) |
252 | 10 | sum += (y * 5 + x + 1) * ((y + 25) * 27 + x + 1); |
253 | 24 | for (x = 2; x < 25; x++23 ) |
254 | 115 | for (i = 0; 23 i < 4; i++92 ) |
255 | 92 | c->data.f32[(27 * 26 + x) * 4 + i] = sum + (x - 2) * 11 * 10 / 2; |
256 | 1 | sum = 0; |
257 | 3 | for (y = 0; y < 2; y++2 ) |
258 | 10 | for (x = 0; 2 x < 4; x++8 ) |
259 | 8 | sum += (y * 5 + x + 1) * ((y + 25) * 27 + x + 24); |
260 | 5 | for (i = 0; i < 4; i++4 ) |
261 | 4 | c->data.f32[(27 * 26 + 25) * 4 + i] = sum; |
262 | 1 | sum = 0; |
263 | 3 | for (y = 0; y < 2; y++2 ) |
264 | 8 | for (x = 0; 2 x < 3; x++6 ) |
265 | 6 | sum += (y * 5 + x + 1) * ((y + 25) * 27 + x + 25); |
266 | 5 | for (i = 0; i < 4; i++4 ) |
267 | 4 | c->data.f32[(27 * 26 + 26) * 4 + i] = sum; |
268 | 1 | float border[] = { |
269 | 1 | 0, 0, 0, 0 |
270 | 1 | }; |
271 | 4 | for (y = 0; y < 3; y++3 ) |
272 | 12 | for (x = 0; 3 x < 3; x++9 ) |
273 | 9 | border[0] += (y * 5 + x + 3) * (y * 27 + x + 1); |
274 | 4 | for (y = 0; y < 3; y++3 ) |
275 | 15 | for (x = 0; 3 x < 4; x++12 ) |
276 | 12 | border[1] += (y * 5 + x + 2) * (y * 27 + x + 1); |
277 | 4 | for (y = 0; y < 3; y++3 ) |
278 | 15 | for (x = 0; 3 x < 4; x++12 ) |
279 | 12 | border[2] += (y * 5 + x + 1) * (y * 27 + x + 24); |
280 | 4 | for (y = 0; y < 3; y++3 ) |
281 | 12 | for (x = 0; 3 x < 3; x++9 ) |
282 | 9 | border[3] += (y * 5 + x + 1) * (y * 27 + x + 25); |
283 | 1 | sum = 0; |
284 | 4 | for (y = 0; y < 3; y++3 ) |
285 | 18 | for (x = 0; 3 x < 5; x++15 ) |
286 | 15 | sum += (y * 5 + x + 1) * (y * 27 + x + 1); |
287 | 26 | for (y = 1; y < 26; y++25 ) |
288 | 25 | { |
289 | 125 | for (i = 0; i < 4; i++100 ) |
290 | 100 | { |
291 | 100 | c->data.f32[y * 27 * 4 + i] = border[0] + (y - 1) * 27 * (3 + 4 + 5 + 8 + 9 + 10 + 13 + 14 + 15); |
292 | 100 | c->data.f32[(y * 27 + 1) * 4 + i] = border[1] + (y - 1) * 27 * (2 + 3 + 4 + 5 + 7 + 8 + 9 + 10 + 12 + 13 + 14 + 15); |
293 | 2.40k | for (x = 2; x < 25; x++2.30k ) |
294 | 2.30k | c->data.f32[(y * 27 + x) * 4 + i] = sum + ((y - 1) * 27 + x - 2) * 16 * 15 / 2; |
295 | 100 | c->data.f32[(y * 27 + 25) * 4 + i] = border[2] + (y - 1) * 27 * (1 + 2 + 3 + 4 + 6 + 7 + 8 + 9 + 11 + 12 + 13 + 14); |
296 | 100 | c->data.f32[(y * 27 + 26) * 4 + i] = border[3] + (y - 1) * 27 * (1 + 2 + 3 + 6 + 7 + 8 + 11 + 12 + 13); |
297 | 100 | } |
298 | 25 | } |
299 | 1 | REQUIRE_MATRIX_EQ(b, c, "27x27 matrix should be exactly the same"); |
300 | 1 | ccv_matrix_free(c); |
301 | 1 | ccv_nnc_tensor_free(bias); |
302 | 1 | ccv_nnc_tensor_free(w); |
303 | 1 | ccv_nnc_tensor_free(b); |
304 | 1 | ccv_nnc_tensor_free(a); |
305 | 1 | } |
306 | | |
307 | | TEST_CASE("convolution with no bias") |
308 | 1 | { |
309 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0); |
310 | 1 | ccv_nnc_tensor_t* bg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0); |
311 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 3, 5, 1); |
312 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, bg->info); |
313 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3, 5, 1), 0); |
314 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0); |
315 | 1 | dsfmt_t dsfmt; |
316 | 1 | int i; |
317 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
318 | 730 | for (i = 0; i < 27 * 27; i++729 ) |
319 | 729 | a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); |
320 | 61 | for (i = 0; i < 4 * 3 * 5; i++60 ) |
321 | 60 | w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); |
322 | 5 | for (i = 0; i < 4; i++4 ) |
323 | 4 | bias->data.f32[i] = 0; |
324 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(bg), 0); |
325 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0); |
326 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w), TENSOR_LIST(b), 0); |
327 | 1 | REQUIRE_MATRIX_EQ(b, bg, "convolution with no bias should equal to with bias = 0"); |
328 | 1 | ccv_nnc_tensor_free(a); |
329 | 1 | ccv_nnc_tensor_free(b); |
330 | 1 | ccv_nnc_tensor_free(bg); |
331 | 1 | ccv_nnc_tensor_free(w); |
332 | 1 | ccv_nnc_tensor_free(bias); |
333 | 1 | } |
334 | | |
335 | | TEST_CASE("maximum pool network of 55x55 with window of 3x3 and stride of 2") |
336 | 1 | { |
337 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 55, 1), 0); |
338 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0); |
339 | 1 | ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(3, 3); |
340 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
341 | | // configure the inlets. |
342 | 1 | int i; |
343 | 3.02k | for (i = 0; i < 55 * 55; i++3.02k ) |
344 | 3.02k | a->data.f32[i] = i + 1; |
345 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0); |
346 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0); |
347 | 1 | int x, y; |
348 | 28 | for (y = 0; y < 27; y++27 ) |
349 | 756 | for (x = 0; 27 x < 27; x++729 ) |
350 | 729 | c->data.f32[y * 27 + x] = 113 + y * 110 + x * 2; |
351 | 1 | REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same"); |
352 | 1 | ccv_matrix_free(c); |
353 | 1 | ccv_nnc_tensor_free(b); |
354 | 1 | ccv_nnc_tensor_free(a); |
355 | 1 | } |
356 | | |
357 | | TEST_CASE("maximum pool network of 57x57 with window of 3x3 and stride of 3") |
358 | 1 | { |
359 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 57, 57, 1), 0); |
360 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 19, 19, 1), 0); |
361 | 1 | ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(3, 3); |
362 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
363 | | // configure the inlets. |
364 | 1 | int i; |
365 | 3.25k | for (i = 0; i < 57 * 57; i++3.24k ) |
366 | 3.24k | a->data.f32[i] = i + 1; |
367 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0); |
368 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(19, 19, CCV_32F | CCV_C1, 0, 0); |
369 | 1 | int x, y; |
370 | 20 | for (y = 0; y < 19; y++19 ) |
371 | 380 | for (x = 0; 19 x < 19; x++361 ) |
372 | 361 | c->data.f32[y * 19 + x] = 117 + y * 171 + x * 3; |
373 | 1 | REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same"); |
374 | 1 | ccv_matrix_free(c); |
375 | 1 | ccv_nnc_tensor_free(b); |
376 | 1 | ccv_nnc_tensor_free(a); |
377 | 1 | } |
378 | | |
379 | | TEST_CASE("maximum pool network of 54x54 with window of 2x2 and stride of 2") |
380 | 1 | { |
381 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 54, 54, 1), 0); |
382 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0); |
383 | 1 | ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(2, 2); |
384 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
385 | | // configure the inlets. |
386 | 1 | int i; |
387 | 2.91k | for (i = 0; i < 54 * 54; i++2.91k ) |
388 | 2.91k | a->data.f32[i] = i + 1; |
389 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0); |
390 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0); |
391 | 1 | int x, y; |
392 | 28 | for (y = 0; y < 27; y++27 ) |
393 | 756 | for (x = 0; 27 x < 27; x++729 ) |
394 | 729 | c->data.f32[y * 27 + x] = 56 + y * 108 + x * 2; |
395 | 1 | REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same"); |
396 | 1 | ccv_matrix_free(c); |
397 | 1 | ccv_nnc_tensor_free(b); |
398 | 1 | ccv_nnc_tensor_free(a); |
399 | 1 | } |
400 | | |
401 | | TEST_CASE("average pool network of 55x55 with window of 3x3 and stride of 2") |
402 | 1 | { |
403 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 55, 1), 0); |
404 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0); |
405 | 1 | ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(3, 3); |
406 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
407 | | // configure the inlets. |
408 | 1 | int i; |
409 | 3.02k | for (i = 0; i < 55 * 55; i++3.02k ) |
410 | 3.02k | a->data.f32[i] = i + 1; |
411 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0); |
412 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0); |
413 | 1 | int x, y; |
414 | 28 | for (y = 0; y < 27; y++27 ) |
415 | 756 | for (x = 0; 27 x < 27; x++729 ) |
416 | 729 | c->data.f32[y * 27 + x] = 57 + y * 110 + x * 2; |
417 | 1 | REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same"); |
418 | 1 | ccv_matrix_free(c); |
419 | 1 | ccv_nnc_tensor_free(b); |
420 | 1 | ccv_nnc_tensor_free(a); |
421 | 1 | } |
422 | | |
423 | | TEST_CASE("average pool network of 57x57 with window of 3x3 and stride of 3") |
424 | 1 | { |
425 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 57, 57, 1), 0); |
426 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 19, 19, 1), 0); |
427 | 1 | ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(3, 3); |
428 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
429 | | // configure the inlets. |
430 | 1 | int i; |
431 | 3.25k | for (i = 0; i < 57 * 57; i++3.24k ) |
432 | 3.24k | a->data.f32[i] = i + 1; |
433 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0); |
434 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(19, 19, CCV_32F | CCV_C1, 0, 0); |
435 | 1 | int x, y; |
436 | 20 | for (y = 0; y < 19; y++19 ) |
437 | 380 | for (x = 0; 19 x < 19; x++361 ) |
438 | 361 | c->data.f32[y * 19 + x] = 59 + y * 171 + x * 3; |
439 | 1 | REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same"); |
440 | 1 | ccv_matrix_free(c); |
441 | 1 | ccv_nnc_tensor_free(b); |
442 | 1 | ccv_nnc_tensor_free(a); |
443 | 1 | } |
444 | | |
445 | | TEST_CASE("average pool network of 54x54 with window of 2x2 and stride of 2") |
446 | 1 | { |
447 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 54, 54, 1), 0); |
448 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0); |
449 | 1 | ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(2, 2); |
450 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
451 | | // configure the inlets. |
452 | 1 | int i; |
453 | 2.91k | for (i = 0; i < 54 * 54; i++2.91k ) |
454 | 2.91k | a->data.f32[i] = i + 1; |
455 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0); |
456 | 1 | ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0); |
457 | 1 | int x, y; |
458 | 28 | for (y = 0; y < 27; y++27 ) |
459 | 756 | for (x = 0; 27 x < 27; x++729 ) |
460 | 729 | c->data.f32[y * 27 + x] = 28.5 + y * 108 + x * 2; |
461 | 1 | REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same"); |
462 | 1 | ccv_matrix_free(c); |
463 | 1 | ccv_nnc_tensor_free(b); |
464 | 1 | ccv_nnc_tensor_free(a); |
465 | 1 | } |
466 | | |
467 | | TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights") |
468 | 1 | { |
469 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0); |
470 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4, 1), 0); |
471 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2); |
472 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
473 | 1 | hint.stride.dim[0] = 1; |
474 | 1 | hint.stride.dim[1] = 1; |
475 | 1 | hint.border.begin[0] = 0; |
476 | 1 | hint.border.begin[1] = 0; |
477 | 1 | hint.border.end[0] = 0; |
478 | 1 | hint.border.end[1] = 0; |
479 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0); |
480 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
481 | | // configure the inlets. |
482 | 1 | int i; |
483 | 19 | for (i = 0; i < 3 * 3 * 2; i++18 ) |
484 | 18 | w->data.f32[i] = i; |
485 | 9 | for (i = 0; i < 2 * 2 * 2; i++8 ) |
486 | 8 | a->data.f32[i] = 1; |
487 | 2 | for (i = 0; i < 1; i++1 ) |
488 | 1 | bias->data.f32[i] = 1; |
489 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
490 | 1 | float cp[] = { |
491 | 1 | 10., 21., 25., 14., |
492 | 1 | 25., 53., 61., 33., |
493 | 1 | 37., 77., 85., 45., |
494 | 1 | 22., 45., 49., 26. |
495 | 1 | }; |
496 | 1 | ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NHWC(32F, 4, 4, 1), 0); |
497 | 1 | REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same"); |
498 | 1 | ccv_nnc_tensor_free(c); |
499 | 1 | ccv_nnc_tensor_free(bias); |
500 | 1 | ccv_nnc_tensor_free(w); |
501 | 1 | ccv_nnc_tensor_free(b); |
502 | 1 | ccv_nnc_tensor_free(a); |
503 | 1 | } |
504 | | |
505 | | TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2") |
506 | 1 | { |
507 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0); |
508 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4, 2), 0); |
509 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2); |
510 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
511 | 1 | hint.stride.dim[0] = 1; |
512 | 1 | hint.stride.dim[1] = 1; |
513 | 1 | hint.border.begin[0] = 0; |
514 | 1 | hint.border.begin[1] = 0; |
515 | 1 | hint.border.end[0] = 0; |
516 | 1 | hint.border.end[1] = 0; |
517 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0); |
518 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0); |
519 | | // configure the inlets. |
520 | 1 | int i; |
521 | 19 | for (i = 0; i < 3 * 3 * 2; i++18 ) |
522 | 18 | w->data.f32[i] = i; |
523 | 9 | for (i = 0; i < 2 * 2 * 2; i++8 ) |
524 | 8 | a->data.f32[i] = 1; |
525 | 3 | for (i = 0; i < 2; i++2 ) |
526 | 2 | bias->data.f32[i] = 1; |
527 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
528 | 1 | float cp[] = { |
529 | 1 | 1., 10., 2., 20., 4., 22., 3., 12., |
530 | 1 | 4., 22., 9., 45., 13., 49., 8., 26., |
531 | 1 | 10., 28., 21., 57., 25., 61., 14., 32., |
532 | 1 | 7., 16., 14., 32., 16., 34., 9., 18. |
533 | 1 | }; |
534 | 1 | ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NHWC(32F, 4, 4, 2), 0); |
535 | 1 | REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same"); |
536 | 1 | ccv_nnc_tensor_free(c); |
537 | 1 | ccv_nnc_tensor_free(bias); |
538 | 1 | ccv_nnc_tensor_free(w); |
539 | 1 | ccv_nnc_tensor_free(b); |
540 | 1 | ccv_nnc_tensor_free(a); |
541 | 1 | } |
542 | | |
543 | | TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, stride of 2") |
544 | 1 | { |
545 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0); |
546 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 5, 2), 0); |
547 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2); |
548 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
549 | 1 | hint.stride.dim[0] = 2; |
550 | 1 | hint.stride.dim[1] = 2; |
551 | 1 | hint.border.begin[0] = 0; |
552 | 1 | hint.border.begin[1] = 0; |
553 | 1 | hint.border.end[0] = 0; |
554 | 1 | hint.border.end[1] = 0; |
555 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0); |
556 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0); |
557 | | // configure the inlets. |
558 | 1 | int i; |
559 | 19 | for (i = 0; i < 3 * 3 * 2; i++18 ) |
560 | 18 | w->data.f32[i] = i; |
561 | 9 | for (i = 0; i < 2 * 2 * 2; i++8 ) |
562 | 8 | a->data.f32[i] = 1; |
563 | 3 | for (i = 0; i < 2; i++2 ) |
564 | 2 | bias->data.f32[i] = 1; |
565 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
566 | 1 | float cp[] = { |
567 | 1 | 1., 10., 2., 11., 3., 21., 2., 11., 3., 12., |
568 | 1 | 4., 13., 5., 14., 9., 27., 5., 14., 6., 15., |
569 | 1 | 7., 25., 9., 27., 17., 53., 9., 27., 11., 29., |
570 | 1 | 4., 13., 5., 14., 9., 27., 5., 14., 6., 15., |
571 | 1 | 7., 16., 8., 17., 15., 33., 8., 17., 9., 18. |
572 | 1 | }; |
573 | 1 | ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NHWC(32F, 5, 5, 2), 0); |
574 | 1 | REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same"); |
575 | 1 | ccv_nnc_tensor_free(c); |
576 | 1 | ccv_nnc_tensor_free(bias); |
577 | 1 | ccv_nnc_tensor_free(w); |
578 | 1 | ccv_nnc_tensor_free(b); |
579 | 1 | ccv_nnc_tensor_free(a); |
580 | 1 | } |
581 | | |
582 | | TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights, NCHW") |
583 | 1 | { |
584 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0); |
585 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 4, 4), 0); |
586 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2); |
587 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
588 | 1 | hint.stride.dim[0] = 1; |
589 | 1 | hint.stride.dim[1] = 1; |
590 | 1 | hint.border.begin[0] = 0; |
591 | 1 | hint.border.begin[1] = 0; |
592 | 1 | hint.border.end[0] = 0; |
593 | 1 | hint.border.end[1] = 0; |
594 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0); |
595 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
596 | | // configure the inlets. |
597 | 1 | int i; |
598 | 19 | for (i = 0; i < 3 * 3 * 2; i++18 ) |
599 | 18 | w->data.f32[i] = i; |
600 | 9 | for (i = 0; i < 2 * 2 * 2; i++8 ) |
601 | 8 | a->data.f32[i] = 1; |
602 | 2 | for (i = 0; i < 1; i++1 ) |
603 | 1 | bias->data.f32[i] = 1; |
604 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
605 | 1 | float cp[] = { |
606 | 1 | 10., 21., 25., 14., |
607 | 1 | 25., 53., 61., 33., |
608 | 1 | 37., 77., 85., 45., |
609 | 1 | 22., 45., 49., 26. |
610 | 1 | }; |
611 | 1 | ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NCHW(32F, 1, 4, 4), 0); |
612 | 1 | REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same"); |
613 | 1 | ccv_nnc_tensor_free(c); |
614 | 1 | ccv_nnc_tensor_free(bias); |
615 | 1 | ccv_nnc_tensor_free(w); |
616 | 1 | ccv_nnc_tensor_free(b); |
617 | 1 | ccv_nnc_tensor_free(a); |
618 | 1 | } |
619 | | |
620 | | TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, NCHW") |
621 | 1 | { |
622 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0); |
623 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 4, 4), 0); |
624 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2); |
625 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
626 | 1 | hint.stride.dim[0] = 1; |
627 | 1 | hint.stride.dim[1] = 1; |
628 | 1 | hint.border.begin[0] = 0; |
629 | 1 | hint.border.begin[1] = 0; |
630 | 1 | hint.border.end[0] = 0; |
631 | 1 | hint.border.end[1] = 0; |
632 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0); |
633 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2), 0); |
634 | | // configure the inlets. |
635 | 1 | int i; |
636 | 19 | for (i = 0; i < 3 * 3 * 2; i++18 ) |
637 | 18 | w->data.f32[i] = i; |
638 | 9 | for (i = 0; i < 2 * 2 * 2; i++8 ) |
639 | 8 | a->data.f32[i] = 1; |
640 | 3 | for (i = 0; i < 2; i++2 ) |
641 | 2 | bias->data.f32[i] = 1; |
642 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
643 | 1 | float cp[] = { |
644 | 1 | 1., 2., 4., 3., |
645 | 1 | 4., 9., 13., 8., |
646 | 1 | 10., 21., 25., 14., |
647 | 1 | 7., 14., 16., 9., |
648 | 1 | 10., 20., 22., 12., |
649 | 1 | 22., 45., 49., 26., |
650 | 1 | 28., 57., 61., 32., |
651 | 1 | 16., 32., 34., 18. |
652 | 1 | }; |
653 | 1 | ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NCHW(32F, 2, 4, 4), 0); |
654 | 1 | REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same"); |
655 | 1 | ccv_nnc_tensor_free(c); |
656 | 1 | ccv_nnc_tensor_free(bias); |
657 | 1 | ccv_nnc_tensor_free(w); |
658 | 1 | ccv_nnc_tensor_free(b); |
659 | 1 | ccv_nnc_tensor_free(a); |
660 | 1 | } |
661 | | |
662 | | TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, stride of 2, NCHW") |
663 | 1 | { |
664 | 1 | ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0); |
665 | 1 | ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 5, 5), 0); |
666 | 1 | ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2); |
667 | 1 | ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info); |
668 | 1 | hint.stride.dim[0] = 2; |
669 | 1 | hint.stride.dim[1] = 2; |
670 | 1 | hint.border.begin[0] = 0; |
671 | 1 | hint.border.begin[1] = 0; |
672 | 1 | hint.border.end[0] = 0; |
673 | 1 | hint.border.end[1] = 0; |
674 | 1 | ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0); |
675 | 1 | ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2), 0); |
676 | | // configure the inlets. |
677 | 1 | int i; |
678 | 19 | for (i = 0; i < 3 * 3 * 2; i++18 ) |
679 | 18 | w->data.f32[i] = i; |
680 | 9 | for (i = 0; i < 2 * 2 * 2; i++8 ) |
681 | 8 | a->data.f32[i] = 1; |
682 | 3 | for (i = 0; i < 2; i++2 ) |
683 | 2 | bias->data.f32[i] = 1; |
684 | 1 | ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0); |
685 | 1 | float cp[] = { |
686 | 1 | 1., 2., 3., 2., 3., |
687 | 1 | 4., 5., 9., 5., 6., |
688 | 1 | 7., 9., 17., 9., 11., |
689 | 1 | 4., 5., 9., 5., 6., |
690 | 1 | 7., 8., 15., 8., 9., |
691 | 1 | 10., 11., 21., 11., 12., |
692 | 1 | 13., 14., 27., 14., 15., |
693 | 1 | 25., 27., 53., 27., 29., |
694 | 1 | 13., 14., 27., 14., 15., |
695 | 1 | 16., 17., 33., 17., 18. |
696 | 1 | }; |
697 | 1 | ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NCHW(32F, 2, 5, 5), 0); |
698 | 1 | REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same"); |
699 | 1 | ccv_nnc_tensor_free(c); |
700 | 1 | ccv_nnc_tensor_free(bias); |
701 | 1 | ccv_nnc_tensor_free(w); |
702 | 1 | ccv_nnc_tensor_free(b); |
703 | 1 | ccv_nnc_tensor_free(a); |
704 | 1 | } |
705 | | |
706 | | #include "case_main.h" |