// /home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/reduce.tests.c
1 | | #include "case.h" |
2 | | #include "ccv_case.h" |
3 | | #include "ccv_nnc_case.h" |
4 | | #include <ccv.h> |
5 | | #include <nnc/ccv_nnc.h> |
6 | | #include <nnc/ccv_nnc_easy.h> |
7 | | #include <3rdparty/dsfmt/dSFMT.h> |
8 | | |
9 | | TEST_SETUP() |
10 | | { |
11 | | ccv_nnc_init(); |
12 | | } |
13 | | |
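// Each test below follows the same parity pattern: run the reduction on the CPU to
// produce a reference result, copy the inputs to the GPU, run the same command with
// the GPU backend (cuDNN or MPS), copy the result back, and compare the two tensors.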
TEST_CASE("reduce sum forward")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

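// The reduced axis (axis 1 of a 6x1 tensor) has extent 1, so the reduction is
// effectively a copy; this checks the GPU backend handles the degenerate case.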
TEST_CASE("reduce sum forward noop")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

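// Backward of reduce sum broadcasts the 3-element output gradient back across the
// reduced axis to fill the 2x3 input gradient.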
TEST_CASE("reduce sum backward")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	hb->data.f32[0] = 1;
	hb->data.f32[1] = 2;
	hb->data.f32[2] = 3;
	ccv_nnc_cmd_exec(CMD_REDUCE_SUM_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(ha), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(b), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_SUM_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(a), 0);
	ccv_nnc_tensor_t* const at = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(at), 0);
	REQUIRE_TENSOR_EQ(ha, at, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(at);
}

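// Mean along axis 0 averages the two rows of the 2x3 input into a 3-element result.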
TEST_CASE("reduce mean forward")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

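// As with the sum noop test above, the reduced axis has extent 1, so mean is a copy.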
TEST_CASE("reduce mean forward noop")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

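// Backward of reduce mean broadcasts the output gradient across the reduced axis,
// scaled by the reciprocal of the reduced extent (1/2 for the two rows here).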
TEST_CASE("reduce mean backward")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_BACKWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	hb->data.f32[0] = 1;
	hb->data.f32[1] = 2;
	hb->data.f32[2] = 3;
	ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(ha), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(b), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(a), 0);
	ccv_nnc_tensor_t* const at = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(at), 0);
	REQUIRE_TENSOR_EQ(ha, at, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(at);
}

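// Max reduction along axis 0 is only gated on the MPS backend, so this test is
// skipped on builds without MPS support.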
TEST_CASE("reduce max forward")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MAX_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_MAX_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_MAX_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

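// Min reduction, like max above, is gated on the MPS backend only.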
TEST_CASE("reduce min forward")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MIN_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_MIN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_MIN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

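// Argmin over axis 1 of random FP32 data; the result is a tensor of int32 indices
// compared between the CPU reference and the GPU (GPU_REF or MPS) backend.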
TEST_CASE("argmin with float")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ARGMIN_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ARGMIN_FORWARD, CCV_NNC_BACKEND_MPS));
	dsfmt_t dsfmt;
	dsfmt_init_gen_rand(&dsfmt, 0);
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 3, 5, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
	int i;
	for (i = 0; i < 10 * 3 * 5 * 3; i++)
		ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
	ccv_nnc_cmd_exec(CMD_ARGMIN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 3, 5, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 10, 1, 5, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_ARGMIN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

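// Same as the argmin test above, but for argmax.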
TEST_CASE("argmax with float")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ARGMAX_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ARGMAX_FORWARD, CCV_NNC_BACKEND_MPS));
	dsfmt_t dsfmt;
	dsfmt_init_gen_rand(&dsfmt, 0);
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 3, 5, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
	int i;
	for (i = 0; i < 10 * 3 * 5 * 3; i++)
		ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
	ccv_nnc_cmd_exec(CMD_ARGMAX_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 3, 5, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 10, 1, 5, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_ARGMAX_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

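// Norm2 along axis 0 computes the column-wise L2 norm (sqrt of the sum of squares).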
TEST_CASE("reduce norm2 forward")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

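// The reduced axis has extent 1, so the norm2 of each single (positive) element is
// the element itself and the output matches the input.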
TEST_CASE("reduce norm2 forward noop")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

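// Backward of norm2 takes the output gradient, the forward input, and the forward
// output, since d||x||/dx_i = x_i / ||x||; both forward and backward run on the GPU.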
TEST_CASE("reduce norm2 backward")
{
	GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_MPS)) &&
		(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_BACKWARD, CCV_NNC_BACKEND_MPS)));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ha->data.f32[0] = 1;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const hh = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	hg->data.f32[0] = 1;
	hg->data.f32[1] = 2;
	hg->data.f32[2] = 3;
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hh), 0);
	ccv_nnc_tensor_t* const h = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg), TENSOR_LIST(g), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(h), 0);
	ccv_nnc_tensor_t* const ht = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(h), TENSOR_LIST(ht), 0);
	REQUIRE_TENSOR_EQ(hh, ht, "result should be equal");
	ccv_nnc_tensor_free(hh);
	ccv_nnc_tensor_free(hg);
	ccv_nnc_tensor_free(h);
	ccv_nnc_tensor_free(g);
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(ht);
}

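// isnan reduces along axis 0 to a per-column int32 flag; only column 0 of the input
// contains a NaN.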
TEST_CASE("reduce isnan float")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 3), 0);
	ha->data.f32[0] = NAN;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

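// Same isnan check, but the GPU input is first converted to half precision; the NaN
// must survive the FP32 to FP16 conversion, and the result reduces to a single int32 value.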
TEST_CASE("reduce isnan in half precision")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_MPS));
	ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
	ha->data.f32[0] = NAN;
	ha->data.f32[1] = 2;
	ha->data.f32[2] = 3;
	ha->data.f32[3] = 4;
	ha->data.f32[4] = 5;
	ha->data.f32[5] = 6;
	ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
	ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
	ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
	REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
	ccv_nnc_tensor_free(ha);
	ccv_nnc_tensor_free(hb);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(a16);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(bt);
}

#include "case_main.h"