// /home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/scatter_add.tests.c
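// Integration tests for CCV_NNC_SCATTER_ADD forward and backward on the GPU
// reference and MPS backends. As exercised by these tests, the forward command
// zeroes its output and accumulates out[indices[k]] += in[k] (duplicate indices
// add up), and the backward command is the matching gather,
// grad_in[k] = grad_out[indices[k]]. The integer argument given to
// CMD_SCATTER_ADD_FORWARD / CMD_SCATTER_ADD_BACKWARD here matches the leading
// dimension of the scattered tensor.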
#include "case.h"
#include "ccv_case.h"
#include "ccv_nnc_case.h"
#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include <3rdparty/dsfmt/dSFMT.h>

TEST_SETUP()
{
	ccv_nnc_init();
}

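// Forward, 2-D: both rows of b are scattered to row 1 of the 3x2 output a
// (indices are {1, 1}), so they accumulate to {3, 5}; the remaining rows are
// expected to come back zeroed. Inputs are copied to the GPU, the command runs
// there, and the result is copied back for comparison.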
TEST_CASE("scatter add a tensor")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_MPS));
	float bp[] = {
		1, 2,
		2, 3,
	};
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 2), 0);
	int ip[] = {1, 1};
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(ip, CPU_TENSOR_NHWC(32S, 2), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(bp, CPU_TENSOR_NHWC(32F, 2, 2), 0);
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3, 2), 0);
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 2), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 2), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(gb, gindices), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_FORWARD(3), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gindices), TENSOR_LIST(ga), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga), TENSOR_LIST(a), 0);
	float atp[] = {
		0, 0,
		3, 5,
		0, 0,
	};
	ccv_nnc_tensor_t const at = ccv_nnc_tensor(atp, CPU_TENSOR_NHWC(32F, 3, 2), 0);
	REQUIRE_TENSOR_EQ(a, &at, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
}

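// Forward, 1-D: values {4, 3, 5} are scattered to positions {3, 2, 4} of a
// length-5 output, leaving positions 0 and 1 at zero.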
TEST_CASE("scatter add a 1d tensor")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_MPS));
	float bp[] = {
		4, 3, 5,
	};
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5), 0);
	int ip[] = {3, 2, 4};
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(ip, CPU_TENSOR_NHWC(32S, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(bp, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5), 0);
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 3), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(gb, gindices), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_FORWARD(5), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gindices), TENSOR_LIST(ga), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga), TENSOR_LIST(a), 0);
	float atp[] = {
		0, 0, 3, 4, 5
	};
	ccv_nnc_tensor_t const at = ccv_nnc_tensor(atp, CPU_TENSOR_NHWC(32F, 5), 0);
	REQUIRE_TENSOR_EQ(a, &at, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
}

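// Forward through strided tensor views: both the source (gbv) and the
// destination (gav) are 2-column slices starting at column 1 of wider tensors.
// a is pre-filled with 0..11 so the test can check that only the viewed
// columns of ga are zeroed and scatter-added while the rest keeps the
// transferred values.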
TEST_CASE("scatter add a tensor view")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_MPS));
	float bp[] = {
		0, 3, 4, 0,
		0, 1, 5, 0,
	};
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 4), 0);
	int i;
	for (i = 0; i < 3 * 4; i++)
		a->data.f32[i] = i;
	ccv_nnc_tensor_view_t* const av = ccv_nnc_tensor_view_new(a, CPU_TENSOR_NHWC(32F, 3, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	int ip[] = {1, 1};
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(ip, CPU_TENSOR_NHWC(32S, 2), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(bp, CPU_TENSOR_NHWC(32F, 2, 4), 0);
	ccv_nnc_tensor_view_t* const bv = ccv_nnc_tensor_view_new(b, CPU_TENSOR_NHWC(32F, 2, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3, 4), 0);
	ccv_nnc_tensor_view_t* const gav = ccv_nnc_tensor_view_new(ga, GPU_TENSOR_NHWC(000, 32F, 3, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 2), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 4), 0);
	ccv_nnc_tensor_view_t* const gbv = ccv_nnc_tensor_view_new(gb, GPU_TENSOR_NHWC(000, 32F, 2, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, indices, b), TENSOR_LIST(ga, gindices, gb), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_FORWARD(3), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)gbv, gindices), TENSOR_LIST((ccv_nnc_tensor_t*)gav), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga), TENSOR_LIST(a), 0);
	float atp[] = {
		0, 0, 0, 3,
		4, 4, 9, 7,
		8, 0, 0, 11,
	};
	ccv_nnc_tensor_t const at = ccv_nnc_tensor(atp, CPU_TENSOR_NHWC(32F, 3, 4), 0);
	REQUIRE_TENSOR_EQ(a, &at, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_view_free(av);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_view_free(bv);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_view_free(gav);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
	ccv_nnc_tensor_view_free(gbv);
}

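// Backward, 2-D: the backward command gathers rows of the incoming gradient a
// at the given indices, so with indices {1, 1} both rows of b are expected to
// equal row 1 of a, i.e. {2, 3}.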
TEST_CASE("backward scatter add a tensor")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_MPS));
	float ap[] = {
		1, 2,
		2, 3,
		3, 4,
	};
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(ap, CPU_TENSOR_NHWC(32F, 3, 2), 0);
	int ip[] = {1, 1};
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(ip, CPU_TENSOR_NHWC(32S, 2), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3, 2), 0);
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 2), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 2), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, indices), TENSOR_LIST(ga, gindices), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_BACKWARD(3), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, 0, gindices), TENSOR_LIST(gb), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb), TENSOR_LIST(b), 0);
	float btp[] = {
		2, 3,
		2, 3,
	};
	ccv_nnc_tensor_t const bt = ccv_nnc_tensor(btp, CPU_TENSOR_NHWC(32F, 2, 2), 0);
	REQUIRE_TENSOR_EQ(b, &bt, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
}

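// Backward, 1-D: gathering a = {1, 2, 3, 4, 5} at indices {3, 2, 4} yields
// b = {4, 3, 5}.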
TEST_CASE("backward scatter add a 1d tensor")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_MPS));
	float ap[] = {
		1, 2, 3, 4, 5
	};
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(ap, CPU_TENSOR_NHWC(32F, 5), 0);
	int ip[] = {3, 2, 4};
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(ip, CPU_TENSOR_NHWC(32S, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5), 0);
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 3), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, indices), TENSOR_LIST(ga, gindices), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_BACKWARD(5), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, 0, gindices), TENSOR_LIST(gb), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb), TENSOR_LIST(b), 0);
	float btp[] = {
		4, 3, 5
	};
	ccv_nnc_tensor_t const bt = ccv_nnc_tensor(btp, CPU_TENSOR_NHWC(32F, 3), 0);
	REQUIRE_TENSOR_EQ(b, &bt, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
}

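// Backward through strided tensor views: the gradient is gathered from the
// 2-column view of a (gav) into the 2-column view of b (gbv). b is memset to
// zero first, so only the viewed columns of b should change when it is copied
// back from the GPU.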
TEST_CASE("backward scatter add a tensor view")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_MPS));
	float ap[] = {
		1, 2, 3, 4,
		2, 3, 4, 5,
		3, 4, 5, 6,
	};
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(ap, CPU_TENSOR_NHWC(32F, 3, 4), 0);
	ccv_nnc_tensor_view_t* const av = ccv_nnc_tensor_view_new(a, CPU_TENSOR_NHWC(32F, 3, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	int ip[] = {1, 1};
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(ip, CPU_TENSOR_NHWC(32S, 2), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
	memset(b->data.f32, 0, 2 * 4 * sizeof(float));
	ccv_nnc_tensor_view_t* const bv = ccv_nnc_tensor_view_new(b, CPU_TENSOR_NHWC(32F, 2, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3, 4), 0);
	ccv_nnc_tensor_view_t* const gav = ccv_nnc_tensor_view_new(ga, GPU_TENSOR_NHWC(000, 32F, 3, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 2), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 4), 0);
	ccv_nnc_tensor_view_t* const gbv = ccv_nnc_tensor_view_new(gb, GPU_TENSOR_NHWC(000, 32F, 2, 2), DIM_ALLOC(0, 1), DIM_ALLOC(4, 1));
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, indices, b), TENSOR_LIST(ga, gindices, gb), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_BACKWARD(3), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)gav, 0, gindices), TENSOR_LIST((ccv_nnc_tensor_t*)gbv), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb), TENSOR_LIST(b), 0);
	float btp[] = {
		0, 3, 4, 0,
		0, 3, 4, 0,
	};
	ccv_nnc_tensor_t const bt = ccv_nnc_tensor(btp, CPU_TENSOR_NHWC(32F, 2, 4), 0);
	REQUIRE_TENSOR_EQ(b, &bt, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_view_free(av);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_view_free(bv);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_view_free(gav);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
	ccv_nnc_tensor_view_free(gbv);
}

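// Forward in FP16: random 10x10 input rows are scattered into a 100x10 output
// at indices i * 9 + 1 (all distinct, so no accumulation). The GPU result is
// compared against the CPU FP16 run on the same inputs after both are
// converted back to FP32.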
TEST_CASE("scatter add with half precision")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_MPS));
	dsfmt_t dsfmt;
	dsfmt_init_gen_rand(&dsfmt, 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0);
	int i;
	for (i = 0; i < 10 * 10; i++)
		a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
	ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 10), 0);
	ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10), 0);
	for (i = 0; i < 10; i++)
		indices->data.i32[i] = i * 9 + 1;
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 10), 0);
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 10), 0);
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 10), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 10), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, indices), TENSOR_LIST(ga, gindices), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_FORWARD(100), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gindices), TENSOR_LIST(gb), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 10), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_FORWARD(100), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, indices), TENSOR_LIST(bt), 0);
	ccv_nnc_tensor_t* const b32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10), 0);
	ccv_nnc_tensor_t* const bt32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10), 0);
	ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, bt), TENSOR_LIST(b32, bt32), 0);
	REQUIRE_TENSOR_EQ(b32, bt32, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
	ccv_nnc_tensor_free(a16);
	ccv_nnc_tensor_free(bt);
	ccv_nnc_tensor_free(b32);
	ccv_nnc_tensor_free(bt32);
}

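// Backward in FP16: a random 100x10 gradient is gathered at the same i * 9 + 1
// indices, and the GPU result is checked against the CPU FP16 run after
// converting both to FP32.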
TEST_CASE("backward scatter add with half precision")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_MPS));
	dsfmt_t dsfmt;
	dsfmt_init_gen_rand(&dsfmt, 0);
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10), 0);
	int i;
	for (i = 0; i < 100 * 10; i++)
		a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
	ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 10), 0);
	ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10), 0);
	for (i = 0; i < 10; i++)
		indices->data.i32[i] = i * 9 + 1;
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 10), 0);
	ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 10), 0);
	ccv_nnc_tensor_t* const gindices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 10), 0);
	ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 10), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, indices), TENSOR_LIST(ga, gindices), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_BACKWARD(100), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, 0, gindices), TENSOR_LIST(gb), 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 10), 0);
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_BACKWARD(100), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, 0, indices), TENSOR_LIST(bt), 0);
	ccv_nnc_tensor_t* const b32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0);
	ccv_nnc_tensor_t* const bt32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0);
	ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, bt), TENSOR_LIST(b32, bt32), 0);
	REQUIRE_TENSOR_EQ(b32, bt32, "should be equal");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(ga);
	ccv_nnc_tensor_free(gindices);
	ccv_nnc_tensor_free(gb);
	ccv_nnc_tensor_free(a16);
	ccv_nnc_tensor_free(bt);
	ccv_nnc_tensor_free(b32);
	ccv_nnc_tensor_free(bt32);
}

#include "case_main.h"