/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/cnnp.core.tests.c
Line | Count | Source |
1 | | #include "case.h" |
2 | | #include "ccv_case.h" |
3 | | #include "ccv_nnc_case.h" |
4 | | #include <ccv.h> |
5 | | #include <nnc/ccv_nnc.h> |
6 | | #include <nnc/ccv_nnc_easy.h> |
7 | | #include "3rdparty/dsfmt/dSFMT.h" |
8 | | |
9 | | TEST_SETUP() |
10 | | { |
11 | | ccv_nnc_init(); |
12 | | } |
13 | | |
14 | | ccv_cnnp_model_t* _math_2_x_10() |
15 | 8 | { |
16 | 8 | ccv_cnnp_model_t* mul = ccv_cnnp_dense(1, 1, 0, 1, "mul"); |
17 | 8 | ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
18 | 8 | ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(input)); |
19 | 8 | ccv_cnnp_model_io_t fit = ccv_cnnp_input(); |
20 | | // Because we don't have an L2 loss function available yet, manually create the L2 loss. |
21 | 8 | ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply( |
22 | 8 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
23 | 8 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
24 | 8 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
25 | 8 | MODEL_IO_LIST(left_out, fit)); |
26 | 8 | ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply( |
27 | 8 | ccv_cnnp_cmd_exec(CMD_MUL_FORWARD(1), ccv_nnc_no_hint, 0, |
28 | 8 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
29 | 8 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
30 | 8 | MODEL_IO_LIST(diff, diff)); |
31 | 8 | return ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0); |
32 | 8 | } |
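
The two cmd_exec nodes above assemble the squared error by hand: CMD_ADD_FORWARD(1, -1) forms diff = out - fit from the dense layer's output, and CMD_MUL_FORWARD(1) squares it. A minimal plain-C sketch of the arithmetic the graph encodes (a reference helper for this note only, not part of the ccv API; any bias in the dense layer is ignored):

    // loss(w, x, fit) = (w * x - fit)^2, minimized at w = fit / x (here 10 / 2 = 5).
    static float _l2_loss_reference(float w, float x, float fit)
    {
        const float out = w * x;       // single-unit dense output
        const float diff = out - fit;  // CMD_ADD_FORWARD(1, -1)
        return diff * diff;            // CMD_MUL_FORWARD(1)
    }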
33 | | |
34 | | TEST_CASE("train a simple math 2 * x = 10, x = 5 and copy parameter to a new model entirely") |
35 | | { |
36 | | GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
37 | | ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
38 | | ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
39 | | ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
40 | | ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF)); |
41 | | const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); |
42 | | GUARD_ELSE_RETURN(device_count >= 1); |
43 | | ccv_cnnp_model_t* const final = _math_2_x_10(); |
44 | | const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1); |
45 | | const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1); |
46 | | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
47 | | ccv_cnnp_model_set_data_parallel(final, device_count); |
48 | | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
49 | | ccv_nnc_tensor_param_t o = {}; |
50 | | ccv_cnnp_model_tensor_auto(final, &o, 1); |
51 | | ccv_nnc_tensor_t* a_tensor[device_count]; |
52 | | ccv_nnc_tensor_t* f_tensor[device_count]; |
53 | | ccv_nnc_tensor_t* o_tensor[device_count]; |
54 | | ccv_nnc_tensor_t* ingrad[device_count]; |
55 | | int i; |
56 | | for (i = 0; i < device_count; i++) |
57 | | { |
58 | | ccv_nnc_tensor_param_t ai = a; |
59 | | CCV_TENSOR_SET_DEVICE_ID(ai.type, i); |
60 | | a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0); |
61 | | ccv_nnc_tensor_param_t fi = f; |
62 | | CCV_TENSOR_SET_DEVICE_ID(fi.type, i); |
63 | | f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0); |
64 | | ccv_nnc_tensor_param_t oi = o; |
65 | | CCV_TENSOR_SET_DEVICE_ID(oi.type, i); |
66 | | o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0); |
67 | | ingrad[i] = ccv_nnc_tensor_new(0, oi, 0); |
68 | | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0); |
69 | | ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0); |
70 | | ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0); |
71 | | } |
72 | | ccv_nnc_tensor_t* inputs[device_count * 2]; |
73 | | for (i = 0; i < 10; i++) |
74 | | { |
75 | | int j; |
76 | | for (j = 0; j < device_count; j++) |
77 | | { |
78 | | inputs[j * 2] = a_tensor[j]; |
79 | | inputs[j * 2 + 1] = f_tensor[j]; |
80 | | } |
81 | | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
82 | | .requires_grad = 1, |
83 | | }, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
84 | | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
85 | | ccv_cnnp_model_apply_gradients(final, 0); |
86 | | } |
87 | | ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
88 | | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0); |
89 | | const float o_final = ho->data.f32[0]; |
90 | | ccv_cnnp_model_t* const final2 = _math_2_x_10(); |
91 | | ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
92 | | ccv_cnnp_model_set_data_parallel(final2, device_count); |
93 | | ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
94 | | for (i = 0; i < device_count; i++) |
95 | | { |
96 | | inputs[i * 2] = a_tensor[i]; |
97 | | inputs[i * 2 + 1] = f_tensor[i]; |
98 | | } |
99 | | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
100 | | for (i = 0; i < device_count; i++) |
101 | | { |
102 | | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
103 | | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output"); |
104 | | } |
105 | | ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0); |
106 | | for (i = 0; i < device_count; i++) |
107 | | { |
108 | | inputs[i * 2] = a_tensor[i]; |
109 | | inputs[i * 2 + 1] = f_tensor[i]; |
110 | | } |
111 | | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
112 | | for (i = 0; i < device_count; i++) |
113 | | { |
114 | | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
115 | | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 100, 1e-5, "should match the output when x is 0"); |
116 | | } |
117 | | ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final, 1); |
118 | | ccv_cnnp_model_set_data_parallel(final3, device_count); |
119 | | ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
120 | | for (i = 0; i < device_count; i++) |
121 | | { |
122 | | inputs[i * 2] = a_tensor[i]; |
123 | | inputs[i * 2 + 1] = f_tensor[i]; |
124 | | } |
125 | | ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
126 | | for (i = 0; i < device_count; i++) |
127 | | { |
128 | | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
129 | | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output"); |
130 | | } |
131 | | for (i = 0; i < device_count; i++) |
132 | | { |
133 | | ccv_nnc_tensor_free(a_tensor[i]); |
134 | | ccv_nnc_tensor_free(f_tensor[i]); |
135 | | ccv_nnc_tensor_free(o_tensor[i]); |
136 | | ccv_nnc_tensor_free(ingrad[i]); |
137 | | } |
138 | | ccv_nnc_tensor_free(ho); |
139 | | ccv_cnnp_model_free(final); |
140 | | ccv_cnnp_model_free(final2); |
141 | | ccv_cnnp_model_free(final3); |
142 | | } |
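
The hard-coded expectations in this test follow directly from that loss: after ten SGD steps the weight approaches 5, so o_final is the converged (near-zero) loss, and after ccv_cnnp_model_parameters_map with CMD_SET_FORWARD(0) the weight is 0, giving (0 * 2 - 10)^2 = 100. A quick check using the reference helper sketched above (illustrative only):

    // with the weight zeroed by CMD_SET_FORWARD(0):
    const float loss_at_zero = _l2_loss_reference(0.f, 2.f, 10.f); // (0 * 2 - 10)^2 = 100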
143 | | |
144 | | TEST_CASE("train a simple math 2 * x = 10, x = 5 and copy parameter to a new model entirely with a stream context") |
145 | 1 | { |
146 | 1 | GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
147 | 1 | ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
148 | 1 | ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
149 | 1 | ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
150 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF)); |
151 | 1 | const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); |
152 | 1 | GUARD_ELSE_RETURN(device_count >= 1); |
153 | 1 | ccv_cnnp_model_t* const final = _math_2_x_10(); |
154 | 1 | const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1); |
155 | 1 | const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1); |
156 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
157 | 1 | ccv_cnnp_model_set_data_parallel(final, device_count); |
158 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
159 | 1 | ccv_nnc_tensor_param_t o = {}; |
160 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
161 | 1 | ccv_nnc_tensor_t* a_tensor[device_count]; |
162 | 1 | ccv_nnc_tensor_t* f_tensor[device_count]; |
163 | 1 | ccv_nnc_tensor_t* o_tensor[device_count]; |
164 | 1 | ccv_nnc_tensor_t* ingrad[device_count]; |
165 | 1 | int i; |
166 | 5 | for (i = 0; i < device_count; i++) |
167 | 4 | { |
168 | 4 | ccv_nnc_tensor_param_t ai = a; |
169 | 4 | CCV_TENSOR_SET_DEVICE_ID(ai.type, i); |
170 | 4 | a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0); |
171 | 4 | ccv_nnc_tensor_param_t fi = f; |
172 | 4 | CCV_TENSOR_SET_DEVICE_ID(fi.type, i); |
173 | 4 | f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0); |
174 | 4 | ccv_nnc_tensor_param_t oi = o; |
175 | 4 | CCV_TENSOR_SET_DEVICE_ID(oi.type, i); |
176 | 4 | o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0); |
177 | 4 | ingrad[i] = ccv_nnc_tensor_new(0, oi, 0); |
178 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0); |
179 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0); |
180 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0); |
181 | 4 | } |
182 | 1 | ccv_nnc_tensor_t* inputs[device_count * 2]; |
183 | 11 | for (i = 0; i < 10; i++) |
184 | 10 | { |
185 | 10 | int j; |
186 | 50 | for (j = 0; j < device_count; j++) |
187 | 40 | { |
188 | 40 | inputs[j * 2] = a_tensor[j]; |
189 | 40 | inputs[j * 2 + 1] = f_tensor[j]; |
190 | 40 | } |
191 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
192 | 10 | .requires_grad = 1, |
193 | 10 | }, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
194 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
195 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
196 | 10 | } |
197 | 1 | ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
198 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0); |
199 | 1 | const float o_final = ho->data.f32[0]; |
200 | 1 | ccv_cnnp_model_t* const final2 = _math_2_x_10(); |
201 | 1 | ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
202 | 1 | ccv_cnnp_model_set_data_parallel(final2, device_count); |
203 | 1 | ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
204 | 5 | for (i = 0; i < device_count; i++) |
205 | 4 | { |
206 | 4 | inputs[i * 2] = a_tensor[i]; |
207 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
208 | 4 | } |
209 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
210 | 5 | for (i = 0; i < device_count; i++) |
211 | 4 | { |
212 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
213 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output"); |
214 | 4 | } |
215 | 1 | ccv_nnc_stream_context_t* stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU); |
216 | 1 | ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, stream_context); |
217 | 5 | for (i = 0; i < device_count; i++) |
218 | 4 | { |
219 | 4 | inputs[i * 2] = a_tensor[i]; |
220 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
221 | 4 | } |
222 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, stream_context); |
223 | 1 | ccv_nnc_stream_context_wait(stream_context); |
224 | 5 | for (i = 0; i < device_count; i++) |
225 | 4 | { |
226 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
227 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 100, 1e-5, "should match the output when x is 0"); |
228 | 4 | } |
229 | 1 | ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final, 1); |
230 | 1 | ccv_cnnp_model_set_data_parallel(final3, device_count); |
231 | 1 | ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
232 | 5 | for (i = 0; i < device_count; i++) |
233 | 4 | { |
234 | 4 | inputs[i * 2] = a_tensor[i]; |
235 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
236 | 4 | } |
237 | 1 | ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
238 | 5 | for (i = 0; i < device_count; i++) |
239 | 4 | { |
240 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
241 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output"); |
242 | 4 | } |
243 | 5 | for (i = 0; i < device_count; i++) |
244 | 4 | { |
245 | 4 | ccv_nnc_tensor_free(a_tensor[i]); |
246 | 4 | ccv_nnc_tensor_free(f_tensor[i]); |
247 | 4 | ccv_nnc_tensor_free(o_tensor[i]); |
248 | 4 | ccv_nnc_tensor_free(ingrad[i]); |
249 | 4 | } |
250 | 1 | ccv_nnc_tensor_free(ho); |
251 | 1 | ccv_cnnp_model_free(final); |
252 | 1 | ccv_cnnp_model_free(final2); |
253 | 1 | ccv_nnc_stream_context_free(stream_context); |
254 | 1 | ccv_cnnp_model_free(final3); |
255 | 1 | } |
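
The stream-context variant runs the same math; the only difference is that the parameter map and the evaluation are enqueued on a GPU stream and the host waits on it before reading results back. Condensed, using the same calls as the test body (same model, inputs, and tensors assumed):

    ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
    ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS),
        CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, stream);
    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2,
        o_tensor, device_count, 0, stream);
    ccv_nnc_stream_context_wait(stream); // results are only valid on the host after this
    ccv_nnc_stream_context_free(stream);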
256 | | |
257 | | TEST_CASE("train a simple math 2 * x = 10, x = 5 and merge parameters with a model") |
258 | 1 | { |
259 | 1 | GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
260 | 1 | ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
261 | 1 | ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
262 | 1 | ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
263 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF)); |
264 | 1 | const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); |
265 | 1 | ccv_cnnp_model_t* const final = _math_2_x_10(); |
266 | 1 | const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1); |
267 | 1 | const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1); |
268 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
269 | 1 | ccv_cnnp_model_set_data_parallel(final, device_count); |
270 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
271 | 1 | ccv_nnc_tensor_param_t o = {}; |
272 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
273 | 1 | ccv_nnc_tensor_t* a_tensor[device_count]; |
274 | 1 | ccv_nnc_tensor_t* f_tensor[device_count]; |
275 | 1 | ccv_nnc_tensor_t* o_tensor[device_count]; |
276 | 1 | ccv_nnc_tensor_t* ingrad[device_count]; |
277 | 1 | int i; |
278 | 5 | for (i = 0; i < device_count; i++) |
279 | 4 | { |
280 | 4 | ccv_nnc_tensor_param_t ai = a; |
281 | 4 | CCV_TENSOR_SET_DEVICE_ID(ai.type, i); |
282 | 4 | a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0); |
283 | 4 | ccv_nnc_tensor_param_t fi = f; |
284 | 4 | CCV_TENSOR_SET_DEVICE_ID(fi.type, i); |
285 | 4 | f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0); |
286 | 4 | ccv_nnc_tensor_param_t oi = o; |
287 | 4 | CCV_TENSOR_SET_DEVICE_ID(oi.type, i); |
288 | 4 | o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0); |
289 | 4 | ingrad[i] = ccv_nnc_tensor_new(0, oi, 0); |
290 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0); |
291 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0); |
292 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0); |
293 | 4 | } |
294 | 1 | ccv_nnc_tensor_t* inputs[device_count * 2]; |
295 | 11 | for (i = 0; i < 10; i++) |
296 | 10 | { |
297 | 10 | int j; |
298 | 50 | for (j = 0; j < device_count; j++) |
299 | 40 | { |
300 | 40 | inputs[j * 2] = a_tensor[j]; |
301 | 40 | inputs[j * 2 + 1] = f_tensor[j]; |
302 | 40 | } |
303 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
304 | 10 | .requires_grad = 1, |
305 | 10 | }, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
306 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
307 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
308 | 10 | } |
309 | 1 | ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
310 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0); |
311 | 1 | const float o_final = ho->data.f32[0]; |
312 | 1 | ccv_cnnp_model_t* const final2 = _math_2_x_10(); |
313 | 1 | ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
314 | 1 | ccv_cnnp_model_set_data_parallel(final2, device_count); |
315 | 1 | ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
316 | 5 | for (i = 0; i < device_count; i++) |
317 | 4 | { |
318 | 4 | inputs[i * 2] = a_tensor[i]; |
319 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
320 | 4 | } |
321 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
322 | 5 | for (i = 0; i < device_count; i++) |
323 | 4 | { |
324 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
325 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output"); |
326 | 4 | } |
327 | 1 | ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0); |
328 | 5 | for (i = 0; i < device_count; i++) |
329 | 4 | { |
330 | 4 | inputs[i * 2] = a_tensor[i]; |
331 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
332 | 4 | } |
333 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
334 | 5 | for (i = 0; i < device_count; i++) |
335 | 4 | { |
336 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
337 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 64, 1e-5, "should match the output when x is 1"); |
338 | 4 | } |
339 | 1 | ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
340 | 5 | for (i = 0; i < device_count; i++) |
341 | 4 | { |
342 | 4 | inputs[i * 2] = a_tensor[i]; |
343 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
344 | 4 | } |
345 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
346 | 1 | ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
347 | 1 | ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor); |
348 | 1 | const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6; |
349 | 5 | for (i = 0; i < device_count; i++) |
350 | 4 | { |
351 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
352 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], (x_final * 2 - 10) * (x_final * 2 - 10), 1e-5, "should match the previous output"); |
353 | 4 | } |
354 | 5 | for (i = 0; i < device_count; i++) |
355 | 4 | { |
356 | 4 | ccv_nnc_tensor_free(a_tensor[i]); |
357 | 4 | ccv_nnc_tensor_free(f_tensor[i]); |
358 | 4 | ccv_nnc_tensor_free(o_tensor[i]); |
359 | 4 | ccv_nnc_tensor_free(ingrad[i]); |
360 | 4 | } |
361 | 1 | ccv_nnc_tensor_free(ho); |
362 | 1 | ccv_nnc_tensor_free(x_tensor); |
363 | 1 | ccv_cnnp_model_free(final); |
364 | 1 | ccv_cnnp_model_free(final2); |
365 | 1 | } |
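
The merge expectations are plain arithmetic: final2's weight has just been set to 1, final's trained weight is copied back into x_tensor, and ccv_cnnp_model_parameters_zip_map with CMD_ADD_FORWARD(0.6, 0.4) blends them as 0.6 * 1 + 0.4 * x, so the evaluated loss is (x_final * 2 - 10)^2 (and the earlier 64 is (1 * 2 - 10)^2). A worked sketch mirroring the expected-value computation (the exact trained value of x is whatever x_tensor holds; 5 is used here only for illustration):

    const float x_trained = 5.f;                            // value read back into x_tensor after training
    const float x_merged = 0.6f * 1.f + 0.4f * x_trained;   // CMD_ADD_FORWARD(0.6, 0.4) blend
    const float expected = (x_merged * 2.f - 10.f) * (x_merged * 2.f - 10.f); // loss at the merged weight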
366 | | |
367 | | TEST_CASE("train a simple math 2 * x = 10, x = 5 and merge parameters with a model with a stream context") |
368 | 1 | { |
369 | 1 | GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
370 | 1 | ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
371 | 1 | ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
372 | 1 | ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
373 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF)); |
374 | 1 | const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); |
375 | 1 | ccv_cnnp_model_t* const final = _math_2_x_10(); |
376 | 1 | const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1); |
377 | 1 | const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1); |
378 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
379 | 1 | ccv_cnnp_model_set_data_parallel(final, device_count); |
380 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
381 | 1 | ccv_nnc_tensor_param_t o = {}; |
382 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
383 | 1 | ccv_nnc_tensor_t* a_tensor[device_count]; |
384 | 1 | ccv_nnc_tensor_t* f_tensor[device_count]; |
385 | 1 | ccv_nnc_tensor_t* o_tensor[device_count]; |
386 | 1 | ccv_nnc_tensor_t* ingrad[device_count]; |
387 | 1 | int i; |
388 | 5 | for (i = 0; i < device_count; i++) |
389 | 4 | { |
390 | 4 | ccv_nnc_tensor_param_t ai = a; |
391 | 4 | CCV_TENSOR_SET_DEVICE_ID(ai.type, i); |
392 | 4 | a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0); |
393 | 4 | ccv_nnc_tensor_param_t fi = f; |
394 | 4 | CCV_TENSOR_SET_DEVICE_ID(fi.type, i); |
395 | 4 | f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0); |
396 | 4 | ccv_nnc_tensor_param_t oi = o; |
397 | 4 | CCV_TENSOR_SET_DEVICE_ID(oi.type, i); |
398 | 4 | o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0); |
399 | 4 | ingrad[i] = ccv_nnc_tensor_new(0, oi, 0); |
400 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0); |
401 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0); |
402 | 4 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0); |
403 | 4 | } |
404 | 1 | ccv_nnc_tensor_t* inputs[device_count * 2]; |
405 | 11 | for (i = 0; i < 10; i++) |
406 | 10 | { |
407 | 10 | int j; |
408 | 50 | for (j = 0; j < device_count; j++) |
409 | 40 | { |
410 | 40 | inputs[j * 2] = a_tensor[j]; |
411 | 40 | inputs[j * 2 + 1] = f_tensor[j]; |
412 | 40 | } |
413 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
414 | 10 | .requires_grad = 1, |
415 | 10 | }, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
416 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
417 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
418 | 10 | } |
419 | 1 | ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
420 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0); |
421 | 1 | const float o_final = ho->data.f32[0]; |
422 | 1 | ccv_cnnp_model_t* const final2 = _math_2_x_10(); |
423 | 1 | ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP()); |
424 | 1 | ccv_cnnp_model_set_data_parallel(final2, device_count); |
425 | 1 | ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
426 | 5 | for (i = 0; i < device_count; i++) |
427 | 4 | { |
428 | 4 | inputs[i * 2] = a_tensor[i]; |
429 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
430 | 4 | } |
431 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
432 | 5 | for (i = 0; i < device_count; i++) |
433 | 4 | { |
434 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
435 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output"); |
436 | 4 | } |
437 | 1 | ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0); |
438 | 5 | for (i = 0; i < device_count; i++) |
439 | 4 | { |
440 | 4 | inputs[i * 2] = a_tensor[i]; |
441 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
442 | 4 | } |
443 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0); |
444 | 5 | for (i = 0; i < device_count; i++) |
445 | 4 | { |
446 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
447 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 64, 1e-5, "should match the output when x is 1"); |
448 | 4 | } |
449 | 1 | ccv_nnc_stream_context_t* stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU); |
450 | 1 | ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, 0, 0, 0, stream_context, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
451 | 5 | for (i = 0; i < device_count; i++) |
452 | 4 | { |
453 | 4 | inputs[i * 2] = a_tensor[i]; |
454 | 4 | inputs[i * 2 + 1] = f_tensor[i]; |
455 | 4 | } |
456 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, stream_context); |
457 | 1 | ccv_nnc_stream_context_wait(stream_context); |
458 | 1 | ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
459 | 1 | ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor); |
460 | 1 | const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6; |
461 | 5 | for (i = 0; i < device_count; i++) |
462 | 4 | { |
463 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0); |
464 | 4 | REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], (x_final * 2 - 10) * (x_final * 2 - 10), 1e-5, "should match the previous output"); |
465 | 4 | } |
466 | 5 | for (i = 0; i < device_count; i++) |
467 | 4 | { |
468 | 4 | ccv_nnc_tensor_free(a_tensor[i]); |
469 | 4 | ccv_nnc_tensor_free(f_tensor[i]); |
470 | 4 | ccv_nnc_tensor_free(o_tensor[i]); |
471 | 4 | ccv_nnc_tensor_free(ingrad[i]); |
472 | 4 | } |
473 | 1 | ccv_nnc_tensor_free(ho); |
474 | 1 | ccv_nnc_tensor_free(x_tensor); |
475 | 1 | ccv_cnnp_model_free(final); |
476 | 1 | ccv_cnnp_model_free(final2); |
477 | 1 | ccv_nnc_stream_context_free(stream_context); |
478 | 1 | } |
479 | | |
480 | | #include "case_main.h" |