Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/cnnp.core.tests.c
Line  Count  Source
   1         #include "case.h"
   2         #include "ccv_case.h"
   3         #include "ccv_nnc_case.h"
   4         #include <ccv.h>
   5         #include <nnc/ccv_nnc.h>
   6         #include <nnc/ccv_nnc_easy.h>
   7         #include "3rdparty/dsfmt/dSFMT.h"
   8
   9         TEST_SETUP()
  10         {
  11           ccv_nnc_init();
  12         }
  13
  14         ccv_cnnp_model_t* _math_2_x_10()
  15      8  {
  16      8    ccv_cnnp_model_t* mul = ccv_cnnp_dense(1, 1, 0, 1, "mul");
  17      8    ccv_cnnp_model_io_t input = ccv_cnnp_input();
  18      8    ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(input));
  19      8    ccv_cnnp_model_io_t fit = ccv_cnnp_input();
  20           // Because we don't have L2 loss function available yet, manually create L2 loss.
  21      8    ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
  22      8      ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
  23      8        MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
  24      8        MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
  25      8      MODEL_IO_LIST(left_out, fit));
  26      8    ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
  27      8      ccv_cnnp_cmd_exec(CMD_MUL_FORWARD(1), ccv_nnc_no_hint, 0,
  28      8        MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
  29      8        MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
  30      8      MODEL_IO_LIST(diff, diff));
  31      8    return ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0);
  32      8  }
  33
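Note: per the inline comment at line 20, the helper above composes an L2 loss by hand: CMD_ADD_FORWARD(1, -1) forms the residual between the dense output and the fit input, and CMD_MUL_FORWARD(1) multiplies that residual with itself. A minimal host-side sketch of the objective the graph encodes (assuming the dense layer reduces to a single weight w with no bias, the "x" in the test names) is:

    #include <stdio.h>

    /* Restatement of the objective: loss(w) = (w * a - f)^2 with a = 2 and
     * f = 10, minimized at w = 5, which is what the SGD loop in each test
     * case below drives the dense weight toward. */
    static float math_2_x_10_loss(const float w)
    {
      const float a = 2.f, f = 10.f;
      const float diff = w * a - f; /* CMD_ADD_FORWARD(1, -1): left_out - fit */
      return diff * diff;           /* CMD_MUL_FORWARD(1): diff * diff */
    }

    int main(void)
    {
      printf("loss(5) = %g\n", math_2_x_10_loss(5.f)); /* 0 at the optimum */
      return 0;
    }
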
  34         TEST_CASE("train a simple math 2 * x = 10, x = 5 and copy parameter to a new model entirely")
  35         {
  36           GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
  37             ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
  38             ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
  39             ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
  40             ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
  41           const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
  42           GUARD_ELSE_RETURN(device_count >= 1);
  43           ccv_cnnp_model_t* const final = _math_2_x_10();
  44           const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1);
  45           const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1);
  46           ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
  47           ccv_cnnp_model_set_data_parallel(final, device_count);
  48           CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  49           ccv_nnc_tensor_param_t o = {};
  50           ccv_cnnp_model_tensor_auto(final, &o, 1);
  51           ccv_nnc_tensor_t* a_tensor[device_count];
  52           ccv_nnc_tensor_t* f_tensor[device_count];
  53           ccv_nnc_tensor_t* o_tensor[device_count];
  54           ccv_nnc_tensor_t* ingrad[device_count];
  55           int i;
  56           for (i = 0; i < device_count; i++)
  57           {
  58             ccv_nnc_tensor_param_t ai = a;
  59             CCV_TENSOR_SET_DEVICE_ID(ai.type, i);
  60             a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0);
  61             ccv_nnc_tensor_param_t fi = f;
  62             CCV_TENSOR_SET_DEVICE_ID(fi.type, i);
  63             f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0);
  64             ccv_nnc_tensor_param_t oi = o;
  65             CCV_TENSOR_SET_DEVICE_ID(oi.type, i);
  66             o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0);
  67             ingrad[i] = ccv_nnc_tensor_new(0, oi, 0);
  68             ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0);
  69             ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0);
  70             ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0);
  71           }
  72           ccv_nnc_tensor_t* inputs[device_count * 2];
  73           for (i = 0; i < 10; i++)
  74           {
  75             int j;
  76             for (j = 0; j < device_count; j++)
  77             {
  78               inputs[j * 2] = a_tensor[j];
  79               inputs[j * 2 + 1] = f_tensor[j];
  80             }
  81             ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
  82               .requires_grad = 1,
  83             }, inputs, device_count * 2, o_tensor, device_count, 0, 0);
  84             ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
  85             ccv_cnnp_model_apply_gradients(final, 0);
  86           }
  87           ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  88           ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0);
  89           const float o_final = ho->data.f32[0];
  90           ccv_cnnp_model_t* const final2 = _math_2_x_10();
  91           ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
  92           ccv_cnnp_model_set_data_parallel(final2, device_count);
  93           ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
  94           for (i = 0; i < device_count; i++)
  95           {
  96             inputs[i * 2] = a_tensor[i];
  97             inputs[i * 2 + 1] = f_tensor[i];
  98           }
  99           ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 100           for (i = 0; i < device_count; i++)
 101           {
 102             ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 103             REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output");
 104           }
 105           ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
 106           for (i = 0; i < device_count; i++)
 107           {
 108             inputs[i * 2] = a_tensor[i];
 109             inputs[i * 2 + 1] = f_tensor[i];
 110           }
 111           ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 112           for (i = 0; i < device_count; i++)
 113           {
 114             ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 115             REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 100, 1e-5, "should match the output when x is 0");
 116           }
 117           ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final, 1);
 118           ccv_cnnp_model_set_data_parallel(final3, device_count);
 119           ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
 120           for (i = 0; i < device_count; i++)
 121           {
 122             inputs[i * 2] = a_tensor[i];
 123             inputs[i * 2 + 1] = f_tensor[i];
 124           }
 125           ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 126           for (i = 0; i < device_count; i++)
 127           {
 128             ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 129             REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output");
 130           }
 131           for (i = 0; i < device_count; i++)
 132           {
 133             ccv_nnc_tensor_free(a_tensor[i]);
 134             ccv_nnc_tensor_free(f_tensor[i]);
 135             ccv_nnc_tensor_free(o_tensor[i]);
 136             ccv_nnc_tensor_free(ingrad[i]);
 137           }
 138           ccv_nnc_tensor_free(ho);
 139           ccv_cnnp_model_free(final);
 140           ccv_cnnp_model_free(final2);
 141           ccv_cnnp_model_free(final3);
 142         }
 143
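Note: the expected value of 100 asserted near the end of the test case above is just the objective evaluated at zero: ccv_cnnp_model_parameters_map with CMD_SET_FORWARD(0) sets every parameter of final2 to 0, so each device computes (0 * 2 - 10)^2 = 100. The checks against o_final restate what the test arranged: final2 and final3 received final's parameters verbatim via ccv_cnnp_model_set_parameters, so they are expected to reproduce its output.
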
 144         TEST_CASE("train a simple math 2 * x = 10, x = 5 and copy parameter to a new model entirely with a stream context")
 145      1  {
 146      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 147      1      ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 148      1      ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 149      1      ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 150      1      ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 151      1    const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
 152      1    GUARD_ELSE_RETURN(device_count >= 1);
 153      1    ccv_cnnp_model_t* const final = _math_2_x_10();
 154      1    const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1);
 155      1    const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1);
 156      1    ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
 157      1    ccv_cnnp_model_set_data_parallel(final, device_count);
 158      1    CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
 159      1    ccv_nnc_tensor_param_t o = {};
 160      1    ccv_cnnp_model_tensor_auto(final, &o, 1);
 161      1    ccv_nnc_tensor_t* a_tensor[device_count];
 162      1    ccv_nnc_tensor_t* f_tensor[device_count];
 163      1    ccv_nnc_tensor_t* o_tensor[device_count];
 164      1    ccv_nnc_tensor_t* ingrad[device_count];
 165      1    int i;
 166      5    for (i = 0; i < device_count; i++)
 167      4    {
 168      4      ccv_nnc_tensor_param_t ai = a;
 169      4      CCV_TENSOR_SET_DEVICE_ID(ai.type, i);
 170      4      a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0);
 171      4      ccv_nnc_tensor_param_t fi = f;
 172      4      CCV_TENSOR_SET_DEVICE_ID(fi.type, i);
 173      4      f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0);
 174      4      ccv_nnc_tensor_param_t oi = o;
 175      4      CCV_TENSOR_SET_DEVICE_ID(oi.type, i);
 176      4      o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0);
 177      4      ingrad[i] = ccv_nnc_tensor_new(0, oi, 0);
 178      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0);
 179      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0);
 180      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0);
 181      4    }
 182      1    ccv_nnc_tensor_t* inputs[device_count * 2];
 183     11    for (i = 0; i < 10; i++)
 184     10    {
 185     10      int j;
 186     50      for (j = 0; j < device_count; j++)
 187     40      {
 188     40        inputs[j * 2] = a_tensor[j];
 189     40        inputs[j * 2 + 1] = f_tensor[j];
 190     40      }
 191     10      ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
 192     10        .requires_grad = 1,
 193     10      }, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 194     10      ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
 195     10      ccv_cnnp_model_apply_gradients(final, 0);
 196     10    }
 197      1    ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
 198      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0);
 199      1    const float o_final = ho->data.f32[0];
 200      1    ccv_cnnp_model_t* const final2 = _math_2_x_10();
 201      1    ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
 202      1    ccv_cnnp_model_set_data_parallel(final2, device_count);
 203      1    ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
 204      5    for (i = 0; i < device_count; i++)
 205      4    {
 206      4      inputs[i * 2] = a_tensor[i];
 207      4      inputs[i * 2 + 1] = f_tensor[i];
 208      4    }
 209      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 210      5    for (i = 0; i < device_count; i++)
 211      4    {
 212      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 213      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output");
 214      4    }
 215      1    ccv_nnc_stream_context_t* stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
 216      1    ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, stream_context);
 217      5    for (i = 0; i < device_count; i++)
 218      4    {
 219      4      inputs[i * 2] = a_tensor[i];
 220      4      inputs[i * 2 + 1] = f_tensor[i];
 221      4    }
 222      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, stream_context);
 223      1    ccv_nnc_stream_context_wait(stream_context);
 224      5    for (i = 0; i < device_count; i++)
 225      4    {
 226      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 227      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 100, 1e-5, "should match the output when x is 0");
 228      4    }
 229      1    ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final, 1);
 230      1    ccv_cnnp_model_set_data_parallel(final3, device_count);
 231      1    ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
 232      5    for (i = 0; i < device_count; i++)
 233      4    {
 234      4      inputs[i * 2] = a_tensor[i];
 235      4      inputs[i * 2 + 1] = f_tensor[i];
 236      4    }
 237      1    ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 238      5    for (i = 0; i < device_count; i++)
 239      4    {
 240      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 241      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output");
 242      4    }
 243      5    for (i = 0; i < device_count; i++)
 244      4    {
 245      4      ccv_nnc_tensor_free(a_tensor[i]);
 246      4      ccv_nnc_tensor_free(f_tensor[i]);
 247      4      ccv_nnc_tensor_free(o_tensor[i]);
 248      4      ccv_nnc_tensor_free(ingrad[i]);
 249      4    }
 250      1    ccv_nnc_tensor_free(ho);
 251      1    ccv_cnnp_model_free(final);
 252      1    ccv_cnnp_model_free(final2);
 253      1    ccv_nnc_stream_context_free(stream_context);
 254      1    ccv_cnnp_model_free(final3);
 255      1  }
 256
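Note: the test case above repeats the parameter-copy scenario, but issues the parameter map and one of the evaluations against a GPU stream context and only reads o_tensor back after ccv_nnc_stream_context_wait returns. A minimal sketch of that submit-then-wait pattern, assuming a GPU-enabled build (the same assumption the GUARD_ELSE_RETURN checks enforce) and that work queued on a stream context completes by the time the wait returns:

    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      /* Create a GPU stream context, queue work against it (elided here; in the
       * test it is ccv_cnnp_model_evaluate(..., stream_context)), then wait
       * before touching the outputs on the host. */
      ccv_nnc_stream_context_t* const stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
      ccv_nnc_stream_context_wait(stream_context);
      ccv_nnc_stream_context_free(stream_context);
      return 0;
    }
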
 257         TEST_CASE("train a simple math 2 * x = 10, x = 5 and merge parameters with a model")
 258      1  {
 259      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 260      1      ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 261      1      ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 262      1      ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 263      1      ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 264      1    const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
 265      1    ccv_cnnp_model_t* const final = _math_2_x_10();
 266      1    const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1);
 267      1    const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1);
 268      1    ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
 269      1    ccv_cnnp_model_set_data_parallel(final, device_count);
 270      1    CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
 271      1    ccv_nnc_tensor_param_t o = {};
 272      1    ccv_cnnp_model_tensor_auto(final, &o, 1);
 273      1    ccv_nnc_tensor_t* a_tensor[device_count];
 274      1    ccv_nnc_tensor_t* f_tensor[device_count];
 275      1    ccv_nnc_tensor_t* o_tensor[device_count];
 276      1    ccv_nnc_tensor_t* ingrad[device_count];
 277      1    int i;
 278      5    for (i = 0; i < device_count; i++)
 279      4    {
 280      4      ccv_nnc_tensor_param_t ai = a;
 281      4      CCV_TENSOR_SET_DEVICE_ID(ai.type, i);
 282      4      a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0);
 283      4      ccv_nnc_tensor_param_t fi = f;
 284      4      CCV_TENSOR_SET_DEVICE_ID(fi.type, i);
 285      4      f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0);
 286      4      ccv_nnc_tensor_param_t oi = o;
 287      4      CCV_TENSOR_SET_DEVICE_ID(oi.type, i);
 288      4      o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0);
 289      4      ingrad[i] = ccv_nnc_tensor_new(0, oi, 0);
 290      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0);
 291      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0);
 292      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0);
 293      4    }
 294      1    ccv_nnc_tensor_t* inputs[device_count * 2];
 295     11    for (i = 0; i < 10; i++)
 296     10    {
 297     10      int j;
 298     50      for (j = 0; j < device_count; j++)
 299     40      {
 300     40        inputs[j * 2] = a_tensor[j];
 301     40        inputs[j * 2 + 1] = f_tensor[j];
 302     40      }
 303     10      ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
 304     10        .requires_grad = 1,
 305     10      }, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 306     10      ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
 307     10      ccv_cnnp_model_apply_gradients(final, 0);
 308     10    }
 309      1    ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
 310      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0);
 311      1    const float o_final = ho->data.f32[0];
 312      1    ccv_cnnp_model_t* const final2 = _math_2_x_10();
 313      1    ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
 314      1    ccv_cnnp_model_set_data_parallel(final2, device_count);
 315      1    ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
 316      5    for (i = 0; i < device_count; i++)
 317      4    {
 318      4      inputs[i * 2] = a_tensor[i];
 319      4      inputs[i * 2 + 1] = f_tensor[i];
 320      4    }
 321      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 322      5    for (i = 0; i < device_count; i++)
 323      4    {
 324      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 325      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output");
 326      4    }
 327      1    ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
 328      5    for (i = 0; i < device_count; i++)
 329      4    {
 330      4      inputs[i * 2] = a_tensor[i];
 331      4      inputs[i * 2 + 1] = f_tensor[i];
 332      4    }
 333      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 334      5    for (i = 0; i < device_count; i++)
 335      4    {
 336      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 337      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 64, 1e-5, "should match the output when x is 1");
 338      4    }
 339      1    ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
 340      5    for (i = 0; i < device_count; i++)
 341      4    {
 342      4      inputs[i * 2] = a_tensor[i];
 343      4      inputs[i * 2 + 1] = f_tensor[i];
 344      4    }
 345      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 346      1    ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
 347      1    ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor);
 348      1    const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6;
 349      5    for (i = 0; i < device_count; i++)
 350      4    {
 351      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 352      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], (x_final * 2 - 10) * (x_final * 2 - 10), 1e-5, "should match the previous output");
 353      4    }
 354      5    for (i = 0; i < device_count; i++)
 355      4    {
 356      4      ccv_nnc_tensor_free(a_tensor[i]);
 357      4      ccv_nnc_tensor_free(f_tensor[i]);
 358      4      ccv_nnc_tensor_free(o_tensor[i]);
 359      4      ccv_nnc_tensor_free(ingrad[i]);
 360      4    }
 361      1    ccv_nnc_tensor_free(ho);
 362      1    ccv_nnc_tensor_free(x_tensor);
 363      1    ccv_cnnp_model_free(final);
 364      1    ccv_cnnp_model_free(final2);
 365      1  }
 366
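Note: the final expectation above can be reproduced by hand. CMD_SET_FORWARD(1) first sets the copied weight to 1, hence the asserted (1 * 2 - 10)^2 = 64; the zip map with CMD_ADD_FORWARD(0.6, 0.4) then blends that 1 with the trained weight as 0.6 * 1 + 0.4 * x, which is exactly the x_final the test computes, so the last evaluation should produce (x_final * 2 - 10)^2. A hypothetical host-side restatement:

    #include <stdio.h>

    /* Blend a copied weight of 1 with a trained weight using the (0.6, 0.4)
     * coefficients passed to CMD_ADD_FORWARD, then evaluate the loss there. */
    static float merged_loss(const float x_trained)
    {
      const float x_final = 0.6f * 1.f + 0.4f * x_trained;
      const float diff = x_final * 2.f - 10.f;
      return diff * diff;
    }

    int main(void)
    {
      /* With a fully trained weight of 5, the blend is 2.6 and the loss 23.04. */
      printf("merged loss: %g\n", merged_loss(5.f));
      return 0;
    }
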
 367         TEST_CASE("train a simple math 2 * x = 10, x = 5 and merge parameters with a model with a stream context")
 368      1  {
 369      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 370      1      ccv_nnc_cmd_ok(CCV_NNC_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 371      1      ccv_nnc_cmd_ok(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 372      1      ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
 373      1      ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 374      1    const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
 375      1    ccv_cnnp_model_t* const final = _math_2_x_10();
 376      1    const ccv_nnc_tensor_param_t a = GPU_TENSOR_NCHW(000, 32F, 1);
 377      1    const ccv_nnc_tensor_param_t f = GPU_TENSOR_NCHW(000, 32F, 1);
 378      1    ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
 379      1    ccv_cnnp_model_set_data_parallel(final, device_count);
 380      1    CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
 381      1    ccv_nnc_tensor_param_t o = {};
 382      1    ccv_cnnp_model_tensor_auto(final, &o, 1);
 383      1    ccv_nnc_tensor_t* a_tensor[device_count];
 384      1    ccv_nnc_tensor_t* f_tensor[device_count];
 385      1    ccv_nnc_tensor_t* o_tensor[device_count];
 386      1    ccv_nnc_tensor_t* ingrad[device_count];
 387      1    int i;
 388      5    for (i = 0; i < device_count; i++)
 389      4    {
 390      4      ccv_nnc_tensor_param_t ai = a;
 391      4      CCV_TENSOR_SET_DEVICE_ID(ai.type, i);
 392      4      a_tensor[i] = ccv_nnc_tensor_new(0, ai, 0);
 393      4      ccv_nnc_tensor_param_t fi = f;
 394      4      CCV_TENSOR_SET_DEVICE_ID(fi.type, i);
 395      4      f_tensor[i] = ccv_nnc_tensor_new(0, fi, 0);
 396      4      ccv_nnc_tensor_param_t oi = o;
 397      4      CCV_TENSOR_SET_DEVICE_ID(oi.type, i);
 398      4      o_tensor[i] = ccv_nnc_tensor_new(0, oi, 0);
 399      4      ingrad[i] = ccv_nnc_tensor_new(0, oi, 0);
 400      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad[i]), 0);
 401      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a_tensor[i]), 0);
 402      4      ccv_nnc_cmd_exec(CMD_SET_FORWARD(10), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(f_tensor[i]), 0);
 403      4    }
 404      1    ccv_nnc_tensor_t* inputs[device_count * 2];
 405     11    for (i = 0; i < 10; i++)
 406     10    {
 407     10      int j;
 408     50      for (j = 0; j < device_count; j++)
 409     40      {
 410     40        inputs[j * 2] = a_tensor[j];
 411     40        inputs[j * 2 + 1] = f_tensor[j];
 412     40      }
 413     10      ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
 414     10        .requires_grad = 1,
 415     10      }, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 416     10      ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
 417     10      ccv_cnnp_model_apply_gradients(final, 0);
 418     10    }
 419      1    ccv_nnc_tensor_t* ho = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
 420      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[0]), TENSOR_LIST(ho), 0);
 421      1    const float o_final = ho->data.f32[0];
 422      1    ccv_cnnp_model_t* const final2 = _math_2_x_10();
 423      1    ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1.0 / device_count, 0.1, 0, 0), CMD_NOOP());
 424      1    ccv_cnnp_model_set_data_parallel(final2, device_count);
 425      1    ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
 426      5    for (i = 0; i < device_count; i++)
 427      4    {
 428      4      inputs[i * 2] = a_tensor[i];
 429      4      inputs[i * 2 + 1] = f_tensor[i];
 430      4    }
 431      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 432      5    for (i = 0; i < device_count; i++)
 433      4    {
 434      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 435      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], o_final, 1e-5, "should match the previous output");
 436      4    }
 437      1    ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
 438      5    for (i = 0; i < device_count; i++)
 439      4    {
 440      4      inputs[i * 2] = a_tensor[i];
 441      4      inputs[i * 2 + 1] = f_tensor[i];
 442      4    }
 443      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, 0);
 444      5    for (i = 0; i < device_count; i++)
 445      4    {
 446      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 447      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], 64, 1e-5, "should match the output when x is 1");
 448      4    }
 449      1    ccv_nnc_stream_context_t* stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
 450      1    ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, 0, 0, 0, stream_context, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
 451      5    for (i = 0; i < device_count; i++)
 452      4    {
 453      4      inputs[i * 2] = a_tensor[i];
 454      4      inputs[i * 2 + 1] = f_tensor[i];
 455      4    }
 456      1    ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, inputs, device_count * 2, o_tensor, device_count, 0, stream_context);
 457      1    ccv_nnc_stream_context_wait(stream_context);
 458      1    ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
 459      1    ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor);
 460      1    const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6;
 461      5    for (i = 0; i < device_count; i++)
 462      4    {
 463      4      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(o_tensor[i]), TENSOR_LIST(ho), 0);
 464      4      REQUIRE_EQ_WITH_TOLERANCE(ho->data.f32[0], (x_final * 2 - 10) * (x_final * 2 - 10), 1e-5, "should match the previous output");
 465      4    }
 466      5    for (i = 0; i < device_count; i++)
 467      4    {
 468      4      ccv_nnc_tensor_free(a_tensor[i]);
 469      4      ccv_nnc_tensor_free(f_tensor[i]);
 470      4      ccv_nnc_tensor_free(o_tensor[i]);
 471      4      ccv_nnc_tensor_free(ingrad[i]);
 472      4    }
 473      1    ccv_nnc_tensor_free(ho);
 474      1    ccv_nnc_tensor_free(x_tensor);
 475      1    ccv_cnnp_model_free(final);
 476      1    ccv_cnnp_model_free(final2);
 477      1    ccv_nnc_stream_context_free(stream_context);
 478      1  }
 479
 480         #include "case_main.h"