Coverage Report

Created: 2022-07-27 23:53

/home/liu/buildslave/linux-x64-runtests/build/test/unit/nnc/cnnp.core.tests.c
Line
Count
Source (jump to first uncovered line)
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include "3rdparty/dsfmt/dSFMT.h"
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
static ccv_cnnp_model_t* simple_cifar_10(void)
15
2
{
16
2
  return ccv_cnnp_sequential_new(MODEL_LIST(
17
2
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
18
2
    ccv_cnnp_relu(0),
19
2
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
20
2
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
21
2
    ccv_cnnp_relu(0),
22
2
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
23
2
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
24
2
    ccv_cnnp_relu(0),
25
2
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
26
2
    ccv_cnnp_flatten(0),
27
2
    ccv_cnnp_dense(256, 0, 0),
28
2
    ccv_cnnp_relu(0),
29
2
    ccv_cnnp_dense(10, 0, 0),
30
2
    ccv_cnnp_softmax(0)
31
2
  ), 0);
32
2
}
33
34
TEST_CASE("compile simple cifar-10 model")
35
1
{
36
1
  ccv_cnnp_model_t* const sequential0 = simple_cifar_10();
37
1
  ccv_cnnp_model_t* const sequential = ccv_cnnp_model_copy(sequential0);
38
1
  ccv_cnnp_model_free(sequential0);
39
1
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
40
1
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
41
1
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
42
1
  dsfmt_t dsfmt;
43
1
  int i;
44
1
  dsfmt_init_gen_rand(&dsfmt, 1);
45
2.88k
  for (i = 0; i < 31 * 31 * 3; 
i++2.88k
)
46
2.88k
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
47
1
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
48
1
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
49
1
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
50
1
    .is_test = 1
51
1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
52
1
  int t = 0;
53
1
  float max = output_tensor->data.f32[0];
54
10
  for (i = 1; i < 10; 
i++9
)
55
9
    if (output_tensor->data.f32[i] > max)
56
1
      max = output_tensor->data.f32[i], t = i;
57
1
  const int target = (t + 1) % 10;
58
1
  REQUIRE_NOT_EQ(target, t, "should not fit");
59
  // Doing training.
60
1
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
61
1
  fit_tensor->data.f32[0] = target;
62
101
  for (i = 0; i < 100; 
i++100
)
63
100
    ccv_cnnp_model_fit(sequential, TENSOR_LIST(input_tensor), TENSOR_LIST(fit_tensor), TENSOR_LIST(output_tensor), 0, 0);
64
1
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
65
  // After training, it should fit.
66
1
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
67
1
    .is_test = 1
68
1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
69
1
  t = 0;
70
1
  max = output_tensor->data.f32[0];
71
10
  for (i = 1; i < 10; 
i++9
)
72
9
    if (output_tensor->data.f32[i] > max)
73
2
      max = output_tensor->data.f32[i], t = i;
74
1
  REQUIRE_EQ(target, t, "should fit");
75
1
  remove("/tmp/compile_simple_cifar_10_model.checkpoint");
76
1
  ccv_cnnp_model_checkpoint(sequential, "/tmp/compile_simple_cifar_10_model.checkpoint", 0);
77
1
  CNNP_MODEL_GEN(sequential, CCV_NNC_LONG_DOT_GRAPH);
78
1
  ccv_cnnp_model_free(sequential);
79
1
  ccv_cnnp_model_t* const sequential2 = simple_cifar_10();
80
1
  ccv_cnnp_model_compile(sequential2, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
81
  // Load from the checkpoint file.
82
1
  ccv_cnnp_model_checkpoint(sequential2, "/tmp/compile_simple_cifar_10_model.checkpoint", 0);
83
1
  remove("/tmp/compile_simple_cifar_10_model.checkpoint");
84
1
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
85
1
  ccv_cnnp_model_evaluate(sequential2, (ccv_cnnp_evaluate_param_t){
86
1
    .is_test = 1
87
1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
88
1
  t = 0;
89
1
  max = output_tensor->data.f32[0];
90
10
  for (i = 1; i < 10; 
i++9
)
91
9
    if (output_tensor->data.f32[i] > max)
92
2
      max = output_tensor->data.f32[i], t = i;
93
1
  REQUIRE_EQ(target, t, "should fit");
94
1
  ccv_cnnp_model_free(sequential2);
95
1
  ccv_nnc_tensor_free(input_tensor);
96
1
  ccv_nnc_tensor_free(fit_tensor);
97
1
  ccv_nnc_tensor_free(output_tensor);
98
1
}
99
100
static int _ccv_cnnp_model_notified = 0;
101
102
static void _ccv_cnnp_model_hook(const ccv_cnnp_model_t* const model, const int tag, void* const payload, void* const context)
103
3
{
104
3
  if (payload)
105
3
    ++_ccv_cnnp_model_notified;
106
3
}
107
108
TEST_CASE("inception layer for model")
109
1
{
110
1
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
111
1
  _ccv_cnnp_model_notified = 0;
112
1
  ccv_cnnp_model_t* const conv_1 = ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0);
113
1
  ccv_cnnp_model_notify_hook(conv_1, _ccv_cnnp_model_hook, 0);
114
1
  ccv_cnnp_model_io_t tower_1 = ccv_cnnp_model_apply(conv_1, MODEL_IO_LIST(x));
115
1
  ccv_cnnp_model_t* const relu_1 = ccv_cnnp_relu(0);
116
1
  ccv_cnnp_model_notify_hook(relu_1, _ccv_cnnp_model_hook, 0);
117
1
  tower_1 = ccv_cnnp_model_apply(relu_1, MODEL_IO_LIST(tower_1));
118
1
  tower_1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), 0, HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(tower_1));
119
1
  tower_1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_1));
120
121
1
  ccv_cnnp_model_io_t tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(x));
122
1
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2));
123
1
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0), MODEL_IO_LIST(tower_2));
124
1
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2));
125
126
1
  ccv_cnnp_model_io_t tower_3 = ccv_cnnp_model_apply(ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(x));
127
1
  tower_3 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(tower_3));
128
1
  tower_3 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_3));
129
1
  ccv_cnnp_model_t* const add_1 = ccv_cnnp_sum(0);
130
1
  ccv_cnnp_model_notify_hook(add_1, _ccv_cnnp_model_hook, 0);
131
1
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(add_1, MODEL_IO_LIST(tower_1, tower_2, tower_3));
132
1
  REQUIRE_EQ(0, _ccv_cnnp_model_notified, "haven't notified");
133
1
  ccv_cnnp_model_t* const inception0 = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(output), 0);
134
1
  ccv_cnnp_model_notify(inception0, 0, inception0);
135
1
  ccv_cnnp_model_t* const inception = ccv_cnnp_model_copy(inception0);
136
1
  REQUIRE_EQ(3, _ccv_cnnp_model_notified, "3 models changed owner");
137
1
  ccv_cnnp_model_free(inception0);
138
1
  const ccv_nnc_tensor_param_t input = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
139
1
  ccv_cnnp_model_compile(inception, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
140
1
  CNNP_MODEL_GEN(inception, CCV_NNC_LONG_DOT_GRAPH);
141
1
  ccv_cnnp_model_free(inception);
142
1
}
143
144
static ccv_cnnp_model_t* _ccv_multiple_outputs_functional_model(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
145
1
{
146
1
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
147
1
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
148
1
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(input0));
149
1
  output0 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output0));
150
1
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), 0, HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(input1));
151
1
  output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1));
152
1
  ccv_cnnp_model_t* model0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(output0, output1), 0);
153
1
  input0 = ccv_cnnp_input();
154
1
  input1 = ccv_cnnp_input();
155
1
  output0 = ccv_cnnp_model_apply(model0, MODEL_IO_LIST(input0, input1));
156
1
  ccv_cnnp_model_io_t input2 = ccv_cnnp_input();
157
1
  output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0), MODEL_IO_LIST(input2));
158
1
  output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1));
159
1
  ccv_cnnp_model_t* interim = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0, output1), 0);
160
1
  input0 = ccv_cnnp_input();
161
1
  input1 = ccv_cnnp_input();
162
1
  input2 = ccv_cnnp_input();
163
1
  output0 = ccv_cnnp_model_apply(interim, MODEL_IO_LIST(input0, input1, input2));
164
1
  output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0));
165
1
  return ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0), 0);
166
1
}
167
168
TEST_CASE("functional model's IO can represent multiple outputs")
169
1
{
170
1
  ccv_cnnp_model_t* const final = ccv_cnnp_dynamic_new(_ccv_multiple_outputs_functional_model, 0, 0);
171
1
  const ccv_nnc_tensor_param_t a0 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
172
1
  const ccv_nnc_tensor_param_t a1 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
173
1
  const ccv_nnc_tensor_param_t a2 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
174
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, a2), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
175
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
176
1
  ccv_cnnp_model_free(final);
177
1
}
178
179
TEST_CASE("make sure reuse model enables share weights")
180
1
{
181
1
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
182
1
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
183
1
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0);
184
1
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
185
1
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
186
1
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0, output1));
187
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(final_output), 0);
188
1
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
189
1
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
190
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
191
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
192
1
  ccv_cnnp_model_free(final);
193
1
}
194
195
TEST_CASE("train model with share weights and L2 loss")
196
1
{
197
1
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
198
1
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
199
1
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0);
200
1
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
201
1
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
202
1
  ccv_cnnp_model_io_t fit0 = ccv_cnnp_input();
203
1
  ccv_cnnp_model_io_t fit1 = ccv_cnnp_input();
204
  // Because we don't have L2 loss function available yet, manually create L2 loss.
205
1
  ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply(
206
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
207
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
208
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
209
1
    MODEL_IO_LIST(output0, fit0));
210
1
  ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply(
211
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
212
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
213
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
214
1
    MODEL_IO_LIST(diff0, diff0));
215
1
  ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply(
216
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
217
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
218
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
219
1
    MODEL_IO_LIST(output1, fit1));
220
1
  ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply(
221
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
222
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
223
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
224
1
    MODEL_IO_LIST(diff1, diff1));
225
1
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1));
226
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 0);
227
1
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
228
1
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
229
1
  ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1);
230
1
  ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1);
231
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
232
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
233
1
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
234
1
  ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
235
1
  ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0);
236
1
  ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0);
237
1
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
238
1
  a0_tensor->data.f32[0] = 1;
239
1
  a1_tensor->data.f32[0] = 3;
240
1
  b0_tensor->data.f32[0] = 2;
241
1
  b1_tensor->data.f32[0] = 3;
242
1
  int i;
243
11
  for (i = 0; i < 10; 
i++10
)
244
10
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
245
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
246
101
  for (i = 0; i < 100; 
i++100
)
247
100
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
248
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
249
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
250
1.00k
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
251
1
  a0_tensor->data.f32[0] = 2;
252
1
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
253
1
  b0_tensor->data.f32[0] = 2; // diff is 0.5
254
1
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
255
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
256
1
    .is_test = 1
257
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
258
1
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "We should linear regressed this.");
259
1
  ccv_nnc_tensor_free(a0_tensor);
260
1
  ccv_nnc_tensor_free(a1_tensor);
261
1
  ccv_nnc_tensor_free(b0_tensor);
262
1
  ccv_nnc_tensor_free(b1_tensor);
263
1
  ccv_nnc_tensor_free(o0_tensor);
264
1
  ccv_cnnp_model_free(final);
265
1
}
266
267
static ccv_cnnp_model_t* simple_cifar_10_no_softmax(void)
268
2
{
269
2
  return ccv_cnnp_sequential_new(MODEL_LIST(
270
2
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
271
2
    ccv_cnnp_relu(0),
272
2
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
273
2
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
274
2
    ccv_cnnp_relu(0),
275
2
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
276
2
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
277
2
    ccv_cnnp_relu(0),
278
2
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
279
2
    ccv_cnnp_flatten(0),
280
2
    ccv_cnnp_dense(256, 0, 0),
281
2
    ccv_cnnp_relu(0),
282
2
    ccv_cnnp_dense(10, 0, 0)
283
2
  ), 0);
284
2
}
285
286
TEST_CASE("evaluate cifar-10 model in multi-stage mode")
287
1
{
288
1
  ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax();
289
1
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
290
1
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), CMD_NOOP());
291
1
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
292
1
  dsfmt_t dsfmt;
293
1
  int i;
294
1
  dsfmt_init_gen_rand(&dsfmt, 1);
295
2.88k
  for (i = 0; i < 31 * 31 * 3; 
i++2.88k
)
296
2.88k
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
297
1
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
298
1
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
299
1
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
300
1
    .is_test = 1
301
1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
302
1
  int t = 0;
303
1
  float max = output_tensor->data.f32[0];
304
10
  for (i = 1; i < 10; 
i++9
)
305
9
    if (output_tensor->data.f32[i] > max)
306
1
      max = output_tensor->data.f32[i], t = i;
307
1
  const int target = (t + 1) % 10;
308
1
  REQUIRE_NOT_EQ(target, t, "should not fit");
309
  // Doing training.
310
1
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
311
1
  fit_tensor->data.f32[0] = target;
312
1
  ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
313
1
  ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
314
1
  ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
315
101
  for (i = 0; i < 100; 
i++100
)
316
100
  {
317
100
    ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
318
100
      .requires_grad = 1
319
100
    }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
320
100
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0);
321
100
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0);
322
100
    ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
323
100
    ccv_cnnp_model_apply_gradients(sequential, 0);
324
100
  }
325
1
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
326
  // After training, it should fit.
327
1
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
328
1
    .is_test = 1
329
1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
330
1
  t = 0;
331
1
  max = output_tensor->data.f32[0];
332
10
  for (i = 1; i < 10; 
i++9
)
333
9
    if (output_tensor->data.f32[i] > max)
334
2
      max = output_tensor->data.f32[i], t = i;
335
1
  REQUIRE_EQ(target, t, "should fit");
336
1
  ccv_nnc_tensor_free(ingrad_tensor);
337
1
  ccv_nnc_tensor_free(fit_tensor);
338
1
  ccv_nnc_tensor_free(softmax_tensor);
339
1
  ccv_nnc_tensor_free(loss_tensor);
340
1
  ccv_nnc_tensor_free(input_tensor);
341
1
  ccv_nnc_tensor_free(output_tensor);
342
1
  ccv_cnnp_model_free(sequential);
343
1
}
344
345
TEST_CASE("evaluate cifar-10 model in multi-stage mode with gradient accumulated")
346
1
{
347
1
  ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax();
348
1
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
349
1
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.00033, 1, 0.99, 0.9, 0.9), CMD_NOOP());
350
1
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
351
1
  dsfmt_t dsfmt;
352
1
  int i;
353
1
  dsfmt_init_gen_rand(&dsfmt, 1);
354
2.88k
  for (i = 0; i < 31 * 31 * 3; 
i++2.88k
)
355
2.88k
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
356
1
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
357
1
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
358
1
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
359
1
    .is_test = 1
360
1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
361
1
  int t = 0;
362
1
  float max = output_tensor->data.f32[0];
363
10
  for (i = 1; i < 10; 
i++9
)
364
9
    if (output_tensor->data.f32[i] > max)
365
2
      max = output_tensor->data.f32[i], t = i;
366
1
  const int target = (t + 1) % 10;
367
1
  REQUIRE_NOT_EQ(target, t, "should not fit");
368
  // Doing training.
369
1
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
370
1
  fit_tensor->data.f32[0] = target;
371
1
  ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
372
1
  ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
373
1
  ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
374
101
  for (i = 0; i < 100; 
i++100
)
375
100
  {
376
100
    ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
377
100
      .requires_grad = 1
378
100
    }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
379
100
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0);
380
100
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0);
381
100
    ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
382
    // Backward again to accumulate gradient.
383
100
    if (i % 2 == 0)
384
50
    {
385
50
      ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
386
      // Backward again to accumulate gradient.
387
50
      if (i % 3 == 0)
388
17
        ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
389
50
    }
390
100
    ccv_cnnp_model_apply_gradients(sequential, 0);
391
100
  }
392
1
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
393
  // After training, it should fit.
394
1
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
395
1
    .is_test = 1
396
1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
397
1
  t = 0;
398
1
  max = output_tensor->data.f32[0];
399
10
  for (i = 1; i < 10; 
i++9
)
400
9
    if (output_tensor->data.f32[i] > max)
401
3
      max = output_tensor->data.f32[i], t = i;
402
1
  REQUIRE_EQ(target, t, "should fit");
403
1
  ccv_nnc_tensor_free(ingrad_tensor);
404
1
  ccv_nnc_tensor_free(fit_tensor);
405
1
  ccv_nnc_tensor_free(softmax_tensor);
406
1
  ccv_nnc_tensor_free(loss_tensor);
407
1
  ccv_nnc_tensor_free(input_tensor);
408
1
  ccv_nnc_tensor_free(output_tensor);
409
1
  ccv_cnnp_model_free(sequential);
410
1
}
411
412
TEST_CASE("train model with share weights and L2 loss and check out gradients")
413
1
{
414
1
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
415
1
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
416
1
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0);
417
1
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
418
1
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
419
1
  ccv_cnnp_model_io_t fit0 = ccv_cnnp_input();
420
1
  ccv_cnnp_model_io_t fit1 = ccv_cnnp_input();
421
  // Because we don't have L2 loss function available yet, manually create L2 loss.
422
1
  ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply(
423
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
424
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
425
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
426
1
    MODEL_IO_LIST(output0, fit0));
427
1
  ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply(
428
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
429
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
430
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
431
1
    MODEL_IO_LIST(diff0, diff0));
432
1
  ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply(
433
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
434
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
435
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
436
1
    MODEL_IO_LIST(output1, fit1));
437
1
  ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply(
438
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
439
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
440
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
441
1
    MODEL_IO_LIST(diff1, diff1));
442
1
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1));
443
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 0);
444
1
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
445
1
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
446
1
  ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1);
447
1
  ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1);
448
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
449
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
450
1
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
451
1
  ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
452
1
  ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0);
453
1
  ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0);
454
1
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
455
  // It should fit to 1*0.5+1.5=2, 3*0.5+1.5=3
456
1
  a0_tensor->data.f32[0] = 1;
457
1
  a1_tensor->data.f32[0] = 3;
458
1
  b0_tensor->data.f32[0] = 2;
459
1
  b1_tensor->data.f32[0] = 3;
460
1
  int i;
461
11
  for (i = 0; i < 10; 
i++10
)
462
10
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
463
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
464
101
  for (i = 0; i < 100; 
i++100
)
465
100
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
466
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
467
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
468
1.00k
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
469
1
  a0_tensor->data.f32[0] = 2;
470
1
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
471
1
  b0_tensor->data.f32[0] = 2; // diff is 0.5
472
1
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
473
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
474
1
    .is_test = 1
475
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
476
1
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "We should linear regressed this.");
477
  // Figure out the actual weight and bias term in the model.
478
1
  a0_tensor->data.f32[0] = 0;
479
1
  a1_tensor->data.f32[0] = 0;
480
1
  b0_tensor->data.f32[0] = 0;
481
1
  b1_tensor->data.f32[0] = 0;
482
  // The output will be 2*bias^2
483
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
484
1
    .is_test = 1
485
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
486
1
  const float bias = sqrtf(o0_tensor->data.f32[0] * 0.5);
487
1
  a0_tensor->data.f32[0] = 1;
488
1
  a1_tensor->data.f32[0] = 1;
489
1
  b0_tensor->data.f32[0] = 0;
490
1
  b1_tensor->data.f32[0] = 0;
491
  // The output will be 2*(w+bias)^2
492
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
493
1
    .is_test = 1
494
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
495
1
  const float w = sqrt(o0_tensor->data.f32[0] * 0.5) - bias;
496
  // Compute the out gradient to verify.
497
1
  a0_tensor->data.f32[0] = 2;
498
1
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
499
1
  b0_tensor->data.f32[0] = 2; // diff is 0.5
500
1
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
501
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
502
1
    .requires_grad = 1,
503
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
504
  // Note that I have to use new tensors and have to keep these tensors around since they were binded to the model when evaluate.
505
1
  ccv_nnc_tensor_t* da0_tensor = ccv_nnc_tensor_new(0, a0, 0);
506
1
  ccv_nnc_tensor_t* da1_tensor = ccv_nnc_tensor_new(0, a1, 0);
507
1
  ccv_nnc_tensor_t* db0_tensor = ccv_nnc_tensor_new(0, b0, 0);
508
1
  ccv_nnc_tensor_t* db1_tensor = ccv_nnc_tensor_new(0, b1, 0);
509
1
  ccv_nnc_tensor_t* do0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
510
1
  do0_tensor->data.f32[0] = 1;
511
1
  ccv_cnnp_model_backward(final, TENSOR_LIST(do0_tensor), TENSOR_LIST(da0_tensor, da1_tensor, db0_tensor, db1_tensor), 0, 0);
512
1
  REQUIRE_EQ_WITH_TOLERANCE(da0_tensor->data.f32[0], 2 * w * (w * 2 + bias - 2), 1e-5, "da0=2*w*(w*a0+bias-b0), thus, 0.5");
513
1
  REQUIRE_EQ_WITH_TOLERANCE(da1_tensor->data.f32[0], 2 * w * (w * 2 + bias - 3), 1e-5, "da1=2*w*(w*a1+bias-b1), thus, -0.5");
514
1
  REQUIRE_EQ_WITH_TOLERANCE(db0_tensor->data.f32[0], -2 * (w * 2 + bias - 2), 1e-5, "db0=-2*(w*a0+bias-b0), thus, -1");
515
1
  REQUIRE_EQ_WITH_TOLERANCE(db1_tensor->data.f32[0], -2 * (w * 2 + bias - 3), 1e-5, "db1=-2*(w*a1+bias-b1), thus, 1");
516
1
  ccv_nnc_tensor_free(a0_tensor);
517
1
  ccv_nnc_tensor_free(a1_tensor);
518
1
  ccv_nnc_tensor_free(b0_tensor);
519
1
  ccv_nnc_tensor_free(b1_tensor);
520
1
  ccv_nnc_tensor_free(o0_tensor);
521
1
  ccv_nnc_tensor_free(da0_tensor);
522
1
  ccv_nnc_tensor_free(da1_tensor);
523
1
  ccv_nnc_tensor_free(db0_tensor);
524
1
  ccv_nnc_tensor_free(db1_tensor);
525
1
  ccv_nnc_tensor_free(do0_tensor);
526
1
  ccv_cnnp_model_free(final);
527
1
}
528
529
TEST_CASE("apply functional model as forward pass")
{
	// Build x * 2.12 * 2.12 - 1 through the functional API: the same "mul"
	// model instance (with a shared constant tensor set to 2.12) is applied
	// twice, then a shared-tensor "add" contributes the -1.
	ccv_nnc_tensor_t* const minus_one = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	minus_one->data.f32[0] = -1;
	ccv_cnnp_model_t* const mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
	ccv_cnnp_model_t* const add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(minus_one))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
	ccv_cnnp_model_io_t const x = ccv_cnnp_input();
	ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(x));
	// Applying the same model a second time reuses its shared 2.12 tensor.
	y = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(y));
	y = ccv_cnnp_model_apply(add, MODEL_IO_LIST(y));
	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(y), "final");
	const ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_t* const in_tensor = ccv_nnc_tensor_new(0, a0, 0);
	ccv_nnc_tensor_t* const out_tensor = ccv_nnc_tensor_new(0, a0, 0);
	in_tensor->data.f32[0] = 1.12;
	out_tensor->data.f32[0] = 0;
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.is_test = 1
	}, TENSOR_LIST(in_tensor), TENSOR_LIST(out_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(out_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1");
	ccv_nnc_tensor_free(in_tensor);
	ccv_nnc_tensor_free(minus_one);
	ccv_nnc_tensor_free(out_tensor);
	ccv_cnnp_model_free(final);
}
562
563
TEST_CASE("apply sequential model as forward pass")
{
	// Same computation as the functional-API test — x * 2.12 * 2.12 - 1 —
	// but expressed as a sequential model where "mul" appears twice in the
	// layer list (so its shared 2.12 tensor is used for both applications).
	ccv_nnc_tensor_t* const minus_one = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	minus_one->data.f32[0] = -1;
	ccv_cnnp_model_t* const mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
	ccv_cnnp_model_t* const add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(minus_one))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
	ccv_cnnp_model_t* const final = ccv_cnnp_sequential_new(MODEL_LIST(mul, mul, add), "seq");
	const ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_t* const in_tensor = ccv_nnc_tensor_new(0, a0, 0);
	ccv_nnc_tensor_t* const out_tensor = ccv_nnc_tensor_new(0, a0, 0);
	in_tensor->data.f32[0] = 1.12;
	out_tensor->data.f32[0] = 0;
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.is_test = 1
	}, TENSOR_LIST(in_tensor), TENSOR_LIST(out_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(out_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1");
	ccv_nnc_tensor_free(in_tensor);
	ccv_nnc_tensor_free(minus_one);
	ccv_nnc_tensor_free(out_tensor);
	ccv_cnnp_model_free(final);
}
592
593
ccv_cnnp_model_t* _math_2_x_1_1_10(const ccv_nnc_tensor_t* const b)
594
6
{
595
6
  ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
596
6
    MODEL_CMD_EXEC_IO_MAP(
597
6
      KV(CCV_CNNP_IO),
598
6
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_set_by(CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1))),
599
6
    ),
600
6
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
601
6
  ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
602
6
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
603
6
      KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))),
604
6
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
605
6
  ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add, add), "seq");
606
6
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
607
6
  ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
608
6
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
609
  // Because we don't have L2 loss function available yet, manually create L2 loss.
610
6
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
611
6
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
612
6
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
613
6
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
614
6
    MODEL_IO_LIST(left_out, fit));
615
6
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
616
6
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
617
6
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
618
6
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
619
6
    MODEL_IO_LIST(diff, diff));
620
6
  return ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
621
6
}
622
623
// Trains the single trainable scalar w in _math_2_x_1_1_10 toward
// (w * 2 + 1 + 1 - 10)^2 == 0 (i.e. w -> 4), while exercising the different
// calling conventions of ccv_cnnp_model_backward: implicit gradients, an
// explicit zero ingrad (which must freeze training), and a NULL entry inside
// TENSOR_LIST(0).
TEST_CASE("learn simple math of 2 * x + 1 + 1 = 10, x = 4")
{
	// b is the shared "+ 1" constant used twice inside the model.
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_param_t o = {};
	// Let the compiled model infer the output (loss) tensor parameters.
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* outgrad0 = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* outgrad1 = ccv_nnc_tensor_new(0, f, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2; // the x input
	f_tensor->data.f32[0] = 10; // the fit target
	int i;
	float old_o = 10;
	// Phase 1: implicit ingrad/outgrad; 10 SGD steps must change the loss.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be different");
	old_o = o_tensor->data.f32[0];
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0, 0, 0), 0, 0, 0); // No decay.
	ingrad->data.f32[0] = 0; // ingrad is 0, no update at all.
	// Phase 2: explicit zero ingrad + explicit outgrads; loss must stay put.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(outgrad0, outgrad1), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad");
	old_o = o_tensor->data.f32[0];
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
	// Phase 3: a NULL ingrad entry (TENSOR_LIST(0)); training resumes, per
	// the assertion below that the output changes.
	for (i = 0; i < 100; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(0), TENSOR_LIST(outgrad0, outgrad1), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 100 iterations, output should be different");
	old_o = o_tensor->data.f32[0];
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0, 0, 0), 0, 0, 0); // No decay.
	// Note we still use the old ingrad which is 0.
	// Phase 4: zero ingrad again, this time with no outgrads; still frozen.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad");
	ingrad->data.f32[0] = 1; // ingrad reset to 1.
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
	// Phase 5: 1000 small steps should drive the squared error to ~0.
	for (i = 0; i < 1000; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 1000 iterations, output should be different");
	// Poison the output first to prove the final evaluate overwrites it.
	o_tensor->data.f32[0] = 10;
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.is_test = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "(2 * x + 1 + 1 - 10) ^ 2 should equal to 0");
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_nnc_tensor_free(outgrad0);
	ccv_nnc_tensor_free(outgrad1);
	ccv_cnnp_model_free(final);
}
714
715
static int _ccv_cnnp_model_clip_grad_norm_reduce_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	// Custom command for the clip-grad test: compute the 2-norm of the
	// gradient in inputs[0] into outputs[2], then add it into the running
	// accumulator in outputs[1].
	ccv_nnc_tensor_t* const accum = outputs[1]; // running sum of norms
	ccv_nnc_tensor_t* const current = outputs[2]; // norm of this gradient
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(), hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(current), stream_context);
	ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 1), hint, flags, TENSOR_LIST(accum, current), TENSOR_LIST(accum), stream_context);
	return CCV_NNC_EXEC_SUCCESS;
}
723
724
// Dispatch table wiring the custom CCV_NNC_CUSTOM_FORWARD command used by the
// clip-grad test to its implementation above.
static ccv_nnc_cmd_vtab_t clip_grad_norm_reduce_norm2_vtab = {
	.exec = _ccv_cnnp_model_clip_grad_norm_reduce_norm2
};
727
728
// Verifies ccv_cnnp_model_parameters_clip_grad_norm: after clipping, the
// accumulated 2-norm of all parameter gradients (measured by the custom
// reduce command above) must not exceed max_norm = 0.5. Checked both for a
// single backward pass and for two accumulated backward passes.
TEST_CASE("learn simple math of 2 * x + 1 + 1 = 10, x = 4 and clip grad to max_norm = 0.5")
{
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* outgrad0 = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* outgrad1 = ccv_nnc_tensor_new(0, f, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 10;
	// Round 1: one forward/backward, then clip the parameter gradients.
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
	ccv_cnnp_model_parameters_clip_grad_norm(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), 2, 0.5, 0);
	ccv_nnc_tensor_t* old_norm2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_tensor_t* norm2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	// Reset the accumulator to 0; norm2 starts at 1 and is overwritten by the
	// custom command on every parameter gradient it visits.
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(old_norm2), 0);
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(norm2), 0);
	// NOTE(review): gradients are mapped after apply_gradients here — the
	// clipped gradients evidently remain inspectable at this point; confirm
	// against ccv_cnnp_model_parameter_gradients_map's contract.
	ccv_cnnp_model_apply_gradients(final, 0);
	ccv_nnc_cmd_t reduce_cmd = {
		.cmd = CCV_NNC_CUSTOM_FORWARD,
		.isa = &clip_grad_norm_reduce_norm2_vtab,
	};
	ccv_cnnp_model_parameter_gradients_map(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(old_norm2, norm2), 0);
	REQUIRE(norm2->data.f32[0] < 0.5 + 1e-5, "norm2 should be smaller than max_norm");
	// Round 2: two backward passes accumulate gradients before clipping.
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
	ccv_cnnp_model_parameters_clip_grad_norm(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), 2, 0.5, 0);
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(old_norm2), 0);
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(norm2), 0);
	ccv_cnnp_model_parameter_gradients_map(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(old_norm2, norm2), 0);
	REQUIRE(norm2->data.f32[0] < 0.5 + 1e-5, "norm2 should be smaller than max_norm");
	ccv_cnnp_model_apply_gradients(final, 0);
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_nnc_tensor_free(outgrad0);
	ccv_nnc_tensor_free(outgrad1);
	ccv_cnnp_model_free(final);
	ccv_nnc_tensor_free(old_norm2);
	ccv_nnc_tensor_free(norm2);
}
789
790
// Trains one model, then verifies parameter transfer: copying parameters
// into a freshly compiled model and into a ccv_cnnp_model_copy clone must
// reproduce the trained output, while zeroing the clone's parameters must
// reproduce the untrained x = 0 output, (0 + 1 + 1 - 10)^2 = 64.
TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and copy parameter to a new model entirely")
{
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 10;
	int i;
	// Train the original model for 10 SGD steps.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	const float o_final = o_tensor->data.f32[0];
	// A brand-new model with parameters copied over must match.
	ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b);
	ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
	// Zeroing the copied parameters must give the x = 0 output.
	ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 64, 1e-5, "should match the output when x is 0");
	// A deep copy of the trained model, with parameters re-copied, must match too.
	ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final);
	ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_cnnp_model_free(final);
	ccv_cnnp_model_free(final2);
	ccv_cnnp_model_free(final3);
}
839
840
// Trains one model, then verifies ccv_cnnp_model_parameters_zip_map by
// blending parameters of a second model (all set to 1) with the trained
// model via ADD_FORWARD(0.6, 0.4) and checking the blended output against a
// hand-computed 0.6 * 1 + 0.4 * x_trained.
TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and merge parameters with a model")
{
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 10;
	int i;
	// Train the original model for 10 SGD steps.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	const float o_final = o_tensor->data.f32[0];
	ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b);
	ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
	// Set final2's parameter to 1: output becomes (1 * 2 + 1 + 1 - 10)^2 = 36.
	ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 36, 1e-5, "should match the output when x is 1");
	// Blend: final2_param = 0.6 * final2_param + 0.4 * final_param.
	ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, a, 0);
	// Sanity-check the trained parameter's shape (a single scalar).
	const ccv_nnc_tensor_param_t params = ccv_cnnp_model_parameter_tensor_params(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	REQUIRE_EQ(1, params.dim[0], "should match parameter shape");
	REQUIRE_EQ(0, params.dim[1], "should match parameter shape");
	ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor);
	// Recompute the blended weight on the host and compare the model output.
	const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6;
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], (x_final * 2 + 1 + 1 - 10) * (x_final * 2 + 1 + 1 - 10), 1e-5, "should match the previous output");
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(x_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_cnnp_model_free(final);
	ccv_cnnp_model_free(final2);
}
894
895
// Builds w * x + c with two trainable scalars (w in "mul", c in "add") and a
// manual L2 loss against the target 12, then verifies per-parameter
// minimizer control: freezing one parameter (CMD_NOOP minimizer) while
// training the other, and that alternating which side trains still converges
// the loss to 0.
TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate convergence")
{
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	x->data.f32[0] = 1; // initial value of the trainable "mul" weight
	ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(
			KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	y->data.f32[0] = 2; // initial value of the trainable "add" weight
	ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
	ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), "seq");
	ccv_cnnp_model_io_t input = ccv_cnnp_input();
	ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
	ccv_cnnp_model_io_t fit = ccv_cnnp_input();
	// Because we don't have L2 loss function available yet, manually create L2 loss.
	ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
		MODEL_IO_LIST(left_out, fit));
	ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
		MODEL_IO_LIST(diff, diff));
	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	// Train add exclusively.
	ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	a_tensor->data.f32[0] = 2; // x input
	f_tensor->data.f32[0] = 12; // fit target
	o_tensor->data.f32[0] = 12; // sentinel; the first evaluate overwrites it
	int i;
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 12, 1e-5, "after 10 iterations, output should not be the original");
	// Switch to train mul exclusively.
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS)));
	float old_o = o_tensor->data.f32[0];
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE(o_tensor->data.f32[0] < old_o, "we should be closer to 0 at this point");
	// Smaller learning rate for mul; run long enough to converge.
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	for (i = 0; i < 1000; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "the mean squared error should be 0 at this point");
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(x);
	ccv_nnc_tensor_free(y);
	ccv_cnnp_model_free(final);
}
980
981
// Same w * x + c model as the convergence test, but verifies the learned
// parameter VALUES rather than just the loss: with w frozen at 1, c must
// learn ~10 (1 * 2 + 10 = 12); then with c reset to 2 and frozen, w must
// learn ~5 (5 * 2 + 2 = 12). Also exercises ccv_cnnp_model_set_parameter /
// ccv_cnnp_model_parameter_copy round-trips through the x tensor.
//
// Fix: the final REQUIRE message said "the weight on add should be 10" — a
// copy-paste from the earlier assertion — while the check is on mul's weight
// converging to 5.
TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate learn-ability")
{
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	x->data.f32[0] = 1; // initial "mul" weight; later reused as a copy buffer
	ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(
			KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	y->data.f32[0] = 2; // initial "add" weight; later used to reset it
	ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
	ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), "seq");
	ccv_cnnp_model_io_t input = ccv_cnnp_input();
	ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
	ccv_cnnp_model_io_t fit = ccv_cnnp_input();
	// Because we don't have L2 loss function available yet, manually create L2 loss.
	ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
		MODEL_IO_LIST(left_out, fit));
	ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
		MODEL_IO_LIST(diff, diff));
	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP());
	// Pin mul's weight to x (= 1) so only add has to account for the target.
	ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(mul, 0, 0), x);
	// Train add exclusively.
	ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	a_tensor->data.f32[0] = 2; // x input
	f_tensor->data.f32[0] = 12; // fit target
	o_tensor->data.f32[0] = 12; // sentinel; overwritten by the first evaluate
	int i;
	for (i = 0; i < 1000; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point");
	// With w frozen at 1, the learned bias must be ~10 (1 * 2 + 10 = 12).
	ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(add, 0, 0), x);
	REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 10, 1e-1, "the weight on add should be 10");
	// Switch to train mul exclusively. Reset its value.
	ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(add, 0, 0), y);
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS)));
	for (i = 0; i < 1000; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point");
	// With the bias reset to 2 and frozen, w must learn ~5 (5 * 2 + 2 = 12).
	ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(mul, 0, 0), x);
	REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 5, 1e-2, "the weight on mul should be 5");
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(x);
	ccv_nnc_tensor_free(y);
	ccv_cnnp_model_free(final);
}
1061
1062
TEST_CASE("a compiled model absorbs a new model with slightly different configuration")
{
  // Build and compile a 3-layer dense model against a batch-of-2 input.
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_dense(2, 0, 0),
    ccv_cnnp_dense(2, 0, 0),
    ccv_cnnp_dense(1, 0, 0)
  ), "multi_layer");
  ccv_nnc_tensor_param_t x = CPU_TENSOR_NHWC(32F, 2, 2);
  ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(x), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP());
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, x, 0);
  dsfmt_t dsfmt;
  int i;
  // Fixed seed so the evaluation results below are reproducible.
  dsfmt_init_gen_rand(&dsfmt, 1);
  for (i = 0; i < 4; i++)
    x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
  // Baseline forward pass with the original batch-of-2 shape.
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
    .requires_grad = 1,
  }, TENSOR_LIST(x_tensor), TENSOR_LIST(y_tensor), 0, 0);
  // Absorb an identically-structured model compiled for batch size 1; the
  // already-trained parameters should be retained across the absorb.
  ccv_cnnp_model_t* const small_model = ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_dense(2, 0, 0),
    ccv_cnnp_dense(2, 0, 0),
    ccv_cnnp_dense(1, 0, 0)
  ), "multi_layer");
  x = CPU_TENSOR_NHWC(32F, 1, 2);
  ccv_cnnp_model_absorb(multi_layer, small_model, TENSOR_PARAM_LIST(x));
  ccv_nnc_tensor_t* const small_x = ccv_nnc_tensor_new(0, x, 0);
  ccv_nnc_tensor_t* const small_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
  // Reuse the first row of the original input so outputs are comparable.
  memcpy(small_x->data.f32, x_tensor->data.f32, sizeof(float) * 2);
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
    .requires_grad = 1,
  }, TENSOR_LIST(small_x), TENSOR_LIST(small_y), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(small_y->data.f32[0], y_tensor->data.f32[0], 1e-5, "the parameters retained, the value should be too");
  // Absorb again with a larger batch (4); first 4 values are the original
  // input, the rest are fresh random values.
  ccv_cnnp_model_t* const large_model = ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_dense(2, 0, 0),
    ccv_cnnp_dense(2, 0, 0),
    ccv_cnnp_dense(1, 0, 0)
  ), "multi_layer");
  x = CPU_TENSOR_NHWC(32F, 4, 2);
  ccv_cnnp_model_absorb(multi_layer, large_model, TENSOR_PARAM_LIST(x));
  ccv_nnc_tensor_t* const large_x = ccv_nnc_tensor_new(0, x, 0);
  ccv_nnc_tensor_t* const large_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 1), 0);
  memcpy(large_x->data.f32, x_tensor->data.f32, sizeof(float) * 4);
  for (i = 4; i < 8; i++)
    large_x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
    .requires_grad = 1,
  }, TENSOR_LIST(large_x), TENSOR_LIST(large_y), 0, 0);
  // Only the first two outputs correspond to the original batch-of-2 input.
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, large_y->data.f32, y_tensor->data.f32, 2, 1e-5, "the parameters retained, the value should be too");
  ccv_nnc_tensor_free(y_tensor);
  ccv_nnc_tensor_free(x_tensor);
  ccv_nnc_tensor_free(small_y);
  ccv_nnc_tensor_free(small_x);
  ccv_nnc_tensor_free(large_y);
  ccv_nnc_tensor_free(large_x);
  ccv_cnnp_model_free(multi_layer);
}
1119
1120
TEST_CASE("use linear model's parameter as the input for more computation")
{
  // y = (w * x + b) * w, trained with a hand-built L2 loss against target t.
  // With t = 3 at x = 1 and t = 4 at x = 2, the fit converges to w^2 = 1 slope
  // and w * b = 2 intercept territory; the loss should approach 0.
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 0, 0);
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST(
    linear,
  ), "multi_layer");
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(multi_layer, MODEL_IO_LIST(input));
  // Feed the dense layer's own weight back in as a second matmul operand.
  out = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, NO_TRANSPOSE, 0), MODEL_IO_LIST(out, ccv_cnnp_model_parameters(linear, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0)));
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
  // Because we don't have L2 loss function available yet, manually create L2 loss.
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(out, fit));
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(diff, diff));
  ccv_cnnp_model_t* const model = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
  const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32F, 1);
  const ccv_nnc_tensor_param_t t_params = CPU_TENSOR_NHWC(32F, 1);
  ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP());
  // Also exercise model copy: train the copy, free the original.
  ccv_cnnp_model_t* const final = ccv_cnnp_model_copy(model);
  ccv_cnnp_model_free(model);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP());
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  // Deterministic starting point: weight 1.4, bias 0.
  x->data.f32[0] = 1.4;
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), x);
  x->data.f32[0] = 0;
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_BIAS, 0), x);
  int i;
  for (i = 0; i < 1000; i++)
  {
    // Alternate between the two training points.
    if (i % 2 == 0)
    {
      x->data.f32[0] = 1;
      t->data.f32[0] = 3;
    } else {
      x->data.f32[0] = 2;
      t->data.f32[0] = 4;
    }
    // Step-decay learning-rate schedule. Check the larger threshold first:
    // the original `if (i >= 100) ... else if (i >= 500)` ordering made the
    // 0.001 branch unreachable (every i >= 500 is also >= 100).
    float lr = 0.05;
    if (i >= 500)
      lr = 0.001;
    else if (i >= 100)
      lr = 0.01;
    ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, lr, 1, 0, 0, 0), 0, 0, 0);
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  x->data.f32[0] = 1;
  t->data.f32[0] = 3;
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 0, 1e-2, "the mean squared error should be 0 at this point");
  ccv_nnc_tensor_free(x);
  ccv_nnc_tensor_free(t);
  ccv_nnc_tensor_free(y);
  ccv_cnnp_model_free(final);
}
1188
1189
TEST_CASE("model can have multiple outputs and some of them can be used in the computation")
{
  // Two chained 1x1 dense layers; both the intermediate and the final
  // activation are exposed as model outputs.
  ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0);
  ccv_cnnp_model_t* const linear2 = ccv_cnnp_dense(1, 1, 0);
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
  ccv_cnnp_model_io_t mid_out = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input));
  ccv_cnnp_model_io_t last_out = ccv_cnnp_model_apply(linear2, MODEL_IO_LIST(mid_out));
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(mid_out, last_out), 0);
  ccv_nnc_tensor_t* const in_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const mid_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const out_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_param_t in_params = CPU_TENSOR_NHWC(32F, 1);
  ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(in_params), CMD_NOOP(), CMD_NOOP());
  // Pin both layers to known parameter values, then run input 10 through.
  mid_tensor->data.f32[0] = 2.4;
  ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear1, ALL_PARAMETERS, 0), mid_tensor);
  mid_tensor->data.f32[0] = -1.5;
  ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear2, ALL_PARAMETERS, 0), mid_tensor);
  in_tensor->data.f32[0] = 10;
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(in_tensor), TENSOR_LIST(mid_tensor, out_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(mid_tensor->data.f32[0], 10 * 2.4, 1e-5, "should be equal to expected value");
  REQUIRE_EQ_WITH_TOLERANCE(out_tensor->data.f32[0], -10 * 2.4 * 1.5, 1e-5, "should be equal to expected value");
  ccv_nnc_tensor_free(in_tensor);
  ccv_nnc_tensor_free(mid_tensor);
  ccv_nnc_tensor_free(out_tensor);
  ccv_cnnp_model_free(multi_layer);
}
1215
1216
TEST_CASE("index select model can select a part from vocabulary")
{
  // Fill a 10x8 vocabulary with seeded random values, gather rows 1, 0, 5,
  // and verify each gathered row matches the source row exactly.
  ccv_cnnp_model_t* const index_select = ccv_cnnp_index_select(0);
  const ccv_nnc_tensor_param_t vocab_params = CPU_TENSOR_NHWC(32F, 10, 8);
  ccv_nnc_tensor_t* const vocab = ccv_nnc_tensor_new(0, vocab_params, 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 1);
  int k;
  for (k = 0; k < 10 * 8; k++)
    vocab->data.f32[k] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  const ccv_nnc_tensor_param_t index_params = CPU_TENSOR_NHWC(32S, 3);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, index_params, 0);
  ccv_cnnp_model_compile(index_select, TENSOR_PARAM_LIST(vocab_params, index_params), CMD_NOOP(), CMD_NOOP());
  indices->data.i32[0] = 1;
  indices->data.i32[1] = 0;
  indices->data.i32[2] = 5;
  ccv_nnc_tensor_t* const gathered = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0);
  ccv_cnnp_model_evaluate(index_select, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(vocab, indices), TENSOR_LIST(gathered), 0, 0);
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, vocab->data.f32 + 1 * 8, gathered->data.f32, 8, 1e-5, "index 1st vector");
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, vocab->data.f32 + 0 * 8, gathered->data.f32 + 8, 8, 1e-5, "index 0th vector");
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, vocab->data.f32 + 5 * 8, gathered->data.f32 + 8 * 2, 8, 1e-5, "index 5th vector");
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(gathered);
  ccv_nnc_tensor_free(vocab);
  ccv_cnnp_model_free(index_select);
}
1242
1243
TEST_CASE("embedding model can generate vector embedding")
{
  // 10-entry, 8-dim embedding table: look up ids 1, 0, 5, then copy out the
  // weight matrix and verify each looked-up row equals the matching weight row.
  ccv_cnnp_model_t* const embedding = ccv_cnnp_embedding(CCV_32F, 10, 8, 0);
  const ccv_nnc_tensor_param_t id_params = CPU_TENSOR_NHWC(32S, 3);
  ccv_nnc_tensor_t* const ids = ccv_nnc_tensor_new(0, id_params, 0);
  ccv_cnnp_model_compile(embedding, TENSOR_PARAM_LIST(id_params), CMD_NOOP(), CMD_NOOP());
  ids->data.i32[0] = 1;
  ids->data.i32[1] = 0;
  ids->data.i32[2] = 5;
  ccv_nnc_tensor_t* const lookup = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0);
  ccv_cnnp_model_evaluate(embedding, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(ids), TENSOR_LIST(lookup), 0, 0);
  ccv_nnc_tensor_t* const weight = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 8), 0);
  ccv_cnnp_model_parameter_copy(embedding, ccv_cnnp_model_parameters(embedding, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), weight);
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, weight->data.f32 + 1 * 8, lookup->data.f32, 8, 1e-5, "index 1st vector");
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, weight->data.f32 + 0 * 8, lookup->data.f32 + 8, 8, 1e-5, "index 0th vector");
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, weight->data.f32 + 5 * 8, lookup->data.f32 + 8 * 2, 8, 1e-5, "index 5th vector");
  ccv_nnc_tensor_free(ids);
  ccv_nnc_tensor_free(lookup);
  ccv_nnc_tensor_free(weight);
  ccv_cnnp_model_free(embedding);
}
1264
1265
// Builds one bottleneck residual block: 1x1 -> 3x3 (strided) -> 1x1
// convolutions, each followed by batch norm (ReLU after the first two), with
// the shortcut summed in before a final ReLU. The 3x3 conv expands/contracts
// between `filters` and `filters * expansion` channels.
// When `projection_shortcut` is set, the shortcut is average-pooled down by
// `strides` and then projected to filters * expansion channels with a 1x1
// convolution; otherwise the input is used as-is (identity shortcut).
static ccv_cnnp_model_t* _resnet_block_new(const int filters, const int expansion, const int strides, const int projection_shortcut)
{
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
  ccv_cnnp_model_io_t shortcut = input;
  if (projection_shortcut)
  {
    // Downsample with average pooling before the 1x1 projection (this
    // pool-then-project ordering matches the ResNet "v1d" variant).
    ccv_cnnp_model_t* const avgdown = ccv_cnnp_average_pool(DIM_ALLOC(strides, strides), HINT((strides, strides), (0, 0)), 0);
    shortcut = ccv_cnnp_model_apply(avgdown, MODEL_IO_LIST(input));
    ccv_cnnp_model_t* const conv0 = ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), 1, HINT((1, 1), (0, 0)), 0);
    shortcut = ccv_cnnp_model_apply(conv0, MODEL_IO_LIST(shortcut));
  }
  // Main branch, stage 1: 1x1 conv reduces to `filters` channels.
  ccv_cnnp_model_t* const conv1 = ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_convolution(1, filters, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0),
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
    ccv_cnnp_relu(0)
  ), 0);
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(conv1, MODEL_IO_LIST(input));
  // Stage 2: 3x3 conv carries the spatial stride.
  ccv_cnnp_model_t* const conv2 = ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_convolution(1, filters, DIM_ALLOC(3, 3), 0, HINT((strides, strides), (1, 1)), 0),
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
    ccv_cnnp_relu(0)
  ), 0);
  output = ccv_cnnp_model_apply(conv2, MODEL_IO_LIST(output));
  // Stage 3: 1x1 conv expands back to filters * expansion; no ReLU here —
  // the activation comes after the residual sum.
  ccv_cnnp_model_t* const conv3 = ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0),
    ccv_cnnp_batch_norm(0.9, 1e-4, 0)
  ), 0);
  output = ccv_cnnp_model_apply(conv3, MODEL_IO_LIST(output));
  ccv_cnnp_model_t* const add = ccv_cnnp_sum(0);
  output = ccv_cnnp_model_apply(add, MODEL_IO_LIST(output, shortcut));
  ccv_cnnp_model_t* const relu = ccv_cnnp_relu(0);
  output = ccv_cnnp_model_apply(relu, MODEL_IO_LIST(output));
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 0);
}
1299
1300
static ccv_cnnp_model_t* _resnet_block_layer_new(const int filters, const int expansion, const int strides, const int blocks)
1301
4
{
1302
4
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
1303
4
  ccv_cnnp_model_t* first_block = _resnet_block_new(filters, expansion, strides, 1);
1304
4
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(first_block, MODEL_IO_LIST(input));
1305
4
  int i;
1306
16
  for (i = 1; i < blocks; 
i++12
)
1307
12
  {
1308
12
    ccv_cnnp_model_t* block = _resnet_block_new(filters, expansion, 1, 0);
1309
12
    output = ccv_cnnp_model_apply(block, MODEL_IO_LIST(output));
1310
12
  }
1311
4
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 0);
1312
4
}
1313
1314
// Feature pyramid network over backbone features c[0..c_size-1] (finest to
// coarsest). Writes one pyramid output per input level into p[0..c_size-1]:
// a 1x1 lateral conv on the coarsest level, then, walking toward the finest
// level, lateral conv + 2x upsample of the level above, summed and smoothed
// by a 3x3 conv.
static void _fpn(const int d, const ccv_cnnp_model_io_t* const c, const int c_size, ccv_cnnp_model_io_t* const p)
{
  ccv_cnnp_model_io_t top = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(c[c_size - 1]));
  p[c_size - 1] = top;
  int level;
  for (level = c_size - 2; level >= 0; level--) {
    const ccv_cnnp_model_io_t lateral = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(c[level]));
    const ccv_cnnp_model_io_t upsampled = ccv_cnnp_model_apply(ccv_cnnp_upsample(2, 2, 0), MODEL_IO_LIST(top));
    const ccv_cnnp_model_io_t merged = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(lateral, upsampled));
    top = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(3, 3), 1, HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(merged));
    p[level] = top;
  }
}
1328
1329
// Builds a ResNet50-v1d backbone with an FPN neck and a single shared RPN
// conv head applied to all 5 pyramid levels. Returns a model with one input
// (the image) and 5 outputs (the RPN proposals per level).
ccv_cnnp_model_t* _imagenet_resnet50_v1d_fpn(void)
{
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
  // "Deep stem" (v1d): three 3x3 convs (the first strided) in place of a
  // single 7x7, followed by a strided 3x3 max pool.
  ccv_cnnp_model_t* init_conv = ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), 1, HINT((2, 2), (1, 1)), 0),
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), 1, HINT((1, 1), (1, 1)), 0),
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), 1, HINT((1, 1), (1, 1)), 0),
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (1, 1)), 0)
  ), 0);
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(init_conv, MODEL_IO_LIST(input));
  // The four residual stages of ResNet50 (3, 4, 6, 3 blocks); tap each
  // stage's output as c2..c5 for the FPN.
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(64, 4, 1, 3), MODEL_IO_LIST(output));
  const ccv_cnnp_model_io_t c2 = output;
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(128, 4, 2, 4), MODEL_IO_LIST(output));
  const ccv_cnnp_model_io_t c3 = output;
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(256, 4, 2, 6), MODEL_IO_LIST(output));
  const ccv_cnnp_model_io_t c4 = output;
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(512, 4, 2, 3), MODEL_IO_LIST(output));
  const ccv_cnnp_model_io_t c5 = output;
  const ccv_cnnp_model_io_t c[] = { c2, c3, c4, c5 };
  ccv_cnnp_model_io_t p[5];
  // _fpn fills p[0..3]; the extra coarsest level p[4] is a 2x2 stride-2
  // average pool of p[3].
  _fpn(256, c, 4, p);
  p[4] = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(2, 2), HINT((2, 2), (0, 0)), 0), MODEL_IO_LIST(p[3]));
  // 3 aspect ratios (1:2, 1:1, 2:1). Each has 4 + 2 (x, y, w, h, object, non-object), total 18.
  ccv_cnnp_model_t* const rpn_proposals = ccv_cnnp_convolution(1, 18, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), "rpn");
  ccv_cnnp_model_io_t proposals[5];
  int i;
  // The same rpn_proposals model instance is applied to every level, so the
  // head's weights are shared across the pyramid.
  for (i = 0; i < 5; i++)
    proposals[i] = ccv_cnnp_model_apply(rpn_proposals, MODEL_IO_LIST(p[i]));
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), proposals, 5, 0);
}
1365
1366
TEST_CASE("FPN-RPN use cnnp model with multiple outputs")
{
  ccv_cnnp_model_t* rpn = _imagenet_resnet50_v1d_fpn();
  // NOTE(review): compiled with GPU tensor params, but only
  // ccv_cnnp_model_tensor_auto is called below — this test exercises shape
  // inference for the 5 pyramid outputs; no evaluation is run.
  ccv_nnc_tensor_param_t input_params = GPU_TENSOR_NCHW(000, 32F, 4, 3, 835, 1146);
  ccv_cnnp_model_compile(rpn, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
  ccv_nnc_tensor_param_t output_params[5];
  ccv_cnnp_model_tensor_auto(rpn, output_params, 5);
  // Each pyramid level's spatial dims (NCHW dim[2] x dim[3]) are roughly
  // half the previous level's, starting from the 835x1146 input.
  REQUIRE_EQ(output_params[0].dim[2], 209, "should be equal");
  REQUIRE_EQ(output_params[0].dim[3], 287, "should be equal");
  REQUIRE_EQ(output_params[1].dim[2], 105, "should be equal");
  REQUIRE_EQ(output_params[1].dim[3], 144, "should be equal");
  REQUIRE_EQ(output_params[2].dim[2], 53, "should be equal");
  REQUIRE_EQ(output_params[2].dim[3], 72, "should be equal");
  REQUIRE_EQ(output_params[3].dim[2], 27, "should be equal");
  REQUIRE_EQ(output_params[3].dim[3], 36, "should be equal");
  REQUIRE_EQ(output_params[4].dim[2], 13, "should be equal");
  REQUIRE_EQ(output_params[4].dim[3], 18, "should be equal");
  ccv_cnnp_model_free(rpn);
}
1385
1386
#include "case_main.h"