Coverage Report

Created: 2021-09-30 20:21

/home/liu/buildslave/linux-x64-runtests/build/test/unit/nnc/cnnp.core.tests.c
#include "case.h"
#include "ccv_case.h"
#include "ccv_nnc_case.h"
#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include "3rdparty/dsfmt/dSFMT.h"

TEST_SETUP()
{
  ccv_nnc_init();
}

static ccv_cnnp_model_t* simple_cifar_10(void)
{
  return ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    ccv_cnnp_flatten(0),
    ccv_cnnp_dense(256, 0, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_dense(10, 0, 0),
    ccv_cnnp_softmax(0)
  ), 0);
}
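
// The fixture above is a small LeNet-style CIFAR-10 classifier: three
// convolution + pooling stages, a 256-unit dense layer with relu, and a
// 10-way dense + softmax head. The test below runs it through the full
// compile -> evaluate -> fit -> checkpoint round trip on a 31x31x3 input.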

TEST_CASE("compile simple cifar-10 model")
{
  ccv_cnnp_model_t* const sequential0 = simple_cifar_10();
  ccv_cnnp_model_t* const sequential = ccv_cnnp_model_copy(sequential0);
  ccv_cnnp_model_free(sequential0);
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
  dsfmt_t dsfmt;
  int i;
  dsfmt_init_gen_rand(&dsfmt, 1);
  for (i = 0; i < 31 * 31 * 3; i++)
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  int t = 0;
  float max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  const int target = (t + 1) % 10;
  REQUIRE_NOT_EQ(target, t, "should not fit");
  // Doing training.
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  fit_tensor->data.f32[0] = target;
  for (i = 0; i < 100; i++)
    ccv_cnnp_model_fit(sequential, TENSOR_LIST(input_tensor), TENSOR_LIST(fit_tensor), TENSOR_LIST(output_tensor), 0, 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  // After training, it should fit.
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  remove("/tmp/compile_simple_cifar_10_model.checkpoint");
  ccv_cnnp_model_checkpoint(sequential, "/tmp/compile_simple_cifar_10_model.checkpoint", 0);
  CNNP_MODEL_GEN(sequential, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(sequential);
  ccv_cnnp_model_t* const sequential2 = simple_cifar_10();
  ccv_cnnp_model_compile(sequential2, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  // Load from the checkpoint file.
  ccv_cnnp_model_checkpoint(sequential2, "/tmp/compile_simple_cifar_10_model.checkpoint", 0);
  remove("/tmp/compile_simple_cifar_10_model.checkpoint");
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  ccv_cnnp_model_evaluate(sequential2, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  ccv_cnnp_model_free(sequential2);
  ccv_nnc_tensor_free(input_tensor);
  ccv_nnc_tensor_free(fit_tensor);
  ccv_nnc_tensor_free(output_tensor);
}
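
// As exercised above, ccv_cnnp_model_checkpoint serves both directions of
// the round trip: called on the trained model it writes the checkpoint file,
// and called on the freshly compiled sequential2 it reads the parameters
// back in (per the "Load from the checkpoint file." step), so the reloaded
// model reproduces the fitted target class.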

static int _ccv_cnnp_model_notified = 0;

static void _ccv_cnnp_model_hook(const ccv_cnnp_model_t* const model, const int tag, void* const payload, void* const context)
{
  if (payload)
    ++_ccv_cnnp_model_notified;
}

TEST_CASE("inception layer for model")
{
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
  _ccv_cnnp_model_notified = 0;
  ccv_cnnp_model_t* const conv_1 = ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0);
  ccv_cnnp_model_notify_hook(conv_1, _ccv_cnnp_model_hook, 0);
  ccv_cnnp_model_io_t tower_1 = ccv_cnnp_model_apply(conv_1, MODEL_IO_LIST(x));
  ccv_cnnp_model_t* const relu_1 = ccv_cnnp_relu(0);
  ccv_cnnp_model_notify_hook(relu_1, _ccv_cnnp_model_hook, 0);
  tower_1 = ccv_cnnp_model_apply(relu_1, MODEL_IO_LIST(tower_1));
  tower_1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), 0, HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(tower_1));
  tower_1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_1));

  ccv_cnnp_model_io_t tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(x));
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2));
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0), MODEL_IO_LIST(tower_2));
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2));

  ccv_cnnp_model_io_t tower_3 = ccv_cnnp_model_apply(ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(x));
  tower_3 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(tower_3));
  tower_3 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_3));
  ccv_cnnp_model_t* const add_1 = ccv_cnnp_sum(0);
  ccv_cnnp_model_notify_hook(add_1, _ccv_cnnp_model_hook, 0);
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(add_1, MODEL_IO_LIST(tower_1, tower_2, tower_3));
  REQUIRE_EQ(0, _ccv_cnnp_model_notified, "haven't notified");
  ccv_cnnp_model_t* const inception0 = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(output), 0);
  ccv_cnnp_model_notify(inception0, 0, inception0);
  ccv_cnnp_model_t* const inception = ccv_cnnp_model_copy(inception0);
  REQUIRE_EQ(3, _ccv_cnnp_model_notified, "3 models changed owner");
  ccv_cnnp_model_free(inception0);
  const ccv_nnc_tensor_param_t input = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  ccv_cnnp_model_compile(inception, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  CNNP_MODEL_GEN(inception, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(inception);
}
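
// The hook only counts notifications that carry a payload: nothing fires
// while the three towers are merely wired up, and by the time inception0 has
// broadcast itself via ccv_cnnp_model_notify (and been copied), each of the
// three hooked models (conv_1, relu_1, add_1) has been notified exactly
// once, giving the count of 3 asserted above.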

static ccv_cnnp_model_t* _ccv_multiple_outputs_functional_model(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
{
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(input0));
  output0 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output0));
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), 0, HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(input1));
  output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1));
  ccv_cnnp_model_t* model0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(output0, output1), 0);
  input0 = ccv_cnnp_input();
  input1 = ccv_cnnp_input();
  output0 = ccv_cnnp_model_apply(model0, MODEL_IO_LIST(input0, input1));
  ccv_cnnp_model_io_t input2 = ccv_cnnp_input();
  output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0), MODEL_IO_LIST(input2));
  output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1));
  ccv_cnnp_model_t* interim = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0, output1), 0);
  input0 = ccv_cnnp_input();
  input1 = ccv_cnnp_input();
  input2 = ccv_cnnp_input();
  output0 = ccv_cnnp_model_apply(interim, MODEL_IO_LIST(input0, input1, input2));
  output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0));
  return ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0), 0);
}
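
// Note how a single model IO handle can stand for several outputs here:
// applying model0 (two outputs) yields one output0 handle, and applying
// interim (also two outputs) yields one handle that ccv_cnnp_sum then
// reduces to a single tensor. The test below verifies this through
// ccv_cnnp_dynamic_new, which defers graph construction to this callback.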

TEST_CASE("functional model's IO can represent multiple outputs")
{
  ccv_cnnp_model_t* const final = ccv_cnnp_dynamic_new(_ccv_multiple_outputs_functional_model, 0, 0);
  const ccv_nnc_tensor_param_t a0 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  const ccv_nnc_tensor_param_t a1 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  const ccv_nnc_tensor_param_t a2 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, a2), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(final);
}

TEST_CASE("make sure reuse model enables share weights")
{
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0);
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0, output1));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(final_output), 0);
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(final);
}
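
// Applying the same dense model instance to two different inputs reuses one
// underlying weight/bias pair instead of allocating a second copy, so the
// two branches stay tied during training. The shared-weights L2 test below
// relies on exactly this behavior.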

TEST_CASE("train model with share weights and L2 loss")
{
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0);
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
  ccv_cnnp_model_io_t fit0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t fit1 = ccv_cnnp_input();
  // Because we don't have an L2 loss function available yet, manually construct the L2 loss.
  ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(output0, fit0));
  ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(diff0, diff0));
  ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(output1, fit1));
  ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(diff1, diff1));
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 0);
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
  ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0);
  ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0);
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  a0_tensor->data.f32[0] = 1;
  a1_tensor->data.f32[0] = 3;
  b0_tensor->data.f32[0] = 2;
  b1_tensor->data.f32[0] = 3;
  int i;
  for (i = 0; i < 10; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
  for (i = 0; i < 100; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
  for (i = 0; i < 1000; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  a0_tensor->data.f32[0] = 2;
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
  b0_tensor->data.f32[0] = 2; // diff is 0.5
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "we should have linearly regressed this");
  ccv_nnc_tensor_free(a0_tensor);
  ccv_nnc_tensor_free(a1_tensor);
  ccv_nnc_tensor_free(b0_tensor);
  ccv_nnc_tensor_free(b1_tensor);
  ccv_nnc_tensor_free(o0_tensor);
  ccv_cnnp_model_free(final);
}
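
// The hand-built loss above is L = (w*a0 + c - b0)^2 + (w*a1 + c - b1)^2,
// assembled from CMD_ADD_FORWARD(1, -1) for the difference and
// CMD_EWPROD_FORWARD for the square. With training pairs (a, b) of (1, 2)
// and (3, 3), the least-squares solution is w = 0.5, c = 1.5, so the probe
// point a0 = a1 = 2 predicts 2.5; the residuals against b0 = 2 and b1 = 3
// are +0.5 and -0.5, giving the loss 0.5^2 + 0.5^2 = 0.5 asserted above.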

static ccv_cnnp_model_t* simple_cifar_10_no_softmax(void)
{
  return ccv_cnnp_sequential_new(MODEL_LIST(
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), 0, HINT((1, 1), (2, 2)), 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    ccv_cnnp_flatten(0),
    ccv_cnnp_dense(256, 0, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_dense(10, 0, 0)
  ), 0);
}

TEST_CASE("evaluate cifar-10 model in multi-stage mode")
{
  ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax();
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), CMD_NOOP());
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
  dsfmt_t dsfmt;
  int i;
  dsfmt_init_gen_rand(&dsfmt, 1);
  for (i = 0; i < 31 * 31 * 3; i++)
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  int t = 0;
  float max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  const int target = (t + 1) % 10;
  REQUIRE_NOT_EQ(target, t, "should not fit");
  // Doing training.
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  fit_tensor->data.f32[0] = target;
  ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  for (i = 0; i < 100; i++)
  {
    ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1
    }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0);
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0);
    ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
    ccv_cnnp_model_apply_gradients(sequential, 0);
  }
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  // After training, it should fit.
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  ccv_nnc_tensor_free(ingrad_tensor);
  ccv_nnc_tensor_free(fit_tensor);
  ccv_nnc_tensor_free(softmax_tensor);
  ccv_nnc_tensor_free(loss_tensor);
  ccv_nnc_tensor_free(input_tensor);
  ccv_nnc_tensor_free(output_tensor);
  ccv_cnnp_model_free(sequential);
}
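
// Multi-stage mode splits ccv_cnnp_model_fit into its three phases: evaluate
// with .requires_grad = 1, compute the loss gradient outside the model (here
// softmax cross-entropy via ccv_nnc_cmd_exec), then feed that gradient back
// through ccv_cnnp_model_backward and commit the update with
// ccv_cnnp_model_apply_gradients. This is also why the model is compiled
// with CMD_NOOP() as its loss: the loss lives outside the compiled graph.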

TEST_CASE("evaluate cifar-10 model in multi-stage mode with gradient accumulated")
{
  ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax();
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.00033, 1, 0.99, 0.9, 0.9), CMD_NOOP());
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
  dsfmt_t dsfmt;
  int i;
  dsfmt_init_gen_rand(&dsfmt, 1);
  for (i = 0; i < 31 * 31 * 3; i++)
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  int t = 0;
  float max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  const int target = (t + 1) % 10;
  REQUIRE_NOT_EQ(target, t, "should not fit");
  // Doing training.
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  fit_tensor->data.f32[0] = target;
  ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  for (i = 0; i < 100; i++)
  {
    ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1
    }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0);
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0);
    ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
    // Backward again on every other iteration to accumulate the gradient.
    if (i % 2 == 0)
    {
      ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
      // And a third time on every sixth iteration.
      if (i % 3 == 0)
        ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
    }
    ccv_cnnp_model_apply_gradients(sequential, 0);
  }
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  // After training, it should fit.
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  ccv_nnc_tensor_free(ingrad_tensor);
  ccv_nnc_tensor_free(fit_tensor);
  ccv_nnc_tensor_free(softmax_tensor);
  ccv_nnc_tensor_free(loss_tensor);
  ccv_nnc_tensor_free(input_tensor);
  ccv_nnc_tensor_free(output_tensor);
  ccv_cnnp_model_free(sequential);
}
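
// Calling ccv_cnnp_model_backward several times before a single
// ccv_cnnp_model_apply_gradients sums the parameter gradients; the update is
// only taken when apply_gradients runs. The smaller learning rate here
// (0.00033, roughly a third of the 0.001 used in the non-accumulated test)
// plausibly compensates for the iterations that stack two or three backward
// passes into one step.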

TEST_CASE("train model with share weights and L2 loss and check out gradients")
{
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0);
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
  ccv_cnnp_model_io_t fit0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t fit1 = ccv_cnnp_input();
  // Because we don't have an L2 loss function available yet, manually construct the L2 loss.
  ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(output0, fit0));
  ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(diff0, diff0));
  ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(output1, fit1));
  ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(diff1, diff1));
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 0);
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
  ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0);
  ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0);
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  // It should fit to 1 * 0.5 + 1.5 = 2 and 3 * 0.5 + 1.5 = 3.
  a0_tensor->data.f32[0] = 1;
  a1_tensor->data.f32[0] = 3;
  b0_tensor->data.f32[0] = 2;
  b1_tensor->data.f32[0] = 3;
  int i;
  for (i = 0; i < 10; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
  for (i = 0; i < 100; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
  for (i = 0; i < 1000; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  a0_tensor->data.f32[0] = 2;
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
  b0_tensor->data.f32[0] = 2; // diff is 0.5
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "we should have linearly regressed this");
  // Figure out the actual weight and bias term in the model.
  a0_tensor->data.f32[0] = 0;
  a1_tensor->data.f32[0] = 0;
  b0_tensor->data.f32[0] = 0;
  b1_tensor->data.f32[0] = 0;
  // The output will be 2 * bias^2.
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  const float bias = sqrtf(o0_tensor->data.f32[0] * 0.5);
  a0_tensor->data.f32[0] = 1;
  a1_tensor->data.f32[0] = 1;
  b0_tensor->data.f32[0] = 0;
  b1_tensor->data.f32[0] = 0;
  // The output will be 2 * (w + bias)^2.
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  const float w = sqrt(o0_tensor->data.f32[0] * 0.5) - bias;
  // Compute the out gradient to verify.
  a0_tensor->data.f32[0] = 2;
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
  b0_tensor->data.f32[0] = 2; // diff is 0.5
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .requires_grad = 1,
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  // Note that we have to use new tensors and keep them around, since they are bound to the model during evaluation.
  ccv_nnc_tensor_t* da0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* da1_tensor = ccv_nnc_tensor_new(0, a1, 0);
  ccv_nnc_tensor_t* db0_tensor = ccv_nnc_tensor_new(0, b0, 0);
  ccv_nnc_tensor_t* db1_tensor = ccv_nnc_tensor_new(0, b1, 0);
  ccv_nnc_tensor_t* do0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  do0_tensor->data.f32[0] = 1;
  ccv_cnnp_model_backward(final, TENSOR_LIST(do0_tensor), TENSOR_LIST(da0_tensor, da1_tensor, db0_tensor, db1_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(da0_tensor->data.f32[0], 2 * w * (w * 2 + bias - 2), 1e-5, "da0 = 2 * w * (w * a0 + bias - b0), thus 0.5");
  REQUIRE_EQ_WITH_TOLERANCE(da1_tensor->data.f32[0], 2 * w * (w * 2 + bias - 3), 1e-5, "da1 = 2 * w * (w * a1 + bias - b1), thus -0.5");
  REQUIRE_EQ_WITH_TOLERANCE(db0_tensor->data.f32[0], -2 * (w * 2 + bias - 2), 1e-5, "db0 = -2 * (w * a0 + bias - b0), thus -1");
  REQUIRE_EQ_WITH_TOLERANCE(db1_tensor->data.f32[0], -2 * (w * 2 + bias - 3), 1e-5, "db1 = -2 * (w * a1 + bias - b1), thus 1");
  ccv_nnc_tensor_free(a0_tensor);
  ccv_nnc_tensor_free(a1_tensor);
  ccv_nnc_tensor_free(b0_tensor);
  ccv_nnc_tensor_free(b1_tensor);
  ccv_nnc_tensor_free(o0_tensor);
  ccv_nnc_tensor_free(da0_tensor);
  ccv_nnc_tensor_free(da1_tensor);
  ccv_nnc_tensor_free(db0_tensor);
  ccv_nnc_tensor_free(db1_tensor);
  ccv_nnc_tensor_free(do0_tensor);
  ccv_cnnp_model_free(final);
}
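
// Gradient check for L = (w*a0 + bias - b0)^2 + (w*a1 + bias - b1)^2:
//   dL/da0 = 2 * w * (w*a0 + bias - b0)
//   dL/db0 = -2 * (w*a0 + bias - b0)
// and symmetrically for a1/b1. With w ~= 0.5, bias ~= 1.5, a0 = a1 = 2,
// b0 = 2, b1 = 3, the residuals are +0.5 and -0.5, so da0 ~= 0.5,
// da1 ~= -0.5, db0 ~= -1, db1 ~= 1, exactly the values asserted above.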

TEST_CASE("apply functional model as forward pass")
{
  ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(input));
  output = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(output));
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  b->data.f32[0] = -1;
  ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
  output = ccv_cnnp_model_apply(add, MODEL_IO_LIST(output));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), "final");
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  a0_tensor->data.f32[0] = 1.12;
  o0_tensor->data.f32[0] = 0;
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1");
  ccv_nnc_tensor_free(a0_tensor);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(o0_tensor);
  ccv_cnnp_model_free(final);
}

TEST_CASE("apply sequential model as forward pass")
{
  ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  b->data.f32[0] = -1;
  ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
  ccv_cnnp_model_t* const final = ccv_cnnp_sequential_new(MODEL_LIST(mul, mul, add), "seq");
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  a0_tensor->data.f32[0] = 1.12;
  o0_tensor->data.f32[0] = 0;
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1");
  ccv_nnc_tensor_free(a0_tensor);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(o0_tensor);
  ccv_cnnp_model_free(final);
}
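
// In both forward-pass tests, CCV_CNNP_INIT_SHARED_TENSOR binds a constant
// (non-trainable) tensor to one input of the wrapped command: "mul" carries
// a scalar initialized to 2.12 by CMD_SET_FORWARD, and "add" carries a copy
// of b = -1. Applying "mul" twice reuses the same shared tensor, hence the
// expected output 1.12 * 2.12 * 2.12 - 1 in both variants.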

ccv_cnnp_model_t* _math_2_x_1_1_10(const ccv_nnc_tensor_t* const b)
{
  ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(
      KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_set_by(CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1))),
    ),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
  ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
  ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add, add), "seq");
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
  ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
  // Because we don't have an L2 loss function available yet, manually construct the L2 loss.
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(left_out, fit));
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(diff, diff));
  return ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
}
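
// _math_2_x_1_1_10 builds the loss (x*a + b + b - fit)^2 where x is the only
// trainable (CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, initialized uniformly
// in [-1, 1]) and b is a shared constant added twice by reusing "add" in the
// sequential. With a = 2, b = 1 and fit = 10, the minimum is at x = 4, which
// is what the tests below drive it toward.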

TEST_CASE("learn simple math of 2 * x + 1 + 1 = 10, x = 4")
{
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  b->data.f32[0] = 1;
  ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
  const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_param_t o = {};
  ccv_cnnp_model_tensor_auto(final, &o, 1);
  ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
  ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
  ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
  ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
  ccv_nnc_tensor_t* outgrad0 = ccv_nnc_tensor_new(0, a, 0);
  ccv_nnc_tensor_t* outgrad1 = ccv_nnc_tensor_new(0, f, 0);
  ingrad->data.f32[0] = 1;
  a_tensor->data.f32[0] = 2;
  f_tensor->data.f32[0] = 10;
  int i;
  float old_o = 10;
  for (i = 0; i < 10; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be different");
  old_o = o_tensor->data.f32[0];
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0, 0, 0), 0, 0, 0); // No decay.
  ingrad->data.f32[0] = 0; // ingrad is 0, no update at all.
  for (i = 0; i < 10; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(outgrad0, outgrad1), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad is 0");
  old_o = o_tensor->data.f32[0];
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
  for (i = 0; i < 100; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(0), TENSOR_LIST(outgrad0, outgrad1), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 100 iterations, output should be different");
  old_o = o_tensor->data.f32[0];
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0, 0, 0), 0, 0, 0); // No decay.
  // Note we still use the old ingrad, which is 0.
  for (i = 0; i < 10; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad is 0");
  ingrad->data.f32[0] = 1; // ingrad reset to 1.
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
  for (i = 0; i < 1000; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 1000 iterations, output should be different");
  o_tensor->data.f32[0] = 10;
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1,
  }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "(2 * x + 1 + 1 - 10)^2 should be 0");
  ccv_nnc_tensor_free(a_tensor);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(f_tensor);
  ccv_nnc_tensor_free(o_tensor);
  ccv_nnc_tensor_free(ingrad);
  ccv_nnc_tensor_free(outgrad0);
  ccv_nnc_tensor_free(outgrad1);
  ccv_cnnp_model_free(final);
}
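
// The assertions above pin down the semantics of the ingrad argument to
// ccv_cnnp_model_backward: an empty list or a 0 entry makes the backward
// pass assume a gradient of 1 with respect to the output (so training
// proceeds), whereas an explicit tensor filled with 0 propagates zero
// gradient, which is why ten such iterations leave the output unchanged.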

TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and copy parameter to a new model entirely")
{
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  b->data.f32[0] = 1;
  ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
  const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_param_t o = {};
  ccv_cnnp_model_tensor_auto(final, &o, 1);
  ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
  ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
  ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
  ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
  ingrad->data.f32[0] = 1;
  a_tensor->data.f32[0] = 2;
  f_tensor->data.f32[0] = 10;
  int i;
  for (i = 0; i < 10; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  const float o_final = o_tensor->data.f32[0];
  ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b);
  ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
  ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
  ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0);
  ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 64, 1e-5, "should match the output when x is 0");
  ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final);
  ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
  ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
  ccv_nnc_tensor_free(a_tensor);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(f_tensor);
  ccv_nnc_tensor_free(o_tensor);
  ccv_nnc_tensor_free(ingrad);
  ccv_cnnp_model_free(final);
  ccv_cnnp_model_free(final2);
  ccv_cnnp_model_free(final3);
}
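
// ccv_cnnp_model_set_parameters copies trainables between two compiled
// models (and works on a ccv_cnnp_model_copy as well), while
// ccv_cnnp_model_parameters_map applies a command to the parameters in
// place: CMD_SET_FORWARD(0) zeroes the trainable x, so the output becomes
// (0 * 2 + 1 + 1 - 10)^2 = 64, exactly as asserted above.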

TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and merge parameters with a model")
{
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  b->data.f32[0] = 1;
  ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
  const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_param_t o = {};
  ccv_cnnp_model_tensor_auto(final, &o, 1);
  ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
  ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
  ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
  ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
  ingrad->data.f32[0] = 1;
  a_tensor->data.f32[0] = 2;
  f_tensor->data.f32[0] = 10;
  int i;
  for (i = 0; i < 10; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  const float o_final = o_tensor->data.f32[0];
  ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b);
  ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
  ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
  ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0);
  ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 36, 1e-5, "should match the output when x is 1");
  ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
  ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
  ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, a, 0);
  ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor);
  const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6;
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], (x_final * 2 + 1 + 1 - 10) * (x_final * 2 + 1 + 1 - 10), 1e-5, "should match the previous output");
  ccv_nnc_tensor_free(a_tensor);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(f_tensor);
  ccv_nnc_tensor_free(o_tensor);
  ccv_nnc_tensor_free(x_tensor);
  ccv_nnc_tensor_free(ingrad);
  ccv_cnnp_model_free(final);
  ccv_cnnp_model_free(final2);
}
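
// ccv_cnnp_model_parameters_zip_map runs CMD_ADD_FORWARD(0.6, 0.4) over the
// zipped parameter pairs, i.e. new = 0.6 * own + 0.4 * donor. final2's
// trainable was just set to 1, so the merged value is 0.6 * 1 + 0.4 * x
// (with x read back from final via ccv_cnnp_model_parameter_copy), which is
// the x_final the assertion recomputes.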

TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate convergence")
{
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  x->data.f32[0] = 1;
  ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(
      KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  y->data.f32[0] = 2;
  ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))),
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
  ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), "seq");
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
  ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
  // Because we don't have an L2 loss function available yet, manually construct the L2 loss.
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(left_out, fit));
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
    MODEL_IO_LIST(diff, diff));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
  const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  // Train add exclusively.
  ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
  ccv_nnc_tensor_param_t o = {};
  ccv_cnnp_model_tensor_auto(final, &o, 1);
  ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
  ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
  ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
  a_tensor->data.f32[0] = 2;
  f_tensor->data.f32[0] = 12;
  o_tensor->data.f32[0] = 12;
  int i;
  for (i = 0; i < 10; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 12, 1e-5, "after 10 iterations, output should not be the original");
  // Switch to train mul exclusively.
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
  ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS)));
  float old_o = o_tensor->data.f32[0];
  for (i = 0; i < 10; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE(o_tensor->data.f32[0] < old_o, "we should be closer to 0 at this point");
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
  for (i = 0; i < 1000; i++)
  {
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1,
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
    ccv_cnnp_model_apply_gradients(final, 0);
  }
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "the mean squared error should be 0 at this point");
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_free(a_tensor);
  ccv_nnc_tensor_free(o_tensor);
  ccv_nnc_tensor_free(f_tensor);
  ccv_nnc_tensor_free(x);
  ccv_nnc_tensor_free(y);
  ccv_cnnp_model_free(final);
}
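
// ccv_cnnp_model_set_minimizer with a parameter selector scopes the
// optimizer: setting CMD_NOOP() on mul's parameters freezes x while add's y
// trains, and swapping the two reverses it. The (0, 0, 0) trailing form used
// in earlier tests instead applies the minimizer to every parameter.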
902
903
TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate learn-ability")
904
1
{
905
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
906
1
  x->data.f32[0] = 1;
907
1
  ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
908
1
    MODEL_CMD_EXEC_IO_MAP(
909
1
      KV(CCV_CNNP_IO),
910
1
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))),
911
1
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "mul");
912
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
913
1
  y->data.f32[0] = 2;
914
1
  ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
915
1
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
916
1
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))),
917
1
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), "add");
918
1
  ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), "seq");
919
1
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
920
1
  ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
921
1
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
922
1
  // Because we don't have L2 loss function available yet, manually create L2 loss.
923
1
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
924
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
925
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
926
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
927
1
    MODEL_IO_LIST(left_out, fit));
928
1
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
929
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
930
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
931
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
932
1
    MODEL_IO_LIST(diff, diff));
933
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
934
1
  const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
935
1
  const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
936
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP());
937
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(mul, 0, 0), x);
938
1
  // Train add exclusively.
939
1
  ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
940
1
  ccv_nnc_tensor_param_t o = {};
941
1
  ccv_cnnp_model_tensor_auto(final, &o, 1);
942
1
  ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
943
1
  ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
944
1
  ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
945
1
  a_tensor->data.f32[0] = 2;
946
1
  f_tensor->data.f32[0] = 12;
947
1
  o_tensor->data.f32[0] = 12;
948
1
  int i;
949
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
950
1.00k
  {
951
1.00k
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
952
1.00k
      .requires_grad = 1,
953
1.00k
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
954
1.00k
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
955
1.00k
    ccv_cnnp_model_apply_gradients(final, 0);
956
1.00k
  }
957
1
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point");
958
1
  ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(add, 0, 0), x);
959
1
  REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 10, 1e-1, "the weight on add should be 10");
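  // With mul's weight pinned at 1 and input a = 2, the model computes
  // 2 * 1 + w_add; fitting f = 12 drives w_add toward 12 - 2 = 10.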
960
1
  // Switch to train mul exclusively. Reset add's parameter to its initial value.
961
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(add, 0, 0), y);
962
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
963
1
  ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS)));
964
1.00k
  for (i = 0; i < 1000; i++)
965
1.00k
  {
966
1.00k
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
967
1.00k
      .requires_grad = 1,
968
1.00k
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
969
1.00k
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
970
1.00k
    ccv_cnnp_model_apply_gradients(final, 0);
971
1.00k
  }
972
1
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point");
973
1
  ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(mul, 0, 0), x);
974
1
  REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 5, 1e-2, "the weight on mul should be 5");
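  // Conversely, with add reset to 2, the model computes 2 * w_mul + 2;
  // fitting f = 12 drives w_mul toward (12 - 2) / 2 = 5.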
975
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
976
1
  ccv_nnc_tensor_free(a_tensor);
977
1
  ccv_nnc_tensor_free(o_tensor);
978
1
  ccv_nnc_tensor_free(f_tensor);
979
1
  ccv_nnc_tensor_free(x);
980
1
  ccv_nnc_tensor_free(y);
981
1
  ccv_cnnp_model_free(final);
982
1
}
983
984
TEST_CASE("a compiled model absorbs a new model with slightly different configuration")
985
1
{
986
1
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST(
987
1
    ccv_cnnp_dense(2, 0, 0),
988
1
    ccv_cnnp_dense(2, 0, 0),
989
1
    ccv_cnnp_dense(1, 0, 0)
990
1
  ), "multi_layer");
991
1
  ccv_nnc_tensor_param_t x = CPU_TENSOR_NHWC(32F, 2, 2);
992
1
  ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(x), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP());
993
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, x, 0);
994
1
  dsfmt_t dsfmt;
995
1
  int i;
996
1
  dsfmt_init_gen_rand(&dsfmt, 1);
997
5
  for (i = 0; i < 4; i++)
998
4
    x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
999
1
  ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
1000
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
1001
1
    .requires_grad = 1,
1002
1
  }, TENSOR_LIST(x_tensor), TENSOR_LIST(y_tensor), 0, 0);
1003
1
  ccv_cnnp_model_t* const small_model = ccv_cnnp_sequential_new(MODEL_LIST(
1004
1
    ccv_cnnp_dense(2, 0, 0),
1005
1
    ccv_cnnp_dense(2, 0, 0),
1006
1
    ccv_cnnp_dense(1, 0, 0)
1007
1
  ), "multi_layer");
1008
1
  x = CPU_TENSOR_NHWC(32F, 1, 2);
1009
1
  ccv_cnnp_model_absorb(multi_layer, small_model, TENSOR_PARAM_LIST(x));
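  // Absorbing recompiles multi_layer for the new 1x2 input shape while
  // keeping its trained parameters, so a single-row evaluation below should
  // reproduce the first row of the earlier 2x2 batch.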
1010
1
  ccv_nnc_tensor_t* const small_x = ccv_nnc_tensor_new(0, x, 0);
1011
1
  ccv_nnc_tensor_t* const small_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
1012
1
  memcpy(small_x->data.f32, x_tensor->data.f32, sizeof(float) * 2);
1013
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
1014
1
    .requires_grad = 1,
1015
1
  }, TENSOR_LIST(small_x), TENSOR_LIST(small_y), 0, 0);
1016
1
  REQUIRE_EQ_WITH_TOLERANCE(small_y->data.f32[0], y_tensor->data.f32[0], 1e-5, "the parameters were retained, so the value should be too");
1017
1
  ccv_cnnp_model_t* const large_model = ccv_cnnp_sequential_new(MODEL_LIST(
1018
1
    ccv_cnnp_dense(2, 0, 0),
1019
1
    ccv_cnnp_dense(2, 0, 0),
1020
1
    ccv_cnnp_dense(1, 0, 0)
1021
1
  ), "multi_layer");
1022
1
  x = CPU_TENSOR_NHWC(32F, 4, 2);
1023
1
  ccv_cnnp_model_absorb(multi_layer, large_model, TENSOR_PARAM_LIST(x));
1024
1
  ccv_nnc_tensor_t* const large_x = ccv_nnc_tensor_new(0, x, 0);
1025
1
  ccv_nnc_tensor_t* const large_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 1), 0);
1026
1
  memcpy(large_x->data.f32, x_tensor->data.f32, sizeof(float) * 4);
1027
5
  for (i = 4; i < 8; i++)
1028
4
    large_x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
1029
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
1030
1
    .requires_grad = 1,
1031
1
  }, TENSOR_LIST(large_x), TENSOR_LIST(large_y), 0, 0);
1032
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, large_y->data.f32, y_tensor->data.f32, 2, 1e-5, "the parameters were retained, so the values should be too");
1033
1
  ccv_nnc_tensor_free(y_tensor);
1034
1
  ccv_nnc_tensor_free(x_tensor);
1035
1
  ccv_nnc_tensor_free(small_y);
1036
1
  ccv_nnc_tensor_free(small_x);
1037
1
  ccv_nnc_tensor_free(large_y);
1038
1
  ccv_nnc_tensor_free(large_x);
1039
1
  ccv_cnnp_model_free(multi_layer);
1040
1
}
1041
1042
TEST_CASE("use linear model's parameter as the input for more computation")
1043
1
{
1044
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 0, 0);
1045
1
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST(
1046
1
    linear,
1047
1
  ), "multi_layer");
1048
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1049
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(multi_layer, MODEL_IO_LIST(input));
1050
1
  out = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, NO_TRANSPOSE, 0), MODEL_IO_LIST(out, ccv_cnnp_model_parameters(linear, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0)));
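  // The dense layer's weight W is fed back in as the second matmul operand,
  // so the network computes w * (w * x + b) in this scalar case; fitting the
  // pairs (1, 3) and (2, 4) forces w * w = 1 and w * b = 2.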
1051
1
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
1052
1
  // Because we don't have an L2 loss function available yet, manually create the L2 loss.
1053
1
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
1054
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
1055
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
1056
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
1057
1
    MODEL_IO_LIST(out, fit));
1058
1
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
1059
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
1060
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
1061
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 0),
1062
1
    MODEL_IO_LIST(diff, diff));
1063
1
  ccv_cnnp_model_t* const model = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 0);
1064
1
  const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32F, 1);
1065
1
  const ccv_nnc_tensor_param_t t_params = CPU_TENSOR_NHWC(32F, 1);
1066
1
  ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP());
1067
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_copy(model);
1068
1
  ccv_cnnp_model_free(model);
1069
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP());
1070
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1071
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1072
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1073
1
  x->data.f32[0] = 1.4;
1074
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), x);
1075
1
  x->data.f32[0] = 0;
1076
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_BIAS, 0), x);
1077
1
  int i;
1078
1.00k
  for (i = 0; i < 1000; i++)
1079
1.00k
  {
1080
1.00k
    if (i % 2 == 0)
1081
500
    {
1082
500
      x->data.f32[0] = 1;
1083
500
      t->data.f32[0] = 3;
1084
500
    } else {
1085
500
      x->data.f32[0] = 2;
1086
500
      t->data.f32[0] = 4;
1087
500
    }
1088
1.00k
    float lr = 0.05;
1089
1.00k
    // Step the learning rate down as training progresses.
    if (i >= 500)
1090
500
      lr = 0.001;
1091
500
    else if (i >= 100)
1092
400
      lr = 0.01;
1093
1.00k
    ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, lr, 1, 0, 0, 0), 0, 0, 0);
1094
1.00k
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
1095
1.00k
      .requires_grad = 1,
1096
1.00k
    }, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0);
1097
1.00k
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
1098
1.00k
    ccv_cnnp_model_apply_gradients(final, 0);
1099
1.00k
  }
1100
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1101
1
  x->data.f32[0] = 1;
1102
1
  t->data.f32[0] = 3;
1103
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0);
1104
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 0, 1e-2, "the mean squared error should be 0 at this point");
1105
1
  ccv_nnc_tensor_free(x);
1106
1
  ccv_nnc_tensor_free(t);
1107
1
  ccv_nnc_tensor_free(y);
1108
1
  ccv_cnnp_model_free(final);
1109
1
}
1110
1111
TEST_CASE("model can have multiple outputs and some of them can be used in the computation")
1112
1
{
1113
1
  ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0);
1114
1
  ccv_cnnp_model_t* const linear2 = ccv_cnnp_dense(1, 1, 0);
1115
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1116
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input));
1117
1
  ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(linear2, MODEL_IO_LIST(out1));
1118
1
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out1, out2), 0);
1119
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1120
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1121
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1122
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1123
1
  ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1124
1
  t->data.f32[0] = 2.4;
1125
1
  ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear1, ALL_PARAMETERS, 0), t);
1126
1
  t->data.f32[0] = -1.5;
1127
1
  ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear2, ALL_PARAMETERS, 0), t);
1128
1
  x->data.f32[0] = 10;
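  // Both dense layers are bias-free, so out1 = 2.4 * 10 = 24 is written to t
  // and out2 = -1.5 * 24 = -36 is written to y below.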
1129
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(t, y), 0, 0);
1130
1
  REQUIRE_EQ_WITH_TOLERANCE(t->data.f32[0], 10 * 2.4, 1e-5, "should be equal to expected value");
1131
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], -10 * 2.4 * 1.5, 1e-5, "should be equal to expected value");
1132
1
  ccv_nnc_tensor_free(x);
1133
1
  ccv_nnc_tensor_free(t);
1134
1
  ccv_nnc_tensor_free(y);
1135
1
  ccv_cnnp_model_free(multi_layer);
1136
1
}
1137
1138
TEST_CASE("index select can generate vector embedding")
1139
1
{
1140
1
  ccv_cnnp_model_t* const index_select = ccv_cnnp_index_select(CCV_32F, 10, 8, 0);
1141
1
  const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32S, 3);
1142
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, x_params, 0);
1143
1
  ccv_cnnp_model_compile(index_select, TENSOR_PARAM_LIST(x_params), CMD_NOOP(), CMD_NOOP());
1144
1
  x->data.i32[0] = 1;
1145
1
  x->data.i32[1] = 0;
1146
1
  x->data.i32[2] = 5;
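  // Index select is an embedding lookup: each int32 index gathers one 8-wide
  // row of the 10x8 weight matrix, so y below stacks rows 1, 0 and 5.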
1147
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0);
1148
1
  ccv_cnnp_model_evaluate(index_select, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
1149
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 8), 0);
1150
1
  ccv_cnnp_model_parameter_copy(index_select, ccv_cnnp_model_parameters(index_select, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), v);
1151
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 1 * 8, y->data.f32, 8, 1e-5, "index 1st vector");
1152
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 0 * 8, y->data.f32 + 8, 8, 1e-5, "index 0th vector");
1153
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 5 * 8, y->data.f32 + 8 * 2, 8, 1e-5, "index 5th vector");
1154
1
  ccv_nnc_tensor_free(x);
1155
1
  ccv_nnc_tensor_free(y);
1156
1
  ccv_nnc_tensor_free(v);
1157
1
  ccv_cnnp_model_free(index_select);
1158
1
}
1159
1160
static ccv_cnnp_model_t* _resnet_block_new(const int filters, const int expansion, const int strides, const int projection_shortcut)
1161
16
{
1162
16
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
1163
16
  ccv_cnnp_model_io_t shortcut = input;
1164
16
  if (projection_shortcut)
1165
4
  {
1166
4
    ccv_cnnp_model_t* const avgdown = ccv_cnnp_average_pool(DIM_ALLOC(strides, strides), HINT((strides, strides), (0, 0)), 0);
1167
4
    shortcut = ccv_cnnp_model_apply(avgdown, MODEL_IO_LIST(input));
1168
4
    ccv_cnnp_model_t* const conv0 = ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), 1, HINT((1, 1), (0, 0)), 0);
1169
4
    shortcut = ccv_cnnp_model_apply(conv0, MODEL_IO_LIST(shortcut));
1170
4
  }
1171
16
  ccv_cnnp_model_t* const conv1 = ccv_cnnp_sequential_new(MODEL_LIST(
1172
16
    ccv_cnnp_convolution(1, filters, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0),
1173
16
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
1174
16
    ccv_cnnp_relu(0)
1175
16
  ), 0);
1176
16
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(conv1, MODEL_IO_LIST(input));
1177
16
  ccv_cnnp_model_t* const conv2 = ccv_cnnp_sequential_new(MODEL_LIST(
1178
16
    ccv_cnnp_convolution(1, filters, DIM_ALLOC(3, 3), 0, HINT((strides, strides), (1, 1)), 0),
1179
16
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
1180
16
    ccv_cnnp_relu(0)
1181
16
  ), 0);
1182
16
  output = ccv_cnnp_model_apply(conv2, MODEL_IO_LIST(output));
1183
16
  ccv_cnnp_model_t* const conv3 = ccv_cnnp_sequential_new(MODEL_LIST(
1184
16
    ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0),
1185
16
    ccv_cnnp_batch_norm(0.9, 1e-4, 0)
1186
16
  ), 0);
1187
16
  output = ccv_cnnp_model_apply(conv3, MODEL_IO_LIST(output));
1188
16
  ccv_cnnp_model_t* const add = ccv_cnnp_sum(0);
1189
16
  output = ccv_cnnp_model_apply(add, MODEL_IO_LIST(output, shortcut));
1190
16
  ccv_cnnp_model_t* const relu = ccv_cnnp_relu(0);
1191
16
  output = ccv_cnnp_model_apply(relu, MODEL_IO_LIST(output));
1192
16
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 0);
1193
16
}
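// The block above is a standard bottleneck: a 1x1 reduction, a strided 3x3
// convolution, and a 1x1 expansion to filters * expansion channels, each
// batch-normalized; the shortcut (an average-pool plus 1x1 projection when
// downsampling, as in ResNet-D) is summed in before the final ReLU.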
1194
1195
static ccv_cnnp_model_t* _resnet_block_layer_new(const int filters, const int expansion, const int strides, const int blocks)
1196
4
{
1197
4
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
1198
4
  ccv_cnnp_model_t* first_block = _resnet_block_new(filters, expansion, strides, 1);
1199
4
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(first_block, MODEL_IO_LIST(input));
1200
4
  int i;
1201
16
  for (i = 1; i < blocks; i++)
1202
12
  {
1203
12
    ccv_cnnp_model_t* block = _resnet_block_new(filters, expansion, 1, 0);
1204
12
    output = ccv_cnnp_model_apply(block, MODEL_IO_LIST(output));
1205
12
  }
1206
4
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 0);
1207
4
}
1208
1209
static void _fpn(const int d, const ccv_cnnp_model_io_t* const c, const int c_size, ccv_cnnp_model_io_t* const p)
1210
1
{
1211
1
  int i;
1212
1
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(c[c_size - 1]));
1213
1
  p[c_size - 1] = output;
1214
4
  for (i = c_size - 2; i >= 0; i--)
1215
3
  {
1216
3
    const ccv_cnnp_model_io_t lateral = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), 0), MODEL_IO_LIST(c[i]));
1217
3
    const ccv_cnnp_model_io_t up = ccv_cnnp_model_apply(ccv_cnnp_upsample(2, 2, 0), MODEL_IO_LIST(output));
1218
3
    const ccv_cnnp_model_io_t sum = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(lateral, up));
1219
3
    output = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(3, 3), 1, HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(sum));
1220
3
    p[i] = output;
1221
3
  }
1222
1
}
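// _fpn builds the top-down FPN pathway: the coarsest map c[c_size - 1] gets
// a 1x1 lateral projection to d channels, and each finer level sums its own
// lateral projection with a 2x upsample of the level above, smoothed by a
// 3x3 convolution.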
1223
1224
ccv_cnnp_model_t* _imagenet_resnet50_v1d_fpn(void)
1225
1
{
1226
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1227
1
  ccv_cnnp_model_t* init_conv = ccv_cnnp_sequential_new(MODEL_LIST(
1228
1
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), 1, HINT((2, 2), (1, 1)), 0),
1229
1
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
1230
1
    ccv_cnnp_relu(0),
1231
1
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), 1, HINT((1, 1), (1, 1)), 0),
1232
1
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
1233
1
    ccv_cnnp_relu(0),
1234
1
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), 1, HINT((1, 1), (1, 1)), 0),
1235
1
    ccv_cnnp_batch_norm(0.9, 1e-4, 0),
1236
1
    ccv_cnnp_relu(0),
1237
1
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (1, 1)), 0)
1238
1
  ), 0);
1239
1
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(init_conv, MODEL_IO_LIST(input));
1240
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(64, 4, 1, 3), MODEL_IO_LIST(output));
1241
1
  const ccv_cnnp_model_io_t c2 = output;
1242
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(128, 4, 2, 4), MODEL_IO_LIST(output));
1243
1
  const ccv_cnnp_model_io_t c3 = output;
1244
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(256, 4, 2, 6), MODEL_IO_LIST(output));
1245
1
  const ccv_cnnp_model_io_t c4 = output;
1246
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(512, 4, 2, 3), MODEL_IO_LIST(output));
1247
1
  const ccv_cnnp_model_io_t c5 = output;
1248
1
  const ccv_cnnp_model_io_t c[] = { c2, c3, c4, c5 };
1249
1
  ccv_cnnp_model_io_t p[5];
1250
1
  _fpn(256, c, 4, p);
1251
1
  p[4] = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(2, 2), HINT((2, 2), (0, 0)), 0), MODEL_IO_LIST(p[3]));
1252
1
  // 3 aspect ratios (1:2, 1:1, 2:1), each with 4 + 2 values (x, y, w, h, object, non-object), for a total of 3 * 6 = 18 channels.
1253
1
  ccv_cnnp_model_t* const rpn_proposals = ccv_cnnp_convolution(1, 18, DIM_ALLOC(1, 1), 0, HINT((1, 1), (0, 0)), "rpn");
1254
1
  ccv_cnnp_model_io_t proposals[5];
1255
1
  int i;
1256
6
  for (i = 0; i < 5; i++)
1257
5
    proposals[i] = ccv_cnnp_model_apply(rpn_proposals, MODEL_IO_LIST(p[i]));
1258
1
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), proposals, 5, 0);
1259
1
}
1260
1261
TEST_CASE("FPN-RPN use cnnp model with multiple outputs")
1262
1
{
1263
1
  ccv_cnnp_model_t* rpn = _imagenet_resnet50_v1d_fpn();
1264
1
  ccv_nnc_tensor_param_t input_params = GPU_TENSOR_NCHW(000, 32F, 4, 3, 835, 1146);
1265
1
  ccv_cnnp_model_compile(rpn, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1266
1
  ccv_nnc_tensor_param_t output_params[5];
1267
1
  ccv_cnnp_model_tensor_auto(rpn, output_params, 5);
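  // Spatial sizes follow from the strides on the 835x1146 input: each of the
  // stem convolution, max pool and stage transitions halves the map (rounding
  // up), giving 209x287, 105x144, 53x72 and 27x36; the extra 2x2/2 average
  // pool yields floor(27 / 2) x floor(36 / 2) = 13x18.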
1268
1
  REQUIRE_EQ(output_params[0].dim[2], 209, "should be equal");
1269
1
  REQUIRE_EQ(output_params[0].dim[3], 287, "should be equal");
1270
1
  REQUIRE_EQ(output_params[1].dim[2], 105, "should be equal");
1271
1
  REQUIRE_EQ(output_params[1].dim[3], 144, "should be equal");
1272
1
  REQUIRE_EQ(output_params[2].dim[2], 53, "should be equal");
1273
1
  REQUIRE_EQ(output_params[2].dim[3], 72, "should be equal");
1274
1
  REQUIRE_EQ(output_params[3].dim[2], 27, "should be equal");
1275
1
  REQUIRE_EQ(output_params[3].dim[3], 36, "should be equal");
1276
1
  REQUIRE_EQ(output_params[4].dim[2], 13, "should be equal");
1277
1
  REQUIRE_EQ(output_params[4].dim[3], 18, "should be equal");
1278
1
  ccv_cnnp_model_free(rpn);
1279
1
}
1280
1281
#include "case_main.h"