Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/cnnp.core.tests.c
Line
Count
Source (jump to first uncovered line)
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include "3rdparty/dsfmt/dSFMT.h"
8
9
// Test fixture: runs before every test case in this file. Initializes the
// NNC runtime (ccv_nnc_init) so commands, tensors and models are usable.
TEST_SETUP()
{
  ccv_nnc_init();
}
13
14
// Builds a small CIFAR-10 classifier as a sequential model: three
// convolution + ReLU + pooling stages, then flatten, two dense layers and a
// softmax. The tests feed it NHWC 1x31x31x3 input and read 10 class scores.
static ccv_cnnp_model_t* simple_cifar_10(void)
{
  return ccv_cnnp_sequential_new(MODEL_LIST(
    // Stage 1: 5x5 conv with 32 filters, ReLU, 3x3 max pooling.
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    // Stage 2: 5x5 conv with 32 filters, ReLU, 3x3 average pooling.
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    // Stage 3: 5x5 conv with 64 filters, ReLU, 3x3 average pooling.
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    // Classifier head: flatten -> 256-unit dense -> ReLU -> 10-unit dense
    // -> softmax over the 10 classes.
    ccv_cnnp_flatten(0),
    ccv_cnnp_dense(256, 0, 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_dense(10, 0, 0, 1, 0),
    ccv_cnnp_softmax(0)
  ), 1, 0);
}
33
34
// End-to-end smoke test: copy a model, compile, overfit a single random
// sample to an arbitrary label, then round-trip the trained weights through
// a checkpoint file and verify the reloaded model predicts the same label.
TEST_CASE("compile simple cifar-10 model")
{
  // Exercise ccv_cnnp_model_copy: copy the freshly built model and free the
  // original; everything below runs on the copy.
  ccv_cnnp_model_t* const sequential0 = simple_cifar_10();
  ccv_cnnp_model_t* const sequential = ccv_cnnp_model_copy(sequential0, 1);
  ccv_cnnp_model_free(sequential0);
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  // Fill one deterministic random 31x31x3 input with values in [-1, 1).
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
  dsfmt_t dsfmt;
  int i;
  dsfmt_init_gen_rand(&dsfmt, 1);
  for (i = 0; i < 31 * 31 * 3; i++)
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  // Inference pass on the untrained model.
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  // Argmax over the 10 class scores.
  int t = 0;
  float max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  // Train towards a label that differs from the current prediction.
  const int target = (t + 1) % 10;
  REQUIRE_NOT_EQ(target, t, "should not fit");
  // Doing training.
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  fit_tensor->data.f32[0] = target;
  for (i = 0; i < 100; i++)
    ccv_cnnp_model_fit(sequential, TENSOR_LIST(input_tensor), TENSOR_LIST(fit_tensor), TENSOR_LIST(output_tensor), 0, 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  // After training, it should fit.
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  // Persist the trained weights, free the model, then reload the weights
  // into a freshly constructed (untrained) model.
  remove("/tmp/compile_simple_cifar_10_model.checkpoint");
  ccv_cnnp_model_write_to_file(sequential, "/tmp/compile_simple_cifar_10_model.checkpoint", 0);
  CNNP_MODEL_GEN(sequential, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(sequential);
  ccv_cnnp_model_t* const sequential2 = simple_cifar_10();
  ccv_cnnp_model_compile(sequential2, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  // Load from the checkpoint file.
  ccv_cnnp_model_read_from_file("/tmp/compile_simple_cifar_10_model.checkpoint", 0, sequential2);
  remove("/tmp/compile_simple_cifar_10_model.checkpoint");
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  ccv_cnnp_model_evaluate(sequential2, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  // The reloaded model must reproduce the trained prediction.
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  ccv_cnnp_model_free(sequential2);
  ccv_nnc_tensor_free(input_tensor);
  ccv_nnc_tensor_free(fit_tensor);
  ccv_nnc_tensor_free(output_tensor);
}
99
100
// Counter bumped by _ccv_cnnp_model_hook for every notification that
// carries a non-null payload; tests reset it to 0 before use.
static int _ccv_cnnp_model_notified = 0;
101
102
static void _ccv_cnnp_model_hook(const ccv_cnnp_model_t* const model, const int tag, void* const payload, void* const context)
103
3
{
104
3
  if (payload)
105
3
    ++_ccv_cnnp_model_notified;
106
3
}
107
108
// Builds a three-tower inception-style block off a shared input, and checks
// the notify-hook machinery: hooks installed on three sub-models stay
// silent until ccv_cnnp_model_notify is called with a payload, after which
// the counter reads exactly 3 (one per hooked sub-model).
TEST_CASE("inception layer for model")
{
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
  _ccv_cnnp_model_notified = 0;
  // Tower 1: 1x1 conv -> ReLU -> 3x3 conv -> ReLU. The first two layers get
  // notify hooks installed.
  ccv_cnnp_model_t* const conv_1 = ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0);
  ccv_cnnp_model_notify_hook(conv_1, _ccv_cnnp_model_hook, 0);
  ccv_cnnp_model_io_t tower_1 = ccv_cnnp_model_apply(conv_1, MODEL_IO_LIST(x));
  ccv_cnnp_model_t* const relu_1 = ccv_cnnp_relu(0);
  ccv_cnnp_model_notify_hook(relu_1, _ccv_cnnp_model_hook, 0);
  tower_1 = ccv_cnnp_model_apply(relu_1, MODEL_IO_LIST(tower_1));
  tower_1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, 1, 0), MODEL_IO_LIST(tower_1));
  tower_1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_1));

  // Tower 2: 1x1 conv -> ReLU -> 5x5 conv -> ReLU.
  ccv_cnnp_model_io_t tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(x));
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2));
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), MODEL_IO_LIST(tower_2));
  tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2));

  // Tower 3: 3x3 max pool -> 1x1 conv -> ReLU.
  ccv_cnnp_model_io_t tower_3 = ccv_cnnp_model_apply(ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(x));
  tower_3 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(tower_3));
  tower_3 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_3));
  // Merge the towers with a sum; this third hooked model completes the set.
  ccv_cnnp_model_t* const add_1 = ccv_cnnp_sum(0);
  ccv_cnnp_model_notify_hook(add_1, _ccv_cnnp_model_hook, 0);
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(add_1, MODEL_IO_LIST(tower_1, tower_2, tower_3));
  // Building the graph alone must not fire any hook.
  REQUIRE_EQ(0, _ccv_cnnp_model_notified, "haven't notified");
  ccv_cnnp_model_t* const inception0 = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(output), 1, 0);
  // Notify with a non-null payload: reaches the three hooked sub-models.
  ccv_cnnp_model_notify(inception0, 0, inception0);
  ccv_cnnp_model_t* const inception = ccv_cnnp_model_copy(inception0, 1);
  REQUIRE_EQ(3, _ccv_cnnp_model_notified, "3 models changed owner");
  ccv_cnnp_model_free(inception0);
  const ccv_nnc_tensor_param_t input = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  // Compiling the copy for GPU input is a smoke test; no execution happens.
  ccv_cnnp_model_compile(inception, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  CNNP_MODEL_GEN(inception, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(inception);
}
143
144
// Dynamic-model builder (passed to ccv_cnnp_dynamic_new): composes three
// nested functional models so that a single model IO handle carries
// multiple outputs. `inputs`, `input_size` and `context` belong to the
// builder signature and are unused here.
static ccv_cnnp_model_t* _ccv_multiple_outputs_functional_model(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
{
  // Inner model: two independent conv+ReLU branches; 2 inputs -> 2 outputs.
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(input0));
  output0 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output0));
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, 1, 0), MODEL_IO_LIST(input1));
  output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1));
  ccv_cnnp_model_t* model0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(output0, output1), 1, 0);
  // Middle model: applying the two-output model0 yields ONE IO handle
  // (output0) that stands for both of its outputs.
  input0 = ccv_cnnp_input();
  input1 = ccv_cnnp_input();
  output0 = ccv_cnnp_model_apply(model0, MODEL_IO_LIST(input0, input1));
  ccv_cnnp_model_io_t input2 = ccv_cnnp_input();
  output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), MODEL_IO_LIST(input2));
  output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1));
  ccv_cnnp_model_t* interim = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0, output1), 1, 0);
  // Outer model: the sum collapses the multi-output IO into one output.
  input0 = ccv_cnnp_input();
  input1 = ccv_cnnp_input();
  input2 = ccv_cnnp_input();
  output0 = ccv_cnnp_model_apply(interim, MODEL_IO_LIST(input0, input1, input2));
  output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0));
  return ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0), 1, 0);
}
167
168
// Smoke test: the dynamic builder above returns a functional model whose
// nested sub-models expose multiple outputs through a single IO handle;
// compiling it successfully is the whole assertion.
TEST_CASE("functional model's IO can represent multiple outputs")
{
  ccv_cnnp_model_t* const model = ccv_cnnp_dynamic_new(_ccv_multiple_outputs_functional_model, 0, 0);
  // Three identical GPU NCHW 1x3x256x256 input parameters.
  const ccv_nnc_tensor_param_t p0 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  const ccv_nnc_tensor_param_t p1 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  const ccv_nnc_tensor_param_t p2 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256);
  ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(p0, p1, p2), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  CNNP_MODEL_GEN(model, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(model);
}
178
179
// Verifies that an IO which feeds a downstream layer (a non-terminal node)
// can also be exported as a model output and fetched alongside the final
// result. Computes b0 = i0 + i1 and b1 = (i0 + i1) * i2 + i3.
TEST_CASE("functional model's IO outputs can be non-terminal")
{
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input2 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input3 = ccv_cnnp_input();
  // output0 = input0 + input1; it is consumed below AND exported.
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(input0, input1));
  // output1 = output0 * input2 + input3.
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_mul(1, 0), MODEL_IO_LIST(output0, input2));
  output1 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output1, input3));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2, input3), MODEL_IO_LIST(output0, output1), 1, 0);
  const ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t a2 = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t a3 = CPU_TENSOR_NCHW(32F, 1);
  // No trainable parameters here, so NOOP minimizer and loss suffice.
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, a2, a3), CMD_NOOP(), CMD_NOOP());
  ccv_nnc_tensor_t* const a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* const a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
  ccv_nnc_tensor_t* const a2_tensor = ccv_nnc_tensor_new(0, a2, 0);
  ccv_nnc_tensor_t* const a3_tensor = ccv_nnc_tensor_new(0, a3, 0);
  ccv_nnc_tensor_t* const b0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* const b1_tensor = ccv_nnc_tensor_new(0, a0, 0);
  a0_tensor->data.f32[0] = 0.5;
  a1_tensor->data.f32[0] = 0.75;
  a2_tensor->data.f32[0] = 1.75;
  a3_tensor->data.f32[0] = 2.5;
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor, a1_tensor, a2_tensor, a3_tensor), TENSOR_LIST(b0_tensor, b1_tensor), 0, 0);
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  // b0 carries the intermediate sum; b1 the full expression.
  REQUIRE_EQ_WITH_TOLERANCE(b0_tensor->data.f32[0], 0.5 + 0.75, 1e-5, "should match the intermediate result");
  REQUIRE_EQ_WITH_TOLERANCE(b1_tensor->data.f32[0], (0.5 + 0.75) * 1.75 + 2.5, 1e-5, "should match the final result");
  ccv_cnnp_model_free(final);
  ccv_nnc_tensor_free(a0_tensor);
  ccv_nnc_tensor_free(a1_tensor);
  ccv_nnc_tensor_free(a2_tensor);
  ccv_nnc_tensor_free(a3_tensor);
  ccv_nnc_tensor_free(b0_tensor);
  ccv_nnc_tensor_free(b1_tensor);
}
218
219
// Verifies ccv_cnnp_model_add_dependencies: an ordering-only edge from
// output0 to output1 (output1 does not consume output0's value) still
// produces correct results. Computes b0 = i0 + i1 and
// b1 = (i0 + i1) + (i2 * i3).
TEST_CASE("functional model's IO can introduce non-functional dependencies")
{
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input2 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input3 = ccv_cnnp_input();
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(input0, input1));
  // output1 = input2 * input3 — data-wise independent of output0.
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_mul(1, 0), MODEL_IO_LIST(input2, input3));
  // non-functional dependency: force output0 to be scheduled before output1
  // without passing any data between them.
  ccv_cnnp_model_add_dependencies(output1, MODEL_IO_LIST(output0));
  output1 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0, output1));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2, input3), MODEL_IO_LIST(output0, output1), 1, 0);
  const ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t a2 = CPU_TENSOR_NCHW(32F, 1);
  const ccv_nnc_tensor_param_t a3 = CPU_TENSOR_NCHW(32F, 1);
  // No trainable parameters, so NOOP minimizer and loss.
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, a2, a3), CMD_NOOP(), CMD_NOOP());
  ccv_nnc_tensor_t* const a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* const a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
  ccv_nnc_tensor_t* const a2_tensor = ccv_nnc_tensor_new(0, a2, 0);
  ccv_nnc_tensor_t* const a3_tensor = ccv_nnc_tensor_new(0, a3, 0);
  ccv_nnc_tensor_t* const b0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* const b1_tensor = ccv_nnc_tensor_new(0, a0, 0);
  a0_tensor->data.f32[0] = 0.5;
  a1_tensor->data.f32[0] = 0.75;
  a2_tensor->data.f32[0] = 1.75;
  a3_tensor->data.f32[0] = 2.5;
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor, a1_tensor, a2_tensor, a3_tensor), TENSOR_LIST(b0_tensor, b1_tensor), 0, 0);
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  REQUIRE_EQ_WITH_TOLERANCE(b0_tensor->data.f32[0], 0.5 + 0.75, 1e-5, "should match the intermediate result");
  REQUIRE_EQ_WITH_TOLERANCE(b1_tensor->data.f32[0], (0.5 + 0.75) + (1.75 * 2.5), 1e-5, "should match the final result");
  ccv_cnnp_model_free(final);
  ccv_nnc_tensor_free(a0_tensor);
  ccv_nnc_tensor_free(a1_tensor);
  ccv_nnc_tensor_free(a2_tensor);
  ccv_nnc_tensor_free(a3_tensor);
  ccv_nnc_tensor_free(b0_tensor);
  ccv_nnc_tensor_free(b1_tensor);
}
260
261
// A single dense model instance applied to two different inputs should
// share its weights; successfully compiling the combined graph exercises
// that reuse path.
TEST_CASE("make sure reuse model enables share weights")
{
  ccv_cnnp_model_io_t x0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t x1 = ccv_cnnp_input();
  // One dense layer, applied twice — both applications use the same
  // parameters.
  ccv_cnnp_model_t* const shared_dense = ccv_cnnp_dense(1, 0, 0, 1, 0);
  ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(shared_dense, MODEL_IO_LIST(x0));
  ccv_cnnp_model_io_t y1 = ccv_cnnp_model_apply(shared_dense, MODEL_IO_LIST(x1));
  ccv_cnnp_model_io_t summed = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(y0, y1));
  ccv_cnnp_model_t* const model = ccv_cnnp_model_new(MODEL_IO_LIST(x0, x1), MODEL_IO_LIST(summed), 1, 0);
  ccv_nnc_tensor_param_t p0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t p1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(p0, p1), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD());
  CNNP_MODEL_GEN(model, CCV_NNC_LONG_DOT_GRAPH);
  ccv_cnnp_model_free(model);
}
276
277
// Linear regression with a weight-shared dense layer: the loss
// (out0 - fit0)^2 + (out1 - fit1)^2 is hand-assembled from ADD and EWPROD
// cmd_exec nodes, then trained with a three-phase decreasing SGD schedule.
// Data (1,2) and (3,3) is fit by y = 0.5*x + 1.5, so evaluating at x=2
// against targets 2 and 3 leaves a residual loss of 0.5^2 + 0.5^2 = 0.5.
TEST_CASE("train model with share weights and L2 loss")
{
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
  // Same dense instance on both inputs -> shared weight and bias.
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0, 1, 0);
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
  ccv_cnnp_model_io_t fit0 = ccv_cnnp_input();
  ccv_cnnp_model_io_t fit1 = ccv_cnnp_input();
  // Because we don't have L2 loss function available yet, manually create L2 loss.
  // diff0 = output0 - fit0 (ADD with coefficients 1 and -1).
  ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
    MODEL_IO_LIST(output0, fit0));
  // sqr0 = diff0 * diff0 (element-wise product with itself).
  ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
    MODEL_IO_LIST(diff0, diff0));
  // Same two nodes for the second branch.
  ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
    MODEL_IO_LIST(output1, fit1));
  ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply(
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
    MODEL_IO_LIST(diff1, diff1));
  // Total loss = sqr0 + sqr1; the model's single output IS the loss, so
  // the compile below uses CMD_NOOP() as the loss function.
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1));
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 1, 0);
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1);
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
  ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
  ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0);
  ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0);
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  // Training pairs: (1 -> 2) and (3 -> 3).
  a0_tensor->data.f32[0] = 1;
  a1_tensor->data.f32[0] = 3;
  b0_tensor->data.f32[0] = 2;
  b1_tensor->data.f32[0] = 3;
  int i;
  // Three-phase schedule with decreasing learning rates: 0.1, 0.01, 0.001.
  for (i = 0; i < 10; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
  for (i = 0; i < 100; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
  for (i = 0; i < 1000; i++)
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
  a0_tensor->data.f32[0] = 2;
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
  b0_tensor->data.f32[0] = 2; // diff is 0.5
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "We should linear regressed this.");
  ccv_nnc_tensor_free(a0_tensor);
  ccv_nnc_tensor_free(a1_tensor);
  ccv_nnc_tensor_free(b0_tensor);
  ccv_nnc_tensor_free(b1_tensor);
  ccv_nnc_tensor_free(o0_tensor);
  ccv_cnnp_model_free(final);
}
348
349
// Same architecture as simple_cifar_10() but WITHOUT the trailing softmax:
// the multi-stage tests attach softmax-crossentropy manually via
// ccv_nnc_cmd_exec, so the model ends at the 10-unit logits.
static ccv_cnnp_model_t* simple_cifar_10_no_softmax(void)
{
  return ccv_cnnp_sequential_new(MODEL_LIST(
    // Stage 1: 5x5 conv with 32 filters, ReLU, 3x3 max pooling.
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    // Stage 2: 5x5 conv with 32 filters, ReLU, 3x3 average pooling.
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    // Stage 3: 5x5 conv with 64 filters, ReLU, 3x3 average pooling.
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0),
    // Head: flatten -> 256-unit dense -> ReLU -> 10-unit logits (no softmax).
    ccv_cnnp_flatten(0),
    ccv_cnnp_dense(256, 0, 0, 1, 0),
    ccv_cnnp_relu(0),
    ccv_cnnp_dense(10, 0, 0, 1, 0)
  ), 1, 0);
}
367
368
// Exercises the multi-stage training API: instead of ccv_cnnp_model_fit,
// each step runs evaluate(requires_grad) for the forward pass, computes the
// softmax-crossentropy loss and input gradient with raw nnc commands, then
// calls ccv_cnnp_model_backward + ccv_cnnp_model_apply_gradients.
TEST_CASE("evaluate cifar-10 model in multi-stage mode")
{
  ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax();
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
  // CMD_NOOP loss: the loss is computed manually below.
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), CMD_NOOP());
  // Deterministic random input in [-1, 1).
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
  dsfmt_t dsfmt;
  int i;
  dsfmt_init_gen_rand(&dsfmt, 1);
  for (i = 0; i < 31 * 31 * 3; i++)
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  // Argmax of the untrained logits; train towards a different label.
  int t = 0;
  float max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  const int target = (t + 1) % 10;
  REQUIRE_NOT_EQ(target, t, "should not fit");
  // Doing training.
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  fit_tensor->data.f32[0] = target;
  ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  for (i = 0; i < 100; i++)
  {
    // Forward pass that records what backward needs.
    ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1
    }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
    // Manual loss: softmax-crossentropy forward, then its backward to get
    // the gradient w.r.t. the logits.
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0);
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0);
    // Back-propagate the gradient through the model, then apply SGD.
    ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
    ccv_cnnp_model_apply_gradients(sequential, 0);
  }
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  // After training, it should fit.
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  ccv_nnc_tensor_free(ingrad_tensor);
  ccv_nnc_tensor_free(fit_tensor);
  ccv_nnc_tensor_free(softmax_tensor);
  ccv_nnc_tensor_free(loss_tensor);
  ccv_nnc_tensor_free(input_tensor);
  ccv_nnc_tensor_free(output_tensor);
  ccv_cnnp_model_free(sequential);
}
426
427
// Same multi-stage training as the previous test, but calls
// ccv_cnnp_model_backward multiple times per step (2x on even steps, 3x
// when also divisible by 3) before a single apply_gradients — exercising
// gradient accumulation. The learning rate is scaled down (0.00033 vs
// 0.001) to compensate for the larger accumulated gradients.
TEST_CASE("evaluate cifar-10 model in multi-stage mode with gradient accumulated")
{
  ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax();
  const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3);
  ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.00033, 1, 0.99, 0.9, 0.9), CMD_NOOP());
  // Deterministic random input in [-1, 1).
  ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0);
  dsfmt_t dsfmt;
  int i;
  dsfmt_init_gen_rand(&dsfmt, 1);
  for (i = 0; i < 31 * 31 * 3; i++)
    input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  // Argmax of the untrained logits; train towards a different label.
  int t = 0;
  float max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  const int target = (t + 1) % 10;
  REQUIRE_NOT_EQ(target, t, "should not fit");
  // Doing training.
  ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  fit_tensor->data.f32[0] = target;
  ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0);
  for (i = 0; i < 100; i++)
  {
    ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
      .requires_grad = 1
    }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
    // Manual softmax-crossentropy loss and its gradient w.r.t. the logits.
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0);
    ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0);
    ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
    // Backward again to accumulate gradient.
    if (i % 2 == 0)
    {
      ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
      // Backward again to accumulate gradient.
      if (i % 3 == 0)
        ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0);
    }
    // One optimizer step consumes all accumulated gradients.
    ccv_cnnp_model_apply_gradients(sequential, 0);
  }
  memset(output_tensor->data.f32, 0, sizeof(float) * 10);
  // After training, it should fit.
  ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){
    .is_test = 1
  }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0);
  t = 0;
  max = output_tensor->data.f32[0];
  for (i = 1; i < 10; i++)
    if (output_tensor->data.f32[i] > max)
      max = output_tensor->data.f32[i], t = i;
  REQUIRE_EQ(target, t, "should fit");
  ccv_nnc_tensor_free(ingrad_tensor);
  ccv_nnc_tensor_free(fit_tensor);
  ccv_nnc_tensor_free(softmax_tensor);
  ccv_nnc_tensor_free(loss_tensor);
  ccv_nnc_tensor_free(input_tensor);
  ccv_nnc_tensor_free(output_tensor);
  ccv_cnnp_model_free(sequential);
}
493
494
TEST_CASE("train model with share weights and L2 loss and check out gradients")
495
1
{
496
1
  ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
497
1
  ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
498
1
  ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0, 1, 0);
499
1
  ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0));
500
1
  ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1));
501
1
  ccv_cnnp_model_io_t fit0 = ccv_cnnp_input();
502
1
  ccv_cnnp_model_io_t fit1 = ccv_cnnp_input();
503
  // Because we don't have L2 loss function available yet, manually create L2 loss.
504
1
  ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply(
505
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
506
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
507
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
508
1
    MODEL_IO_LIST(output0, fit0));
509
1
  ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply(
510
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
511
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
512
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
513
1
    MODEL_IO_LIST(diff0, diff0));
514
1
  ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply(
515
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
516
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
517
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
518
1
    MODEL_IO_LIST(output1, fit1));
519
1
  ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply(
520
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
521
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
522
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
523
1
    MODEL_IO_LIST(diff1, diff1));
524
1
  ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1));
525
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 1, 0);
526
1
  ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1);
527
1
  ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1);
528
1
  ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1);
529
1
  ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1);
530
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
531
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
532
1
  ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
533
1
  ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0);
534
1
  ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0);
535
1
  ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0);
536
1
  ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
537
  // It should fit to 1*0.5+1.5=2, 3*0.5+1.5=3
538
1
  a0_tensor->data.f32[0] = 1;
539
1
  a1_tensor->data.f32[0] = 3;
540
1
  b0_tensor->data.f32[0] = 2;
541
1
  b1_tensor->data.f32[0] = 3;
542
1
  int i;
543
11
  for (i = 0; i < 10; 
i++10
)
544
10
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
545
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
546
101
  for (i = 0; i < 100; 
i++100
)
547
100
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
548
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
549
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
550
1.00k
    ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0);
551
1
  a0_tensor->data.f32[0] = 2;
552
1
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
553
1
  b0_tensor->data.f32[0] = 2; // diff is 0.5
554
1
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
555
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
556
1
    .is_test = 1
557
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
558
1
  REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "We should linear regressed this.");
559
  // Figure out the actual weight and bias term in the model.
560
1
  a0_tensor->data.f32[0] = 0;
561
1
  a1_tensor->data.f32[0] = 0;
562
1
  b0_tensor->data.f32[0] = 0;
563
1
  b1_tensor->data.f32[0] = 0;
564
  // The output will be 2*bias^2
565
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
566
1
    .is_test = 1
567
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
568
1
  const float bias = sqrtf(o0_tensor->data.f32[0] * 0.5);
569
1
  a0_tensor->data.f32[0] = 1;
570
1
  a1_tensor->data.f32[0] = 1;
571
1
  b0_tensor->data.f32[0] = 0;
572
1
  b1_tensor->data.f32[0] = 0;
573
  // The output will be 2*(w+bias)^2
574
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
575
1
    .is_test = 1
576
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
577
1
  const float w = sqrt(o0_tensor->data.f32[0] * 0.5) - bias;
578
  // Compute the out gradient to verify.
579
1
  a0_tensor->data.f32[0] = 2;
580
1
  a1_tensor->data.f32[0] = 2; // The final result should be 4.
581
1
  b0_tensor->data.f32[0] = 2; // diff is 0.5
582
1
  b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5.
583
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
584
1
    .requires_grad = 1,
585
1
  }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0);
586
  // Note that I have to use new tensors and have to keep these tensors around since they were binded to the model when evaluate.
587
1
  ccv_nnc_tensor_t* da0_tensor = ccv_nnc_tensor_new(0, a0, 0);
588
1
  ccv_nnc_tensor_t* da1_tensor = ccv_nnc_tensor_new(0, a1, 0);
589
1
  ccv_nnc_tensor_t* db0_tensor = ccv_nnc_tensor_new(0, b0, 0);
590
1
  ccv_nnc_tensor_t* db1_tensor = ccv_nnc_tensor_new(0, b1, 0);
591
1
  ccv_nnc_tensor_t* do0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
592
1
  do0_tensor->data.f32[0] = 1;
593
1
  ccv_cnnp_model_backward(final, TENSOR_LIST(do0_tensor), TENSOR_LIST(da0_tensor, da1_tensor, db0_tensor, db1_tensor), 0, 0);
594
1
  REQUIRE_EQ_WITH_TOLERANCE(da0_tensor->data.f32[0], 2 * w * (w * 2 + bias - 2), 1e-5, "da0=2*w*(w*a0+bias-b0), thus, 0.5");
595
1
  REQUIRE_EQ_WITH_TOLERANCE(da1_tensor->data.f32[0], 2 * w * (w * 2 + bias - 3), 1e-5, "da1=2*w*(w*a1+bias-b1), thus, -0.5");
596
1
  REQUIRE_EQ_WITH_TOLERANCE(db0_tensor->data.f32[0], -2 * (w * 2 + bias - 2), 1e-5, "db0=-2*(w*a0+bias-b0), thus, -1");
597
1
  REQUIRE_EQ_WITH_TOLERANCE(db1_tensor->data.f32[0], -2 * (w * 2 + bias - 3), 1e-5, "db1=-2*(w*a1+bias-b1), thus, 1");
598
1
  ccv_nnc_tensor_free(a0_tensor);
599
1
  ccv_nnc_tensor_free(a1_tensor);
600
1
  ccv_nnc_tensor_free(b0_tensor);
601
1
  ccv_nnc_tensor_free(b1_tensor);
602
1
  ccv_nnc_tensor_free(o0_tensor);
603
1
  ccv_nnc_tensor_free(da0_tensor);
604
1
  ccv_nnc_tensor_free(da1_tensor);
605
1
  ccv_nnc_tensor_free(db0_tensor);
606
1
  ccv_nnc_tensor_free(db1_tensor);
607
1
  ccv_nnc_tensor_free(do0_tensor);
608
1
  ccv_cnnp_model_free(final);
609
1
}
610
611
// Builds y = ((x * 2.12) * 2.12) + (-1) with the functional (model_apply) API:
// the same "mul" model is applied twice so its shared 2.12 tensor is reused,
// then an "add" model carries -1 copied from tensor b. Only the forward pass
// is checked, on a single scalar input.
TEST_CASE("apply functional model as forward pass")
{
	// x * 2.12 — the multiplier lives in a shared tensor filled by CMD_SET_FORWARD(2.12).
	ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul");
	ccv_cnnp_model_io_t input = ccv_cnnp_input();
	// Apply "mul" twice: the second application reuses the same model instance.
	ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(input));
	output = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(output));
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = -1;
	// x + (-1) — the addend is copied from b into a shared tensor at init time.
	ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add");
	output = ccv_cnnp_model_apply(add, MODEL_IO_LIST(output));
	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 1, "final");
	ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
	ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, a0, 0);
	a0_tensor->data.f32[0] = 1.12;
	o0_tensor->data.f32[0] = 0;
	// Inference-only run; no gradients requested.
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.is_test = 1
	}, TENSOR_LIST(a0_tensor), TENSOR_LIST(o0_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1");
	ccv_nnc_tensor_free(a0_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(o0_tensor);
	ccv_cnnp_model_free(final);
}
644
645
// Same computation as the functional-API test above — ((x * 2.12) * 2.12) - 1 —
// but expressed with the sequential API, listing "mul" twice in MODEL_LIST to
// exercise model reuse inside ccv_cnnp_sequential_new.
TEST_CASE("apply sequential model as forward pass")
{
	// x * 2.12 — multiplier held in a shared tensor set by CMD_SET_FORWARD(2.12).
	ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul");
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = -1;
	// x + (-1) — addend copied from b at initialization.
	ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add");
	// "mul" appears twice in the sequence — same instance applied back to back.
	ccv_cnnp_model_t* const final = ccv_cnnp_sequential_new(MODEL_LIST(mul, mul, add), 1, "seq");
	ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0);
	ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, a0, 0);
	a0_tensor->data.f32[0] = 1.12;
	o0_tensor->data.f32[0] = 0;
	// Inference-only run.
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.is_test = 1
	}, TENSOR_LIST(a0_tensor), TENSOR_LIST(o0_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1");
	ccv_nnc_tensor_free(a0_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(o0_tensor);
	ccv_cnnp_model_free(final);
}
674
675
// Shared fixture used by several tests below: builds a trainable model for
// "w * a + b + b" (with b supplied by the caller) plus a hand-rolled L2 loss.
// The multiplier w is the only trainable, randomly initialized in [-1, 1].
// Returns a model with inputs (a, fit) and output (w * a + b + b - fit)^2.
// The caller keeps ownership of b; its value is copied into the model.
ccv_cnnp_model_t* _math_2_x_1_1_10(const ccv_nnc_tensor_t* const b)
{
	// w * a — w is a trainable shared tensor (CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE).
	ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(
			KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_set_by(CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1))),
		),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul");
	// x + b — b's value is copied into a non-trainable shared tensor.
	ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add");
	// "add" is listed twice: left(a) = w * a + b + b.
	ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add, add), 1, "seq");
	ccv_cnnp_model_io_t input = ccv_cnnp_input();
	ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
	ccv_cnnp_model_io_t fit = ccv_cnnp_input();
	// Because we don't have L2 loss function available yet, manually create L2 loss.
	// diff = left(a) - fit, via ADD_FORWARD with coefficients (1, -1).
	ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
		MODEL_IO_LIST(left_out, fit));
	// sqr = diff * diff.
	ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
		MODEL_IO_LIST(diff, diff));
	return ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0);
}
704
705
// Trains w in (w * 2 + 1 + 1 - 10)^2 toward w = 4 using the explicit
// evaluate/backward/apply_gradients loop, exercising the different
// ccv_cnnp_model_backward argument combinations: no ingrad (defaults applied),
// explicit ingrad with/without outgrads, and a zero ingrad (which must freeze
// the parameters).
TEST_CASE("learn simple math of 2 * x + 1 + 1 = 10, x = 4")
{
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	// Let the model infer the output tensor shape.
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* outgrad0 = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* outgrad1 = ccv_nnc_tensor_new(0, f, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 10;
	int i;
	float old_o = 10;
	// Phase 1: backward with no explicit ingrad / outgrads; output must move.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be different");
	old_o = o_tensor->data.f32[0];
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0, 0, 0), 0, 0, 0); // No decay.
	ingrad->data.f32[0] = 0; // ingrad is 0, no update at all.
	// Phase 2: zero ingrad plus outgrads; parameters (and thus output) must not change.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(outgrad0, outgrad1), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad");
	old_o = o_tensor->data.f32[0];
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0);
	// Phase 3: NULL ingrad (TENSOR_LIST(0)) with outgrads; training resumes.
	for (i = 0; i < 100; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(0), TENSOR_LIST(outgrad0, outgrad1), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 100 iterations, output should be different");
	old_o = o_tensor->data.f32[0];
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0, 0, 0), 0, 0, 0); // No decay.
	// Note we still use the old ingrad which is 0.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad");
	ingrad->data.f32[0] = 1; // ingrad reset to 1.
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0);
	// Phase 4: long run with unit ingrad to converge on the loss minimum.
	for (i = 0; i < 1000; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 1000 iterations, output should be different");
	// Final inference pass: the squared error should now be ~0.
	o_tensor->data.f32[0] = 10;
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.is_test = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "(2 * x + 1 + 1 - 10) ^ 2 should equal to 0");
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_nnc_tensor_free(outgrad0);
	ccv_nnc_tensor_free(outgrad1);
	ccv_cnnp_model_free(final);
}
796
797
// Custom-command body used through clip_grad_norm_reduce_norm2_vtab: for each
// parameter gradient (inputs[0]), compute its norm2 into outputs[2] and
// accumulate it into the running total kept in outputs[1]. outputs[1] is
// expected to be zeroed by the caller before the first invocation.
static int _ccv_cnnp_model_clip_grad_norm_reduce_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	ccv_nnc_tensor_t* const running_total = outputs[1];
	ccv_nnc_tensor_t* const gradient_norm = outputs[2];
	// gradient_norm = norm2(inputs[0])
	ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(), hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(gradient_norm), stream_context);
	// running_total = running_total + gradient_norm
	ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 1), hint, flags, TENSOR_LIST(running_total, gradient_norm), TENSOR_LIST(running_total), stream_context);
	return CCV_NNC_EXEC_SUCCESS;
}
805
806
// vtab wiring the custom reduce-norm2 body above into a CCV_NNC_CUSTOM_FORWARD
// command (see reduce_cmd in the clip-grad test below).
static ccv_nnc_cmd_vtab_t clip_grad_norm_reduce_norm2_vtab = {
	.exec = _ccv_cnnp_model_clip_grad_norm_reduce_norm2
};
809
810
// Verifies ccv_cnnp_model_parameters_clip_grad_norm: after clipping with
// max_norm = 0.5, the accumulated norm2 over all parameter gradients (measured
// via a custom command mapped over the gradients) must not exceed 0.5. The
// check is done once for a single backward pass and once for two accumulated
// backward passes.
TEST_CASE("learn simple math of 2 * x + 1 + 1 = 10, x = 4 and clip grad to max_norm = 0.5")
{
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* outgrad0 = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* outgrad1 = ccv_nnc_tensor_new(0, f, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 10;
	// One forward/backward pass, then clip the parameter gradients.
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
	ccv_cnnp_model_parameters_clip_grad_norm(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), 2, 0.5, 0);
	// old_norm2 accumulates across parameters; norm2 is per-parameter scratch.
	ccv_nnc_tensor_t* old_norm2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_tensor_t* norm2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(old_norm2), 0);
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(norm2), 0);
	ccv_cnnp_model_apply_gradients(final, 0);
	// Wrap the accumulator function as a custom forward command.
	ccv_nnc_cmd_t reduce_cmd = {
		.cmd = CCV_NNC_CUSTOM_FORWARD,
		.isa = &clip_grad_norm_reduce_norm2_vtab,
	};
	ccv_cnnp_model_parameter_gradients_map(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(old_norm2, norm2), 0);
	REQUIRE(norm2->data.f32[0] < 0.5 + 1e-5, "norm2 should be smaller than max_norm");
	// Two backward passes in a row (gradient accumulation), clip, then re-check.
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
	ccv_cnnp_model_parameters_clip_grad_norm(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), 2, 0.5, 0);
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(old_norm2), 0);
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(norm2), 0);
	ccv_cnnp_model_parameter_gradients_map(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(old_norm2, norm2), 0);
	REQUIRE(norm2->data.f32[0] < 0.5 + 1e-5, "norm2 should be smaller than max_norm");
	ccv_cnnp_model_apply_gradients(final, 0);
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_nnc_tensor_free(outgrad0);
	ccv_nnc_tensor_free(outgrad1);
	ccv_cnnp_model_free(final);
	ccv_nnc_tensor_free(old_norm2);
	ccv_nnc_tensor_free(norm2);
}
871
872
// Trains one instance of the 2*x+1+1 model, then verifies parameter transfer:
// (1) ccv_cnnp_model_set_parameters into a freshly built model reproduces the
// trained output, (2) ccv_cnnp_model_parameters_map with CMD_SET_FORWARD(0)
// zeroes the trainable (output becomes (0*2+1+1-10)^2 = 64), and (3) set into
// a ccv_cnnp_model_copy of the trained model also reproduces the output.
TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and copy parameter to a new model entirely")
{
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 10;
	int i;
	// Train for a few iterations so the parameter moves off its random init.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	const float o_final = o_tensor->data.f32[0];
	// (1) Copy parameters into a brand-new instance of the same architecture.
	ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b);
	ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
	// (2) Zero the trainable in final2: loss becomes (0*2+1+1-10)^2 = 64.
	ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 64, 1e-5, "should match the output when x is 0");
	// (3) Same parameter transfer, but into a deep copy of the trained model.
	ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final, 1);
	ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_cnnp_model_free(final);
	ccv_cnnp_model_free(final2);
	ccv_cnnp_model_free(final3);
}
921
922
// Trains one model, then verifies ccv_cnnp_model_parameters_zip_map by
// blending parameters: final2's parameter is set to 1, then replaced with
// 0.6 * 1 + 0.4 * (final's trained parameter) via CMD_ADD_FORWARD(0.6, 0.4).
// The expected loss is recomputed on the host from the blended value read
// back through ccv_cnnp_model_parameter_copy.
TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and merge parameters with a model")
{
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	b->data.f32[0] = 1;
	ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0);
	ingrad->data.f32[0] = 1;
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 10;
	int i;
	// Train a few iterations to move the parameter off its random init.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	const float o_final = o_tensor->data.f32[0];
	ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b);
	ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output");
	// Set final2's parameter to 1: loss becomes (1*2+1+1-10)^2 = 36.
	ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 36, 1e-5, "should match the output when x is 1");
	// Blend: final2_param = 0.6 * final2_param + 0.4 * final_param.
	ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
	ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, a, 0);
	// Sanity-check the reported parameter shape before copying it out.
	const ccv_nnc_tensor_param_t params = ccv_cnnp_model_parameter_tensor_params(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS));
	REQUIRE_EQ(1, params.dim[0], "should match parameter shape");
	REQUIRE_EQ(0, params.dim[1], "should match parameter shape");
	ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor);
	// Recompute the blended parameter on the host and compare the losses.
	const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6;
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], (x_final * 2 + 1 + 1 - 10) * (x_final * 2 + 1 + 1 - 10), 1e-5, "should match the previous output");
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(x_tensor);
	ccv_nnc_tensor_free(ingrad);
	ccv_cnnp_model_free(final);
	ccv_cnnp_model_free(final2);
}
976
977
// Builds a model with two trainables — x (multiplier, init 1) and y (bias,
// init 2) — against the L2 loss for 2 * x + y = 12, and verifies that
// per-parameter minimizers work: setting CMD_NOOP on one parameter set
// freezes it while the other keeps training.
TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate convergence")
{
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	x->data.f32[0] = 1;
	// a * x — x is trainable, initialized from the x tensor.
	ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(
			KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul");
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
	y->data.f32[0] = 2;
	// + y — y is also trainable, initialized from the y tensor.
	ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
		MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
			KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))),
		MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add");
	ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), 1, "seq");
	ccv_cnnp_model_io_t input = ccv_cnnp_input();
	ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
	ccv_cnnp_model_io_t fit = ccv_cnnp_input();
	// Because we don't have L2 loss function available yet, manually create L2 loss.
	ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
		MODEL_IO_LIST(left_out, fit));
	ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
		ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
			MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
			MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
		MODEL_IO_LIST(diff, diff));
	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0);
	const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
	const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP());
	// Train add exclusively.
	ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	ccv_nnc_tensor_param_t o = {};
	ccv_cnnp_model_tensor_auto(final, &o, 1);
	ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
	ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
	ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
	a_tensor->data.f32[0] = 2;
	f_tensor->data.f32[0] = 12;
	o_tensor->data.f32[0] = 12;
	int i;
	// Phase 1: only y moves (mul's parameter is frozen by CMD_NOOP above).
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 12, 1e-5, "after 10 iterations, output should not be the original");
	// Switch to train mul exclusively.
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS)));
	float old_o = o_tensor->data.f32[0];
	// Phase 2: only x moves; the loss must keep decreasing.
	for (i = 0; i < 10; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE(o_tensor->data.f32[0] < old_o, "we should be closer to 0 at this point");
	// Phase 3: smaller learning rate on mul until the loss converges to 0.
	ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
	for (i = 0; i < 1000; i++)
	{
		ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
			.requires_grad = 1,
		}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
		ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
		ccv_cnnp_model_apply_gradients(final, 0);
	}
	REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "the mean squared error should be 0 at this point");
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	ccv_nnc_tensor_free(a_tensor);
	ccv_nnc_tensor_free(o_tensor);
	ccv_nnc_tensor_free(f_tensor);
	ccv_nnc_tensor_free(x);
	ccv_nnc_tensor_free(y);
	ccv_cnnp_model_free(final);
}
1062
1063
TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate learn-ability")
1064
1
{
1065
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
1066
1
  x->data.f32[0] = 1;
1067
1
  ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
1068
1
    MODEL_CMD_EXEC_IO_MAP(
1069
1
      KV(CCV_CNNP_IO),
1070
1
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))),
1071
1
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul");
1072
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
1073
1
  y->data.f32[0] = 2;
1074
1
  ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
1075
1
    MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO),
1076
1
      KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))),
1077
1
    MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add");
1078
1
  ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), 1, "seq");
1079
1
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
1080
1
  ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input));
1081
1
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
1082
  // Because we don't have L2 loss function available yet, manually create L2 loss.
1083
1
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
1084
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
1085
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
1086
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
1087
1
    MODEL_IO_LIST(left_out, fit));
1088
1
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
1089
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
1090
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
1091
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
1092
1
    MODEL_IO_LIST(diff, diff));
1093
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0);
1094
1
  const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1);
1095
1
  const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1);
1096
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP());
1097
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(mul, 0, 0), x);
1098
  // Train add exclusively.
1099
1
  ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
1100
1
  ccv_nnc_tensor_param_t o = {};
1101
1
  ccv_cnnp_model_tensor_auto(final, &o, 1);
1102
1
  ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0);
1103
1
  ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0);
1104
1
  ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0);
1105
1
  a_tensor->data.f32[0] = 2;
1106
1
  f_tensor->data.f32[0] = 12;
1107
1
  o_tensor->data.f32[0] = 12;
1108
1
  int i;
1109
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
1110
1.00k
  {
1111
1.00k
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
1112
1.00k
      .requires_grad = 1,
1113
1.00k
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
1114
1.00k
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
1115
1.00k
    ccv_cnnp_model_apply_gradients(final, 0);
1116
1.00k
  }
1117
1
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point");
1118
1
  ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(add, 0, 0), x);
1119
1
  REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 10, 1e-1, "the weight on add should be 10");
1120
  // Switch to train mul exclusively. Reset its value.
1121
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(add, 0, 0), y);
1122
1
  ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS)));
1123
1
  ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS)));
1124
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
1125
1.00k
  {
1126
1.00k
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
1127
1.00k
      .requires_grad = 1,
1128
1.00k
    }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0);
1129
1.00k
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
1130
1.00k
    ccv_cnnp_model_apply_gradients(final, 0);
1131
1.00k
  }
1132
1
  REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point");
1133
1
  ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(mul, 0, 0), x);
1134
1
  REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 5, 1e-2, "the weight on add should be 10");
1135
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1136
1
  ccv_nnc_tensor_free(a_tensor);
1137
1
  ccv_nnc_tensor_free(o_tensor);
1138
1
  ccv_nnc_tensor_free(f_tensor);
1139
1
  ccv_nnc_tensor_free(x);
1140
1
  ccv_nnc_tensor_free(y);
1141
1
  ccv_cnnp_model_free(final);
1142
1
}
1143
1144
TEST_CASE("a compiled model absorbs a new model with slightly different configuration")
1145
1
{
1146
1
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST(
1147
1
    ccv_cnnp_dense(2, 0, 0, 1, 0),
1148
1
    ccv_cnnp_dense(2, 0, 0, 1, 0),
1149
1
    ccv_cnnp_dense(1, 0, 0, 1, 0)
1150
1
  ), 1, "multi_layer");
1151
1
  ccv_nnc_tensor_param_t x = CPU_TENSOR_NHWC(32F, 2, 2);
1152
1
  ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(x), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP());
1153
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, x, 0);
1154
1
  dsfmt_t dsfmt;
1155
1
  int i;
1156
1
  dsfmt_init_gen_rand(&dsfmt, 1);
1157
5
  for (i = 0; i < 4; 
i++4
)
1158
4
    x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
1159
1
  ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
1160
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
1161
1
    .requires_grad = 1,
1162
1
  }, TENSOR_LIST(x_tensor), TENSOR_LIST(y_tensor), 0, 0);
1163
1
  ccv_cnnp_model_t* const small_model = ccv_cnnp_sequential_new(MODEL_LIST(
1164
1
    ccv_cnnp_dense(2, 0, 0, 1, 0),
1165
1
    ccv_cnnp_dense(2, 0, 0, 1, 0),
1166
1
    ccv_cnnp_dense(1, 0, 0, 1, 0)
1167
1
  ), 1, "multi_layer");
1168
1
  x = CPU_TENSOR_NHWC(32F, 1, 2);
1169
1
  ccv_cnnp_model_absorb(multi_layer, small_model, TENSOR_PARAM_LIST(x));
1170
1
  ccv_nnc_tensor_t* const small_x = ccv_nnc_tensor_new(0, x, 0);
1171
1
  ccv_nnc_tensor_t* const small_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
1172
1
  memcpy(small_x->data.f32, x_tensor->data.f32, sizeof(float) * 2);
1173
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
1174
1
    .requires_grad = 1,
1175
1
  }, TENSOR_LIST(small_x), TENSOR_LIST(small_y), 0, 0);
1176
1
  REQUIRE_EQ_WITH_TOLERANCE(small_y->data.f32[0], y_tensor->data.f32[0], 1e-5, "the parameters retained, the value should be too");
1177
1
  ccv_cnnp_model_t* const large_model = ccv_cnnp_sequential_new(MODEL_LIST(
1178
1
    ccv_cnnp_dense(2, 0, 0, 1, 0),
1179
1
    ccv_cnnp_dense(2, 0, 0, 1, 0),
1180
1
    ccv_cnnp_dense(1, 0, 0, 1, 0)
1181
1
  ), 1, "multi_layer");
1182
1
  x = CPU_TENSOR_NHWC(32F, 4, 2);
1183
1
  ccv_cnnp_model_absorb(multi_layer, large_model, TENSOR_PARAM_LIST(x));
1184
1
  ccv_nnc_tensor_t* const large_x = ccv_nnc_tensor_new(0, x, 0);
1185
1
  ccv_nnc_tensor_t* const large_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 1), 0);
1186
1
  memcpy(large_x->data.f32, x_tensor->data.f32, sizeof(float) * 4);
1187
5
  for (i = 4; i < 8; 
i++4
)
1188
4
    large_x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
1189
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){
1190
1
    .requires_grad = 1,
1191
1
  }, TENSOR_LIST(large_x), TENSOR_LIST(large_y), 0, 0);
1192
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, large_y->data.f32, y_tensor->data.f32, 2, 1e-5, "the parameters retained, the value should be too");
1193
1
  ccv_nnc_tensor_free(y_tensor);
1194
1
  ccv_nnc_tensor_free(x_tensor);
1195
1
  ccv_nnc_tensor_free(small_y);
1196
1
  ccv_nnc_tensor_free(small_x);
1197
1
  ccv_nnc_tensor_free(large_y);
1198
1
  ccv_nnc_tensor_free(large_x);
1199
1
  ccv_cnnp_model_free(multi_layer);
1200
1
}
1201
1202
TEST_CASE("use linear model's parameter as the input for more computation")
1203
1
{
1204
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 0, 0, 1, 0);
1205
1
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST(
1206
1
    linear,
1207
1
  ), 1, "multi_layer");
1208
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1209
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(multi_layer, MODEL_IO_LIST(input));
1210
1
  out = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, NO_TRANSPOSE, 0, 0), MODEL_IO_LIST(out, ccv_cnnp_model_parameters(linear, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0)));
1211
1
  ccv_cnnp_model_io_t fit = ccv_cnnp_input();
1212
  // Because we don't have L2 loss function available yet, manually create L2 loss.
1213
1
  ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply(
1214
1
    ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0,
1215
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
1216
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
1217
1
    MODEL_IO_LIST(out, fit));
1218
1
  ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply(
1219
1
    ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0,
1220
1
      MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)),
1221
1
      MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0),
1222
1
    MODEL_IO_LIST(diff, diff));
1223
1
  ccv_cnnp_model_t* const model = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0);
1224
1
  const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32F, 1);
1225
1
  const ccv_nnc_tensor_param_t t_params = CPU_TENSOR_NHWC(32F, 1);
1226
1
  ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP());
1227
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_copy(model, 1);
1228
1
  ccv_cnnp_model_free(model);
1229
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP());
1230
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1231
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1232
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1233
1
  x->data.f32[0] = 1.4;
1234
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), x);
1235
1
  x->data.f32[0] = 0;
1236
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_BIAS, 0), x);
1237
1
  int i;
1238
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
1239
1.00k
  {
1240
1.00k
    if (i % 2 == 0)
1241
500
    {
1242
500
      x->data.f32[0] = 1;
1243
500
      t->data.f32[0] = 3;
1244
500
    } else {
1245
500
      x->data.f32[0] = 2;
1246
500
      t->data.f32[0] = 4;
1247
500
    }
1248
1.00k
    float lr = 0.05;
1249
1.00k
    if (i >= 100)
1250
900
      lr = 0.01;
1251
100
    else if (i >= 500)
1252
0
      lr = 0.001;
1253
1.00k
    ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, lr, 1, 0, 0, 0), 0, 0, 0);
1254
1.00k
    ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
1255
1.00k
      .requires_grad = 1,
1256
1.00k
    }, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0);
1257
1.00k
    ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0);
1258
1.00k
    ccv_cnnp_model_apply_gradients(final, 0);
1259
1.00k
  }
1260
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1261
1
  x->data.f32[0] = 1;
1262
1
  t->data.f32[0] = 3;
1263
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0);
1264
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 0, 1e-2, "the mean squared error should be 0 at this point");
1265
1
  ccv_nnc_tensor_free(x);
1266
1
  ccv_nnc_tensor_free(t);
1267
1
  ccv_nnc_tensor_free(y);
1268
1
  ccv_cnnp_model_free(final);
1269
1
}
1270
1271
TEST_CASE("model can have multiple outputs and some of them can be used in the computation")
1272
1
{
1273
1
  ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0, 1, 0);
1274
1
  ccv_cnnp_model_t* const linear2 = ccv_cnnp_dense(1, 1, 0, 1, 0);
1275
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1276
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input));
1277
1
  ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(linear2, MODEL_IO_LIST(out1));
1278
1
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out1, out2), 1, 0);
1279
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1280
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1281
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1282
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1283
1
  ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1284
1
  t->data.f32[0] = 2.4;
1285
1
  ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear1, ALL_PARAMETERS, 0), t);
1286
1
  t->data.f32[0] = -1.5;
1287
1
  ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear2, ALL_PARAMETERS, 0), t);
1288
1
  x->data.f32[0] = 10;
1289
1
  ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(t, y), 0, 0);
1290
1
  REQUIRE_EQ_WITH_TOLERANCE(t->data.f32[0], 10 * 2.4, 1e-5, "should be equal to expected value");
1291
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], -10 * 2.4 * 1.5, 1e-5, "should be equal to expected value");
1292
1
  ccv_nnc_tensor_free(x);
1293
1
  ccv_nnc_tensor_free(t);
1294
1
  ccv_nnc_tensor_free(y);
1295
1
  ccv_cnnp_model_free(multi_layer);
1296
1
}
1297
1298
TEST_CASE("index select model can select a part from vocabulary")
1299
1
{
1300
1
  ccv_cnnp_model_t* const index_select = ccv_cnnp_index_select(0);
1301
1
  const ccv_nnc_tensor_param_t v_params = CPU_TENSOR_NHWC(32F, 10, 8);
1302
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, v_params, 0);
1303
1
  dsfmt_t dsfmt;
1304
1
  int i;
1305
1
  dsfmt_init_gen_rand(&dsfmt, 1);
1306
81
  for (i = 0; i < 10 * 8; 
i++80
)
1307
80
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
1308
1
  const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32S, 3);
1309
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, x_params, 0);
1310
1
  ccv_cnnp_model_compile(index_select, TENSOR_PARAM_LIST(v_params, x_params), CMD_NOOP(), CMD_NOOP());
1311
1
  x->data.i32[0] = 1;
1312
1
  x->data.i32[1] = 0;
1313
1
  x->data.i32[2] = 5;
1314
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0);
1315
1
  ccv_cnnp_model_evaluate(index_select, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(v, x), TENSOR_LIST(y), 0, 0);
1316
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 1 * 8, y->data.f32, 8, 1e-5, "index 1st vector");
1317
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 0 * 8, y->data.f32 + 8, 8, 1e-5, "index 0th vector");
1318
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 5 * 8, y->data.f32 + 8 * 2, 8, 1e-5, "index 5th vector");
1319
1
  ccv_nnc_tensor_free(x);
1320
1
  ccv_nnc_tensor_free(y);
1321
1
  ccv_nnc_tensor_free(v);
1322
1
  ccv_cnnp_model_free(index_select);
1323
1
}
1324
1325
TEST_CASE("embedding model can generate vector embedding")
1326
1
{
1327
1
  ccv_cnnp_model_t* const embedding = ccv_cnnp_embedding(CCV_32F, 10, 8, 1, 0);
1328
1
  const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32S, 3);
1329
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, x_params, 0);
1330
1
  ccv_cnnp_model_compile(embedding, TENSOR_PARAM_LIST(x_params), CMD_NOOP(), CMD_NOOP());
1331
1
  x->data.i32[0] = 1;
1332
1
  x->data.i32[1] = 0;
1333
1
  x->data.i32[2] = 5;
1334
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0);
1335
1
  ccv_cnnp_model_evaluate(embedding, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
1336
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 8), 0);
1337
1
  ccv_cnnp_model_parameter_copy(embedding, ccv_cnnp_model_parameters(embedding, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), v);
1338
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 1 * 8, y->data.f32, 8, 1e-5, "index 1st vector");
1339
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 0 * 8, y->data.f32 + 8, 8, 1e-5, "index 0th vector");
1340
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 5 * 8, y->data.f32 + 8 * 2, 8, 1e-5, "index 5th vector");
1341
1
  ccv_nnc_tensor_free(x);
1342
1
  ccv_nnc_tensor_free(y);
1343
1
  ccv_nnc_tensor_free(v);
1344
1
  ccv_cnnp_model_free(embedding);
1345
1
}
1346
1347
TEST_CASE("model to get the internal name for parameters")
1348
1
{
1349
1
  ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1350
1
  ccv_cnnp_model_t* const linear2 = ccv_cnnp_dense(1, 1, 0, 1, 0);
1351
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1352
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input));
1353
1
  ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(linear2, MODEL_IO_LIST(out1));
1354
1
  ccv_cnnp_model_t* const multi_layer = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out1, out2), 1, 0);
1355
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1356
1
  ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1357
1
  const char* linear1p = "t-linear-0-0";
1358
1
  REQUIRE(memcmp(linear1p, ccv_cnnp_model_parameter_name(multi_layer, ccv_cnnp_model_parameters(linear1, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0)), strlen(linear1p) + 1) == 0, "should be equal");
1359
1
  const char* linear2p = "t-0-0";
1360
1
  REQUIRE(memcmp(linear2p, ccv_cnnp_model_parameter_name(multi_layer, ccv_cnnp_model_parameters(linear2, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0)), strlen(linear2p) + 1) == 0, "should be equal");
1361
1
  ccv_cnnp_model_free(multi_layer);
1362
1
}
1363
1364
static ccv_cnnp_model_t* _resnet_block_new(const int filters, const int expansion, const int strides, const int projection_shortcut)
1365
16
{
1366
16
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
1367
16
  ccv_cnnp_model_io_t shortcut = input;
1368
16
  if (projection_shortcut)
1369
4
  {
1370
4
    ccv_cnnp_model_t* const avgdown = ccv_cnnp_average_pool(DIM_ALLOC(strides, strides), HINT((strides, strides), (0, 0)), 0);
1371
4
    shortcut = ccv_cnnp_model_apply(avgdown, MODEL_IO_LIST(input));
1372
4
    ccv_cnnp_model_t* const conv0 = ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), DIM_ALLOC(), 1, HINT((1, 1), (0, 0)), 0, 1, 0);
1373
4
    shortcut = ccv_cnnp_model_apply(conv0, MODEL_IO_LIST(shortcut));
1374
4
  }
1375
16
  ccv_cnnp_model_t* const conv1 = ccv_cnnp_sequential_new(MODEL_LIST(
1376
16
    ccv_cnnp_convolution(1, filters, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0),
1377
16
    ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0),
1378
16
    ccv_cnnp_relu(0)
1379
16
  ), 1, 0);
1380
16
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(conv1, MODEL_IO_LIST(input));
1381
16
  ccv_cnnp_model_t* const conv2 = ccv_cnnp_sequential_new(MODEL_LIST(
1382
16
    ccv_cnnp_convolution(1, filters, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((strides, strides), (1, 1)), 0, 1, 0),
1383
16
    ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0),
1384
16
    ccv_cnnp_relu(0)
1385
16
  ), 1, 0);
1386
16
  output = ccv_cnnp_model_apply(conv2, MODEL_IO_LIST(output));
1387
16
  ccv_cnnp_model_t* const conv3 = ccv_cnnp_sequential_new(MODEL_LIST(
1388
16
    ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0),
1389
16
    ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0)
1390
16
  ), 1, 0);
1391
16
  output = ccv_cnnp_model_apply(conv3, MODEL_IO_LIST(output));
1392
16
  ccv_cnnp_model_t* const add = ccv_cnnp_sum(0);
1393
16
  output = ccv_cnnp_model_apply(add, MODEL_IO_LIST(output, shortcut));
1394
16
  ccv_cnnp_model_t* const relu = ccv_cnnp_relu(0);
1395
16
  output = ccv_cnnp_model_apply(relu, MODEL_IO_LIST(output));
1396
16
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 1, 0);
1397
16
}
1398
1399
static ccv_cnnp_model_t* _resnet_block_layer_new(const int filters, const int expansion, const int strides, const int blocks)
1400
4
{
1401
4
  ccv_cnnp_model_io_t input = ccv_cnnp_input();
1402
4
  ccv_cnnp_model_t* first_block = _resnet_block_new(filters, expansion, strides, 1);
1403
4
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(first_block, MODEL_IO_LIST(input));
1404
4
  int i;
1405
16
  for (i = 1; i < blocks; 
i++12
)
1406
12
  {
1407
12
    ccv_cnnp_model_t* block = _resnet_block_new(filters, expansion, 1, 0);
1408
12
    output = ccv_cnnp_model_apply(block, MODEL_IO_LIST(output));
1409
12
  }
1410
4
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 1, 0);
1411
4
}
1412
1413
static void _fpn(const int d, const ccv_cnnp_model_io_t* const c, const int c_size, ccv_cnnp_model_io_t* const p)
1414
1
{
1415
1
  int i;
1416
1
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(c[c_size - 1]));
1417
1
  p[c_size - 1] = output;
1418
4
  for (i = c_size - 2; i >= 0; 
i--3
)
1419
3
  {
1420
3
    const ccv_cnnp_model_io_t lateral = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(c[i]));
1421
3
    const ccv_cnnp_model_io_t up = ccv_cnnp_model_apply(ccv_cnnp_upsample(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0, 0), MODEL_IO_LIST(output));
1422
3
    const ccv_cnnp_model_io_t sum = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(lateral, up));
1423
3
    output = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((1, 1), (1, 1)), 0, 1, 0), MODEL_IO_LIST(sum));
1424
3
    p[i] = output;
1425
3
  }
1426
1
}
1427
1428
ccv_cnnp_model_t* _imagenet_resnet50_v1d_fpn(void)
1429
1
{
1430
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1431
1
  ccv_cnnp_model_t* init_conv = ccv_cnnp_sequential_new(MODEL_LIST(
1432
1
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((2, 2), (1, 1)), 0, 1, 0),
1433
1
    ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0),
1434
1
    ccv_cnnp_relu(0),
1435
1
    ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((1, 1), (1, 1)), 0, 1, 0),
1436
1
    ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0),
1437
1
    ccv_cnnp_relu(0),
1438
1
    ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((1, 1), (1, 1)), 0, 1, 0),
1439
1
    ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0),
1440
1
    ccv_cnnp_relu(0),
1441
1
    ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (1, 1)), 0)
1442
1
  ), 1, 0);
1443
1
  ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(init_conv, MODEL_IO_LIST(input));
1444
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(64, 4, 1, 3), MODEL_IO_LIST(output));
1445
1
  const ccv_cnnp_model_io_t c2 = output;
1446
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(128, 4, 2, 4), MODEL_IO_LIST(output));
1447
1
  const ccv_cnnp_model_io_t c3 = output;
1448
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(256, 4, 2, 6), MODEL_IO_LIST(output));
1449
1
  const ccv_cnnp_model_io_t c4 = output;
1450
1
  output = ccv_cnnp_model_apply(_resnet_block_layer_new(512, 4, 2, 3), MODEL_IO_LIST(output));
1451
1
  const ccv_cnnp_model_io_t c5 = output;
1452
1
  const ccv_cnnp_model_io_t c[] = { c2, c3, c4, c5 };
1453
1
  ccv_cnnp_model_io_t p[5];
1454
1
  _fpn(256, c, 4, p);
1455
1
  p[4] = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(2, 2), HINT((2, 2), (0, 0)), 0), MODEL_IO_LIST(p[3]));
1456
  // 3 aspect ratios (1:2, 1:1, 2:1). Each has 4 + 2 (x, y, w, h, object, non-object), total 18.
1457
1
  ccv_cnnp_model_t* const rpn_proposals = ccv_cnnp_convolution(1, 18, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, "rpn");
1458
1
  ccv_cnnp_model_io_t proposals[5];
1459
1
  int i;
1460
6
  for (i = 0; i < 5; 
i++5
)
1461
5
    proposals[i] = ccv_cnnp_model_apply(rpn_proposals, MODEL_IO_LIST(p[i]));
1462
1
  return ccv_cnnp_model_new(MODEL_IO_LIST(input), proposals, 5, 1, 0);
1463
1
}
1464
1465
TEST_CASE("FPN-RPN use cnnp model with multiple outputs")
1466
1
{
1467
1
  ccv_cnnp_model_t* rpn = _imagenet_resnet50_v1d_fpn();
1468
1
  ccv_nnc_tensor_param_t input_params = GPU_TENSOR_NCHW(000, 32F, 4, 3, 835, 1146);
1469
1
  ccv_cnnp_model_compile(rpn, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1470
1
  ccv_nnc_tensor_param_t output_params[5];
1471
1
  ccv_cnnp_model_tensor_auto(rpn, output_params, 5);
1472
1
  REQUIRE_EQ(output_params[0].dim[2], 209, "should be equal");
1473
1
  REQUIRE_EQ(output_params[0].dim[3], 287, "should be equal");
1474
1
  REQUIRE_EQ(output_params[1].dim[2], 105, "should be equal");
1475
1
  REQUIRE_EQ(output_params[1].dim[3], 144, "should be equal");
1476
1
  REQUIRE_EQ(output_params[2].dim[2], 53, "should be equal");
1477
1
  REQUIRE_EQ(output_params[2].dim[3], 72, "should be equal");
1478
1
  REQUIRE_EQ(output_params[3].dim[2], 27, "should be equal");
1479
1
  REQUIRE_EQ(output_params[3].dim[3], 36, "should be equal");
1480
1
  REQUIRE_EQ(output_params[4].dim[2], 13, "should be equal");
1481
1
  REQUIRE_EQ(output_params[4].dim[3], 18, "should be equal");
1482
1
  ccv_cnnp_model_free(rpn);
1483
1
}
1484
1485
TEST_CASE("extract one output each feed into different feed-forward")
1486
1
{
1487
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1488
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1489
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input));
1490
1
  ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid");
1491
1
  ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1));
1492
1
  ccv_cnnp_model_t* tiny = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out1, out2), 1, "tiny");
1493
1
  const ccv_cnnp_model_io_t i0 = ccv_cnnp_input();
1494
1
  ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(tiny, MODEL_IO_LIST(i0));
1495
1
  ccv_cnnp_model_io_t o00 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(o0));
1496
1
  ccv_cnnp_model_io_t o01 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(o0));
1497
1
  ccv_cnnp_model_t* const l0 = ccv_cnnp_dense(1, 1, 0, 1, "l0");
1498
1
  ccv_cnnp_model_io_t o10 = ccv_cnnp_model_apply(l0, MODEL_IO_LIST(o00));
1499
1
  ccv_cnnp_model_t* const l1 = ccv_cnnp_dense(1, 1, 0, 1, "l1");
1500
1
  ccv_cnnp_model_io_t o11 = ccv_cnnp_model_apply(l1, MODEL_IO_LIST(o01));
1501
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(i0), MODEL_IO_LIST(o10, o11), 1, "final");
1502
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1503
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1504
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1505
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1506
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1507
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1508
1
  t->data.f32[0] = 2.4;
1509
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t);
1510
1
  t->data.f32[0] = -1.5;
1511
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(l0, ALL_PARAMETERS, 0), t);
1512
1
  t->data.f32[0] = 1.7;
1513
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(l1, ALL_PARAMETERS, 0), t);
1514
1
  x->data.f32[0] = 10;
1515
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(t, y), 0, 0);
1516
1
  REQUIRE_EQ_WITH_TOLERANCE(t->data.f32[0], 10 * 2.4 * -1.5, 1e-5, "should be equal to expected value");
1517
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) * 1.7, 1e-5, "should be equal to expected value");
1518
1
  ccv_nnc_tensor_free(x);
1519
1
  ccv_nnc_tensor_free(t);
1520
1
  ccv_nnc_tensor_free(y);
1521
1
  ccv_cnnp_model_free(final);
1522
1
}
1523
1524
TEST_CASE("use parameter for values")
1525
1
{
1526
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1527
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1528
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input));
1529
1
  ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid");
1530
1
  ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1));
1531
1
  ccv_cnnp_model_t* const value = ccv_cnnp_parameter(CPU_TENSOR_NCHW(32F, 1), 0, 1, "value");
1532
1
  ccv_cnnp_model_io_t out3 = ccv_cnnp_model_apply(value, 0, 0);
1533
1
  ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum");
1534
1
  ccv_cnnp_model_io_t out4 = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out2, out3));
1535
1
  ccv_cnnp_model_t* final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out4), 1, "tiny");
1536
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1537
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1538
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1539
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1540
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1541
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1542
1
  t->data.f32[0] = 2.4;
1543
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t);
1544
1
  t->data.f32[0] = -1.5;
1545
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(value, ALL_PARAMETERS, 0), t);
1546
1
  x->data.f32[0] = 10;
1547
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
1548
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) - 1.5, 1e-5, "should be equal to expected value");
1549
1
  ccv_nnc_tensor_free(x);
1550
1
  ccv_nnc_tensor_free(t);
1551
1
  ccv_nnc_tensor_free(y);
1552
1
  ccv_cnnp_model_free(final);
1553
1
}
1554
1555
TEST_CASE("use scalar for values")
1556
1
{
1557
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1558
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1559
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input));
1560
1
  ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid");
1561
1
  ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1));
1562
1
  ccv_cnnp_model_io_t value = ccv_cnnp_model_apply(ccv_cnnp_scalar(CCV_TENSOR_CPU_MEMORY, CCV_TENSOR_FORMAT_NHWC, CCV_32F, 1.5, "value"), 0, 0);
1563
1
  ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum");
1564
1
  ccv_cnnp_model_io_t out4 = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out2, value));
1565
1
  ccv_cnnp_model_t* final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out4), 1, "tiny");
1566
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1567
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1568
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1569
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1570
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1571
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1572
1
  t->data.f32[0] = 2.4;
1573
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t);
1574
1
  x->data.f32[0] = 10;
1575
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
1576
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) + 1.5, 1e-5, "should be equal to expected value");
1577
1
  ccv_nnc_tensor_free(x);
1578
1
  ccv_nnc_tensor_free(t);
1579
1
  ccv_nnc_tensor_free(y);
1580
1
  ccv_cnnp_model_free(final);
1581
1
}
1582
1583
TEST_CASE("use scalar for values and copy types from other inputs")
1584
1
{
1585
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1586
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1587
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input));
1588
1
  ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid");
1589
1
  ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1));
1590
1
  ccv_cnnp_model_io_t value = ccv_cnnp_model_apply(ccv_cnnp_scalar(0, 0, 0, 1.5, "value"), MODEL_IO_LIST(input));
1591
1
  ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum");
1592
1
  ccv_cnnp_model_io_t out4 = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out2, value));
1593
1
  ccv_cnnp_model_t* final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out4), 1, "tiny");
1594
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1595
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1596
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1597
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1598
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1599
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1600
1
  t->data.f32[0] = 2.4;
1601
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t);
1602
1
  x->data.f32[0] = 10;
1603
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
1604
1
  REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) + 1.5, 1e-5, "should be equal to expected value");
1605
1
  ccv_nnc_tensor_free(x);
1606
1
  ccv_nnc_tensor_free(t);
1607
1
  ccv_nnc_tensor_free(y);
1608
1
  ccv_cnnp_model_free(final);
1609
1
}
1610
1611
TEST_CASE("LoRA fine-tuning GEMM set is_trainable to false")
1612
1
{
1613
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1614
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(10, 1, 0, -1, "linear");
1615
1
  ccv_cnnp_model_t* const down = ccv_cnnp_dense(2, 1, 0, 1, "down");
1616
1
  ccv_cnnp_model_t* const up = ccv_cnnp_dense(10, 1, 0, 1, "up");
1617
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input));
1618
1
  ccv_cnnp_model_io_t out_down = ccv_cnnp_model_apply(down, MODEL_IO_LIST(input));
1619
1
  ccv_cnnp_model_io_t out_up = ccv_cnnp_model_apply(up, MODEL_IO_LIST(out_down));
1620
1
  ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum");
1621
1
  ccv_cnnp_model_io_t out_final = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out, out_up));
1622
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out_final), 0, "tiny");
1623
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1624
1
  ccv_nnc_tensor_t* const tlinear = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0);
1625
1
  int i;
1626
101
  for (i = 0; i < 10 * 10; 
i++100
)
1627
100
    tlinear->data.f32[i] = (i / 10 == i % 10) ? 
110
:
090
;
1628
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 2), 0);
1629
21
  for (i = 0; i < 10 * 2; 
i++20
)
1630
20
    t->data.f32[i] = 0;
1631
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1632
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10);
1633
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_SGD_FORWARD(1, 0.01, 1, 0.1, 0, 0), CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN));
1634
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), tlinear);
1635
1
  ccv_nnc_tensor_free(tlinear);
1636
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(up, ALL_PARAMETERS, 0), t);
1637
1
  ccv_nnc_tensor_free(t);
1638
11
  for (i = 0; i < 10; 
i++10
)
1639
10
    x->data.f32[i] = i;
1640
1
  ccv_nnc_tensor_t* const target = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1641
11
  for (i = 0; i < 10; 
i++10
)
1642
10
    target->data.f32[i] = 10 - i;
1643
11
  for (i = 0; i < 10; 
i++10
)
1644
10
    ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0);
1645
1
  ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0);
1646
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1647
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, y->data.f32, target->data.f32, 10, 1e-2, "should match the target after fine-tuning");
1648
1
  REQUIRE_EQ(ccv_cnnp_model_is_trainable(final), 0, "should be marked as not trainable");
1649
1
  REQUIRE_EQ(ccv_cnnp_model_is_trainable(down), 1, "should be marked as trainable");
1650
1
  REQUIRE_EQ(ccv_cnnp_model_is_trainable(up), 1, "should be marked as trainable");
1651
1
  ccv_nnc_tensor_free(x);
1652
1
  ccv_nnc_tensor_free(target);
1653
1
  ccv_nnc_tensor_free(y);
1654
1
  ccv_cnnp_model_free(final);
1655
1
}
1656
1657
TEST_CASE("LoRA fine-tuning convolution set is_trainable to false")
1658
1
{
1659
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1660
1
  ccv_cnnp_model_t* const conv = ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, -1, "conv");
1661
1
  ccv_cnnp_model_t* const down = ccv_cnnp_convolution(1, 4, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, 1, "down");
1662
1
  ccv_cnnp_model_t* const up = ccv_cnnp_convolution(1, 32, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, "up");
1663
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(conv, MODEL_IO_LIST(input));
1664
1
  ccv_cnnp_model_io_t out_down = ccv_cnnp_model_apply(down, MODEL_IO_LIST(input));
1665
1
  ccv_cnnp_model_io_t out_up = ccv_cnnp_model_apply(up, MODEL_IO_LIST(out_down));
1666
1
  ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum");
1667
1
  ccv_cnnp_model_io_t out_final = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out, out_up));
1668
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out_final), 0, "tiny");
1669
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 5, 10), 0);
1670
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 5, 32), 0);
1671
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 5, 5, 10);
1672
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1673
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
1674
1
    .requires_grad = 1,
1675
1
  }, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
1676
1
  REQUIRE_EQ(ccv_cnnp_model_is_trainable(final), 0, "should be marked as not trainable");
1677
1
  REQUIRE_EQ(ccv_cnnp_model_is_trainable(down), 1, "should be marked as trainable");
1678
1
  REQUIRE_EQ(ccv_cnnp_model_is_trainable(up), 1, "should be marked as trainable");
1679
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1680
1
  ccv_nnc_tensor_free(x);
1681
1
  ccv_nnc_tensor_free(y);
1682
1
  ccv_cnnp_model_free(final);
1683
1
}
1684
1685
static int _ccv_nnc_same_namer(void* context, const char* src_name, char* updated_name, const size_t provided_size)
1686
3
{
1687
3
  const size_t src_len = ccv_min(strnlen(src_name, provided_size - 1), provided_size - 1);
1688
3
  memcpy(updated_name, src_name, src_len);
1689
3
  updated_name[src_len] = '\0';
1690
3
  return 0;
1691
3
}
1692
1693
TEST_CASE("two models share the same parameters")
1694
1
{
1695
1
  const ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
1696
1
  ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(10, 1, 0, -1, "linear");
1697
1
  ccv_cnnp_model_t* const down0 = ccv_cnnp_dense(2, 1, 0, 1, "down");
1698
1
  ccv_cnnp_model_t* const up0 = ccv_cnnp_dense(10, 1, 0, 1, "up");
1699
1
  ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0));
1700
1
  ccv_cnnp_model_io_t out0_down = ccv_cnnp_model_apply(down0, MODEL_IO_LIST(input0));
1701
1
  ccv_cnnp_model_io_t out0_up = ccv_cnnp_model_apply(up0, MODEL_IO_LIST(out0_down));
1702
1
  ccv_cnnp_model_t* const add0 = ccv_cnnp_sum("sum");
1703
1
  ccv_cnnp_model_io_t out0_final = ccv_cnnp_model_apply(add0, MODEL_IO_LIST(out0, out0_up));
1704
1
  ccv_cnnp_model_t* const final0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0), MODEL_IO_LIST(out0_final), 0, "tiny0");
1705
1706
1
  const ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
1707
1
  ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(10, 1, 0, -1, "linear");
1708
1
  ccv_cnnp_model_t* const down1 = ccv_cnnp_dense(2, 1, 0, 1, "down");
1709
1
  ccv_cnnp_model_t* const up1 = ccv_cnnp_dense(10, 1, 0, 1, "up");
1710
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input1));
1711
1
  ccv_cnnp_model_io_t out1_down = ccv_cnnp_model_apply(down1, MODEL_IO_LIST(input1));
1712
1
  ccv_cnnp_model_io_t out1_up = ccv_cnnp_model_apply(up1, MODEL_IO_LIST(out1_down));
1713
1
  ccv_cnnp_model_t* const add1 = ccv_cnnp_sum("sum");
1714
1
  ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(add1, MODEL_IO_LIST(out1, out1_up));
1715
1
  ccv_cnnp_model_t* const final1 = ccv_cnnp_model_new(MODEL_IO_LIST(input1), MODEL_IO_LIST(out1_final), 0, "tiny1");
1716
1717
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1718
1
  dsfmt_t dsfmt;
1719
1
  int i;
1720
1
  dsfmt_init_gen_rand(&dsfmt, 1);
1721
11
  for (i = 0; i < 10; 
i++10
)
1722
10
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
1723
1
  ccv_nnc_tensor_t* const y0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1724
1
  ccv_nnc_tensor_t* const y1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1725
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10);
1726
1
  ccv_cnnp_model_compile(final0, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1727
1
  ccv_cnnp_model_evaluate(final0, (ccv_cnnp_evaluate_param_t){
1728
1
    .requires_grad = 0,
1729
1
  }, TENSOR_LIST(x), TENSOR_LIST(y0), 0, 0);
1730
1
  ccv_cnnp_model_compile(final1, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1731
1
  ccv_cnnp_model_share_parameters(final1, ccv_cnnp_model_parameters(final1, ALL_PARAMETERS, ALL_PARAMETERS), final0, ccv_cnnp_model_parameters(final0, ALL_PARAMETERS, ALL_PARAMETERS), 0, 0);
1732
1
  ccv_cnnp_model_evaluate(final1, (ccv_cnnp_evaluate_param_t){
1733
1
    .requires_grad = 0,
1734
1
  }, TENSOR_LIST(x), TENSOR_LIST(y1), 0, 0);
1735
1
  REQUIRE_TENSOR_EQ(y0, y1, "two model now shares the weights, should have the same result");
1736
1
  CNNP_MODEL_GEN(final0, CCV_NNC_LONG_DOT_GRAPH);
1737
1
  ccv_nnc_tensor_free(x);
1738
1
  ccv_nnc_tensor_free(y0);
1739
1
  ccv_nnc_tensor_free(y1);
1740
1
  ccv_cnnp_model_free(final0);
1741
1
  ccv_cnnp_model_free(final1);
1742
1
}
1743
1744
TEST_CASE("two models, one with LoRA, one with not, share the same parameters")
1745
1
{
1746
1
  const ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
1747
1
  ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(10, 1, 0, -1, "linear");
1748
1
  ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0));
1749
1
  ccv_cnnp_model_t* const final0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0), MODEL_IO_LIST(out0), 0, "tiny");
1750
1751
1
  const ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
1752
1
  ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(10, 1, 0, -1, "linear");
1753
1
  ccv_cnnp_model_t* const down1 = ccv_cnnp_dense(2, 1, 0, 1, "down");
1754
1
  ccv_cnnp_model_t* const up1 = ccv_cnnp_dense(10, 1, 0, 1, "up");
1755
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input1));
1756
1
  ccv_cnnp_model_io_t out1_down = ccv_cnnp_model_apply(down1, MODEL_IO_LIST(input1));
1757
1
  ccv_cnnp_model_io_t out1_up = ccv_cnnp_model_apply(up1, MODEL_IO_LIST(out1_down));
1758
1
  ccv_cnnp_model_t* const add1 = ccv_cnnp_sum("sum");
1759
1
  ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(add1, MODEL_IO_LIST(out1, out1_up));
1760
1
  ccv_cnnp_model_t* const final1 = ccv_cnnp_model_new(MODEL_IO_LIST(input1), MODEL_IO_LIST(out1_final), 0, "tiny");
1761
1762
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1763
1
  dsfmt_t dsfmt;
1764
1
  int i;
1765
1
  dsfmt_init_gen_rand(&dsfmt, 1);
1766
11
  for (i = 0; i < 10; 
i++10
)
1767
10
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
1768
1
  ccv_nnc_tensor_t* const y0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1769
1
  ccv_nnc_tensor_t* const y1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1770
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10);
1771
1
  ccv_cnnp_model_compile(final0, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1772
1
  ccv_cnnp_model_evaluate(final0, (ccv_cnnp_evaluate_param_t){
1773
1
    .requires_grad = 0,
1774
1
  }, TENSOR_LIST(x), TENSOR_LIST(y0), 0, 0);
1775
1
  ccv_cnnp_model_compile(final1, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
1776
1
  ccv_nnc_tensor_t* const up_weights = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 10), 0);
1777
21
  for (i = 0; i < 2 * 10; 
i++20
)
1778
20
    up_weights->data.f32[i] = 0;
1779
1
  ccv_cnnp_model_set_parameter(final1, ccv_cnnp_model_parameters(up1, ALL_PARAMETERS, ALL_PARAMETERS), up_weights);
1780
1
  ccv_nnc_tensor_free(up_weights);
1781
1
  ccv_cnnp_model_share_parameters(final1, ccv_cnnp_model_parameters(final1, ALL_PARAMETERS, ALL_PARAMETERS), final0, ccv_cnnp_model_parameters(final0, ALL_PARAMETERS, ALL_PARAMETERS), _ccv_nnc_same_namer, 0);
1782
1
  ccv_cnnp_model_evaluate(final1, (ccv_cnnp_evaluate_param_t){
1783
1
    .requires_grad = 0,
1784
1
  }, TENSOR_LIST(x), TENSOR_LIST(y1), 0, 0);
1785
1
  REQUIRE_TENSOR_EQ(y0, y1, "two model now shares the weights, should have the same result");
1786
1
  CNNP_MODEL_GEN(final0, CCV_NNC_LONG_DOT_GRAPH);
1787
1
  ccv_nnc_tensor_free(x);
1788
1
  ccv_nnc_tensor_free(y0);
1789
1
  ccv_nnc_tensor_free(y1);
1790
1
  ccv_cnnp_model_free(final0);
1791
1
  ccv_cnnp_model_free(final1);
1792
1
}
1793
1794
TEST_CASE("pad a tensor with padding")
1795
1
{
1796
1
  const ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
1797
1
  const ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
1798
1
  ccv_cnnp_model_t* const pad = ccv_cnnp_pad(CCV_NNC_PAD_ZERO, DIM_ALLOC(0, 2, 2, 0), DIM_ALLOC(0, 1, 2, 1), "pad");
1799
1
  ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(pad, MODEL_IO_LIST(input0));
1800
1
  ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum");
1801
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out0, input1));
1802
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(out), 0, "tiny");
1803
1804
1
  ccv_nnc_tensor_t* const x0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 3, 3, 10), 0);
1805
1
  dsfmt_t dsfmt;
1806
1
  int i;
1807
1
  dsfmt_init_gen_rand(&dsfmt, 1);
1808
91
  for (i = 0; i < 3 * 3 * 10; 
i++90
)
1809
90
    x0->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
1810
1
  ccv_nnc_tensor_t* const x1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 6, 7, 11), 0);
1811
463
  for (i = 0; i < 6 * 7 * 11; 
i++462
)
1812
462
    x1->data.f32[i] = 1;
1813
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 6, 7, 11), 0);
1814
1
  ccv_nnc_tensor_param_t input0_params = CPU_TENSOR_NHWC(32F, 1, 3, 3, 10);
1815
1
  ccv_nnc_tensor_param_t input1_params = CPU_TENSOR_NHWC(32F, 1, 6, 7, 11);
1816
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input0_params, input1_params), CMD_NOOP(), CMD_NOOP());
1817
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){
1818
1
    .requires_grad = 0,
1819
1
  }, TENSOR_LIST(x0, x1), TENSOR_LIST(y), 0, 0);
1820
1
  int j, k;
1821
1
  ccv_nnc_tensor_t* const y0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 6, 7, 11), 0);
1822
7
  for (i = 0; i < 6; 
i++6
)
1823
48
    
for (j = 0; 6
j < 7;
j++42
)
1824
504
      
for (k = 0; 42
k < 11;
k++462
)
1825
462
        y0->data.f32[i * 7 * 11 + j * 11 + k] = (i >= 2 && 
i < 5308
&&
j >=2231
&&
j < 5165
&&
k < 1099
) ?
1 + x0->data.f32[(i - 2) * 3 * 10 + (j - 2) * 10 + k]90
:
1372
;
1826
1
  REQUIRE_TENSOR_EQ(y, y0, "it should be padded");
1827
1
  CNNP_MODEL_GEN(pad, CCV_NNC_LONG_DOT_GRAPH);
1828
1
  ccv_nnc_tensor_free(x0);
1829
1
  ccv_nnc_tensor_free(x1);
1830
1
  ccv_nnc_tensor_free(y);
1831
1
  ccv_nnc_tensor_free(y0);
1832
1
  ccv_cnnp_model_free(final);
1833
1
}
1834
1835
TEST_CASE("use move semantics to write output to the empty space of the input tensor")
1836
1
{
1837
1
  const ccv_cnnp_model_io_t input = ccv_cnnp_input();
1838
1
  ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1839
1
  ccv_cnnp_model_io_t input0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(0), DIM_ALLOC(1), "first reshape"), MODEL_IO_LIST(input));
1840
1
  ccv_cnnp_model_io_t input1 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(1), DIM_ALLOC(1), "second reshape"), MODEL_IO_LIST(input));
1841
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input0));
1842
1
  ccv_cnnp_model_io_t move0 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out1, input1));
1843
1
  const ccv_cnnp_model_io_t input2 = ccv_cnnp_input();
1844
1
  ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(ccv_cnnp_sum("sum"), MODEL_IO_LIST(move0, input2));
1845
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, input2), MODEL_IO_LIST(out1_final), 0, "tiny");
1846
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
1847
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1848
1
  ccv_nnc_tensor_t* const z = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1849
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 2);
1850
1
  ccv_nnc_tensor_param_t input2_params = CPU_TENSOR_NHWC(32F, 1);
1851
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params, input2_params), CMD_NOOP(), CMD_NOOP());
1852
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1853
1
  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1854
1
  t->data.f32[0] = 2.4;
1855
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t);
1856
1
  x->data.f32[0] = 10;
1857
1
  x->data.f32[1] = 0;
1858
1
  y->data.f32[0] = 3;
1859
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, y), TENSOR_LIST(z), 0, 0);
1860
1
  REQUIRE_EQ_WITH_TOLERANCE(z->data.f32[0], 2.4 * 10 + 3, 1e-5, "should be equal to expected value");
1861
1
  REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[1], 2.4 * 10, 1e-5, "should be equal to expected value");
1862
1
  ccv_nnc_tensor_free(x);
1863
1
  ccv_nnc_tensor_free(t);
1864
1
  ccv_nnc_tensor_free(y);
1865
1
  ccv_nnc_tensor_free(z);
1866
1
  ccv_cnnp_model_free(final);
1867
1
}
1868
1869
TEST_CASE("use variable and move semantics to co-locate input in the same tensor")
1870
1
{
1871
1
  const ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
1872
1
  const ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
1873
1
  ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1874
1
  ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0));
1875
1
  ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input1));
1876
1
  ccv_cnnp_model_io_t var = ccv_cnnp_model_apply(ccv_cnnp_variable(CPU_TENSOR_NHWC(32F, 2), "var"), MODEL_IO_LIST());
1877
1
  ccv_cnnp_model_io_t var0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(0), DIM_ALLOC(1), "first reshape"), MODEL_IO_LIST(var));
1878
1
  ccv_cnnp_model_io_t var1 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(1), DIM_ALLOC(1), "second reshape"), MODEL_IO_LIST(var));
1879
1
  ccv_cnnp_model_io_t move0 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out0, var0));
1880
1
  ccv_cnnp_model_io_t move1 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out1, var1));
1881
1
  ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0, 1, "linear");
1882
1
  ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(var));
1883
1
  ccv_cnnp_model_add_dependencies(out1_final, MODEL_IO_LIST(move0, move1));
1884
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(out1_final), 0, "tiny");
1885
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1886
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1887
1
  ccv_nnc_tensor_t* const z = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1888
1
  ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
1889
1
  ccv_nnc_tensor_param_t input2_params = CPU_TENSOR_NHWC(32F, 1);
1890
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params, input2_params), CMD_NOOP(), CMD_NOOP());
1891
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1892
1
  ccv_nnc_tensor_t* const t0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
1893
1
  t0->data.f32[0] = 2.4;
1894
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear0, ALL_PARAMETERS, 0), t0);
1895
1
  ccv_nnc_tensor_t* const t1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
1896
1
  t1->data.f32[0] = -1.1;
1897
1
  t1->data.f32[1] = 1.2;
1898
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear1, ALL_PARAMETERS, 0), t1);
1899
1
  x->data.f32[0] = 10;
1900
1
  y->data.f32[0] = 3;
1901
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, y), TENSOR_LIST(z), 0, 0);
1902
1
  REQUIRE_EQ_WITH_TOLERANCE(z->data.f32[0], -1.1 * 2.4 * 10 + 3 * 2.4 * 1.2, 1e-5, "should be equal to expected value");
1903
1
  ccv_nnc_tensor_free(x);
1904
1
  ccv_nnc_tensor_free(t0);
1905
1
  ccv_nnc_tensor_free(t1);
1906
1
  ccv_nnc_tensor_free(y);
1907
1
  ccv_nnc_tensor_free(z);
1908
1
  ccv_cnnp_model_free(final);
1909
1
}
1910
1911
TEST_CASE("use contiguous to make certain tensor contiguous during model inference")
1912
1
{
1913
1
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
1914
1
  ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(4, 1, 0, 1, "linear");
1915
1
  ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(x));
1916
  // Get the middle 2, and then apply GELU, which in Float32 / CPU, requires to be contiguous for now.
1917
1
  ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(2, 2), DIM_ALLOC(0, 2), DIM_ALLOC(4, 1), "reshape"), MODEL_IO_LIST(y));
1918
  /* Using just data transfer is not enough.
1919
  ccv_cnnp_model_io_t moved = ccv_cnnp_model_apply(ccv_cnnp_variable(CPU_TENSOR_NHWC(32F, 2, 2), 0), MODEL_IO_LIST());
1920
  ccv_cnnp_model_io_t y_copied = ccv_cnnp_model_apply(ccv_cnnp_move(0), MODEL_IO_LIST(y0, moved));
1921
  ccv_cnnp_model_io_t z = ccv_cnnp_model_apply(ccv_cnnp_sigmoid("sigmoid"), MODEL_IO_LIST(y_copied));
1922
  */
1923
  // Have to use the new contiguous model.
1924
1
  ccv_cnnp_model_io_t y_copied = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0));
1925
1
  ccv_cnnp_model_io_t z = ccv_cnnp_model_apply(ccv_cnnp_sigmoid("sigmoid"), MODEL_IO_LIST(y_copied));
1926
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
1927
1
  ccv_nnc_tensor_t* const z_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
1928
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(z), 0, "tiny");
1929
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP());
1930
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1931
1
  ccv_nnc_tensor_t* const t0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
1932
1
  t0->data.f32[0] = 2.4;
1933
1
  t0->data.f32[1] = -0.4;
1934
1
  t0->data.f32[2] = 1.2;
1935
1
  t0->data.f32[3] = -3.6;
1936
1
  ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear0, ALL_PARAMETERS, 0), t0);
1937
1
  x_tensor->data.f32[0] = 1;
1938
1
  x_tensor->data.f32[1] = -1;
1939
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(z_tensor), 0, 0);
1940
1
  REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[0], 1.0 / (1.0 + exp(-1.2)), 1e-5, "should be equal to expected value");
1941
1
  REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[1], 1.0 / (1.0 + exp(3.6)), 1e-5, "should be equal to expected value");
1942
1
  REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[2], 1.0 / (1.0 + exp(1.2)), 1e-5, "should be equal to expected value");
1943
1
  REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[3], 1.0 / (1.0 + exp(-3.6)), 1e-5, "should be equal to expected value");
1944
1
  ccv_nnc_tensor_free(x_tensor);
1945
1
  ccv_nnc_tensor_free(t0);
1946
1
  ccv_nnc_tensor_free(z_tensor);
1947
1
  ccv_cnnp_model_free(final);
1948
1
}
1949
1950
TEST_CASE("chunk a tensor into several smaller ones, variant 1")
1951
1
{
1952
1
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
1953
1
  ccv_cnnp_model_t* const chunk = ccv_cnnp_chunk(2, 1, "chunk");
1954
1
  ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(chunk, MODEL_IO_LIST(x));
1955
1
  ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(y));
1956
1
  ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0));
1957
1
  ccv_cnnp_model_io_t y1 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(y));
1958
1
  ccv_cnnp_model_io_t o1 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y1));
1959
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(o0, o1), 0, "tiny");
1960
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
1961
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP());
1962
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
1963
1
  x_tensor->data.f32[0] = 1;
1964
1
  x_tensor->data.f32[1] = -1;
1965
1
  x_tensor->data.f32[2] = 2;
1966
1
  x_tensor->data.f32[3] = 3;
1967
1
  x_tensor->data.f32[4] = 4;
1968
1
  x_tensor->data.f32[5] = 5;
1969
1
  x_tensor->data.f32[6] = 6;
1970
1
  x_tensor->data.f32[7] = 7;
1971
1
  ccv_nnc_tensor_t* const y0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
1972
1
  ccv_nnc_tensor_t* const y1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
1973
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(y0_tensor, y1_tensor), 0, 0);
1974
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[0], 1, 1e-5, "should be equal to expected value");
1975
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[1], -1, 1e-5, "should be equal to expected value");
1976
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[2], 4, 1e-5, "should be equal to expected value");
1977
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[3], 5, 1e-5, "should be equal to expected value");
1978
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[0], 2, 1e-5, "should be equal to expected value");
1979
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[1], 3, 1e-5, "should be equal to expected value");
1980
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[2], 6, 1e-5, "should be equal to expected value");
1981
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[3], 7, 1e-5, "should be equal to expected value");
1982
1
  ccv_nnc_tensor_free(x_tensor);
1983
1
  ccv_nnc_tensor_free(y0_tensor);
1984
1
  ccv_nnc_tensor_free(y1_tensor);
1985
1
  ccv_cnnp_model_free(final);
1986
1
}
1987
1988
TEST_CASE("chunk a tensor into several smaller ones, variant 2")
1989
1
{
1990
1
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
1991
1
  ccv_cnnp_model_t* const chunk = ccv_cnnp_chunk(2, 0, "chunk");
1992
1
  ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(chunk, MODEL_IO_LIST(x));
1993
1
  ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(y));
1994
1
  ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0));
1995
1
  ccv_cnnp_model_io_t y1 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(y));
1996
1
  ccv_cnnp_model_io_t o1 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y1));
1997
1
  ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(o0, o1), 0, "tiny");
1998
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
1999
1
  ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP());
2000
1
  CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
2001
1
  x_tensor->data.f32[0] = 1;
2002
1
  x_tensor->data.f32[1] = -1;
2003
1
  x_tensor->data.f32[2] = 2;
2004
1
  x_tensor->data.f32[3] = 3;
2005
1
  x_tensor->data.f32[4] = 4;
2006
1
  x_tensor->data.f32[5] = 5;
2007
1
  x_tensor->data.f32[6] = 6;
2008
1
  x_tensor->data.f32[7] = 7;
2009
1
  ccv_nnc_tensor_t* const y0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 4), 0);
2010
1
  ccv_nnc_tensor_t* const y1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 4), 0);
2011
1
  ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(y0_tensor, y1_tensor), 0, 0);
2012
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[0], 1, 1e-5, "should be equal to expected value");
2013
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[1], -1, 1e-5, "should be equal to expected value");
2014
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[2], 2, 1e-5, "should be equal to expected value");
2015
1
  REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[3], 3, 1e-5, "should be equal to expected value");
2016
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[0], 4, 1e-5, "should be equal to expected value");
2017
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[1], 5, 1e-5, "should be equal to expected value");
2018
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[2], 6, 1e-5, "should be equal to expected value");
2019
1
  REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[3], 7, 1e-5, "should be equal to expected value");
2020
1
  ccv_nnc_tensor_free(x_tensor);
2021
1
  ccv_nnc_tensor_free(y0_tensor);
2022
1
  ccv_nnc_tensor_free(y1_tensor);
2023
1
  ccv_cnnp_model_free(final);
2024
1
}
2025
2026
TEST_CASE("LoRA fine-tuning GEMM set is_trainable to false and with gradient checkpointing")
{
	// Build a LoRA-style graph: a frozen 10x10 dense branch plus a trainable
	// low-rank adapter (10 -> 2 "down", then 2 -> 10 "up") summed into the output.
	// NOTE(review): the -1 trainable flag on "linear" presumably defers to the
	// enclosing model's (non-trainable) setting — confirm against ccv_cnnp_dense docs.
	const ccv_cnnp_model_io_t input = ccv_cnnp_input();
	ccv_cnnp_model_t* const frozen_linear = ccv_cnnp_dense(10, 1, 0, -1, "linear");
	ccv_cnnp_model_t* const lora_down = ccv_cnnp_dense(2, 1, 0, 1, "down");
	ccv_cnnp_model_t* const lora_up = ccv_cnnp_dense(10, 1, 0, 1, "up");
	const ccv_cnnp_model_io_t linear_out = ccv_cnnp_model_apply(frozen_linear, MODEL_IO_LIST(input));
	const ccv_cnnp_model_io_t down_out = ccv_cnnp_model_apply(lora_down, MODEL_IO_LIST(input));
	const ccv_cnnp_model_io_t up_out = ccv_cnnp_model_apply(lora_up, MODEL_IO_LIST(down_out));
	ccv_cnnp_model_t* const residual_sum = ccv_cnnp_sum("sum");
	const ccv_cnnp_model_io_t model_out = ccv_cnnp_model_apply(residual_sum, MODEL_IO_LIST(linear_out, up_out));
	ccv_cnnp_model_t* const model = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(model_out), 0, "tiny");
	ccv_cnnp_model_set_gradient_checkpointing(model, 1);
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
	// Seed the frozen branch with the 10x10 identity so it starts as a pass-through.
	ccv_nnc_tensor_t* const identity = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0);
	for (int i = 0; i < 10 * 10; i++)
		identity->data.f32[i] = (i / 10 == i % 10) ? 1 : 0;
	// Zero the up-projection so the adapter contributes nothing at step 0.
	ccv_nnc_tensor_t* const zeros = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 2), 0);
	for (int i = 0; i < 10 * 2; i++)
		zeros->data.f32[i] = 0;
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
	ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10);
	ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(input_params), CMD_SGD_FORWARD(1, 0.01, 1, 0.1, 0, 0), CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN));
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(frozen_linear, ALL_PARAMETERS, 0), identity);
	ccv_nnc_tensor_free(identity);
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(lora_up, ALL_PARAMETERS, 0), zeros);
	ccv_nnc_tensor_free(zeros);
	for (int i = 0; i < 10; i++)
		x->data.f32[i] = i;
	ccv_nnc_tensor_t* const target = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
	for (int i = 0; i < 10; i++)
		target->data.f32[i] = 10 - i;
	// Take 10 SGD steps, then one more fit so y holds the post-training prediction.
	for (int i = 0; i < 10; i++)
		ccv_cnnp_model_fit(model, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0);
	ccv_cnnp_model_fit(model, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0);
	CNNP_MODEL_GEN(model, CCV_NNC_LONG_DOT_GRAPH);
	REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, y->data.f32, target->data.f32, 10, 1e-2, "should match the target after fine-tuning");
	// Only the adapter layers should report trainable; the composed model does not.
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(model), 0, "should be marked as not trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(lora_down), 1, "should be marked as trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(lora_up), 1, "should be marked as trainable");
	ccv_nnc_tensor_free(x);
	ccv_nnc_tensor_free(target);
	ccv_nnc_tensor_free(y);
	ccv_cnnp_model_free(model);
}
2072
2073
TEST_CASE("LoRA fine-tuning MLP with GELU, set is_trainable to false and with gradient checkpointing")
{
	// Gated MLP with LoRA adapters on every dense layer:
	//   (fc1 + up_fc1(down_fc1)) * GELU(fc2 + up_fc2(down_fc2)) -> fc3 + up_fc3(down_fc3)
	// The fc* layers are frozen; only the down/up adapter pairs train.
	ccv_nnc_stream_context_set_seed(0, 47);
	const ccv_cnnp_model_io_t input = ccv_cnnp_input();
	ccv_cnnp_model_t* const fc1 = ccv_cnnp_dense(10, 1, 0, -1, "fc1");
	ccv_cnnp_model_t* const fc2 = ccv_cnnp_dense(10, 1, 0, -1, "fc2");
	ccv_cnnp_model_t* const down_fc1 = ccv_cnnp_dense(2, 1, 0, 1, "down_fc1");
	ccv_cnnp_model_t* const up_fc1 = ccv_cnnp_dense(10, 1, 0, 1, "up_fc1");
	ccv_cnnp_model_t* const down_fc2 = ccv_cnnp_dense(2, 1, 0, 1, "down_fc2");
	ccv_cnnp_model_t* const up_fc2 = ccv_cnnp_dense(10, 1, 0, 1, "up_fc2");
	ccv_cnnp_model_t* const fc3 = ccv_cnnp_dense(5, 1, 0, -1, "fc3");
	ccv_cnnp_model_t* const down_fc3 = ccv_cnnp_dense(2, 1, 0, 1, "down_fc3");
	ccv_cnnp_model_t* const up_fc3 = ccv_cnnp_dense(5, 1, 0, 1, "up_fc3");
	const ccv_cnnp_model_io_t fc1_out = ccv_cnnp_model_apply(fc1, MODEL_IO_LIST(input));
	const ccv_cnnp_model_io_t fc2_out = ccv_cnnp_model_apply(fc2, MODEL_IO_LIST(input));
	const ccv_cnnp_model_io_t fc1_down_out = ccv_cnnp_model_apply(down_fc1, MODEL_IO_LIST(input));
	const ccv_cnnp_model_io_t fc1_up_out = ccv_cnnp_model_apply(up_fc1, MODEL_IO_LIST(fc1_down_out));
	const ccv_cnnp_model_io_t fc2_down_out = ccv_cnnp_model_apply(down_fc2, MODEL_IO_LIST(input));
	const ccv_cnnp_model_io_t fc2_up_out = ccv_cnnp_model_apply(up_fc2, MODEL_IO_LIST(fc2_down_out));
	const ccv_cnnp_model_io_t fc1_sum = ccv_cnnp_model_apply(ccv_cnnp_sum("sum_fc1"), MODEL_IO_LIST(fc1_out, fc1_up_out));
	const ccv_cnnp_model_io_t fc2_sum = ccv_cnnp_model_apply(ccv_cnnp_sum("sum_fc2"), MODEL_IO_LIST(fc2_out, fc2_up_out));
	const ccv_cnnp_model_io_t fc2_gelu = ccv_cnnp_model_apply(ccv_cnnp_gelu(0, "gelu_fc2"), MODEL_IO_LIST(fc2_sum));
	const ccv_cnnp_model_io_t gated = ccv_cnnp_model_apply(ccv_cnnp_mul(1, "mul_fc12"), MODEL_IO_LIST(fc1_sum, fc2_gelu));
	const ccv_cnnp_model_io_t fc3_out = ccv_cnnp_model_apply(fc3, MODEL_IO_LIST(gated));
	const ccv_cnnp_model_io_t fc3_down_out = ccv_cnnp_model_apply(down_fc3, MODEL_IO_LIST(gated));
	const ccv_cnnp_model_io_t fc3_up_out = ccv_cnnp_model_apply(up_fc3, MODEL_IO_LIST(fc3_down_out));
	const ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_sum("sum_fc3"), MODEL_IO_LIST(fc3_out, fc3_up_out));
	ccv_cnnp_model_t* const model = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out), 0, "tiny");
	ccv_cnnp_model_set_gradient_checkpointing(model, 1);
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
	// 10x10 identity for the two frozen square layers (pass-through at init).
	ccv_nnc_tensor_t* const identity_10x10 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0);
	for (int i = 0; i < 10 * 10; i++)
		identity_10x10->data.f32[i] = (i / 10 == i % 10) ? 1 : 0;
	ccv_nnc_tensor_t* const zeros_10x2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 2), 0);
	for (int i = 0; i < 10 * 2; i++)
		zeros_10x2->data.f32[i] = 0;
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5), 0);
	ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10);
	ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(input_params), CMD_SGD_FORWARD(1, 0.001, 1, 0.1, 0, 0), CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN));
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(fc1, ALL_PARAMETERS, 0), identity_10x10);
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(fc2, ALL_PARAMETERS, 0), identity_10x10);
	ccv_nnc_tensor_free(identity_10x10);
	// 5x10 truncated identity for the frozen projection layer.
	ccv_nnc_tensor_t* const identity_5x10 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 10), 0);
	for (int i = 0; i < 5 * 10; i++)
		identity_5x10->data.f32[i] = (i / 10 == i % 10) ? 1 : 0;
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(fc3, ALL_PARAMETERS, 0), identity_5x10);
	ccv_nnc_tensor_free(identity_5x10);
	// Zero every up-projection so all adapters start as no-ops.
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(up_fc1, ALL_PARAMETERS, 0), zeros_10x2);
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(up_fc2, ALL_PARAMETERS, 0), zeros_10x2);
	ccv_nnc_tensor_free(zeros_10x2);
	ccv_nnc_tensor_t* const zeros_5x2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 2), 0);
	for (int i = 0; i < 5 * 2; i++)
		zeros_5x2->data.f32[i] = 0;
	ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(up_fc3, ALL_PARAMETERS, 0), zeros_5x2);
	ccv_nnc_tensor_free(zeros_5x2);
	for (int i = 0; i < 10; i++)
		x->data.f32[i] = i;
	ccv_nnc_tensor_t* const target = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5), 0);
	for (int i = 0; i < 5; i++)
		target->data.f32[i] = 5 - i;
	// 100 SGD steps, then one more fit so y holds the post-training prediction.
	for (int i = 0; i < 100; i++)
		ccv_cnnp_model_fit(model, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0);
	ccv_cnnp_model_fit(model, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0);
	CNNP_MODEL_GEN(model, CCV_NNC_LONG_DOT_GRAPH);
	REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, y->data.f32, target->data.f32, 5, 1e-1, "should match the target after fine-tuning");
	// Only the adapter layers should report trainable; the composed model does not.
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(model), 0, "should be marked as not trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(down_fc1), 1, "should be marked as trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(up_fc1), 1, "should be marked as trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(down_fc2), 1, "should be marked as trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(up_fc2), 1, "should be marked as trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(down_fc3), 1, "should be marked as trainable");
	REQUIRE_EQ(ccv_cnnp_model_is_trainable(up_fc3), 1, "should be marked as trainable");
	ccv_nnc_tensor_free(x);
	ccv_nnc_tensor_free(target);
	ccv_nnc_tensor_free(y);
	ccv_cnnp_model_free(model);
}
2151
2152
#include "case_main.h"