/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/cnnp.core.tests.c
Line | Count | Source |
1 | | #include "case.h" |
2 | | #include "ccv_case.h" |
3 | | #include "ccv_nnc_case.h" |
4 | | #include <ccv.h> |
5 | | #include <nnc/ccv_nnc.h> |
6 | | #include <nnc/ccv_nnc_easy.h> |
7 | | #include "3rdparty/dsfmt/dSFMT.h" |
8 | | |
9 | | TEST_SETUP() |
10 | | { |
11 | | ccv_nnc_init(); |
12 | | } |
13 | | |
14 | | static ccv_cnnp_model_t* simple_cifar_10(void) |
15 | 2 | { |
16 | 2 | return ccv_cnnp_sequential_new(MODEL_LIST( |
17 | 2 | ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), |
18 | 2 | ccv_cnnp_relu(0), |
19 | 2 | ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0), |
20 | 2 | ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), |
21 | 2 | ccv_cnnp_relu(0), |
22 | 2 | ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0), |
23 | 2 | ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), |
24 | 2 | ccv_cnnp_relu(0), |
25 | 2 | ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0), |
26 | 2 | ccv_cnnp_flatten(0), |
27 | 2 | ccv_cnnp_dense(256, 0, 0, 1, 0), |
28 | 2 | ccv_cnnp_relu(0), |
29 | 2 | ccv_cnnp_dense(10, 0, 0, 1, 0), |
30 | 2 | ccv_cnnp_softmax(0) |
31 | 2 | ), 1, 0); |
32 | 2 | } |
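| | // A quick shape walk-through of the model above (a sketch, assuming the
| | // pooling arithmetic n -> (n - 3) / 2 + 1 for a 3x3 window with stride 2
| | // and no padding): a 31x31x3 input stays at 31x31 through each 5x5
| | // convolution (stride 1, padding 2), while the pools shrink the spatial
| | // size 31 -> 15 -> 7 -> 3. Flatten therefore emits 3 * 3 * 64 = 576
| | // values, which feed the 256-unit and then the 10-unit dense layers.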
33 | | |
34 | | TEST_CASE("compile simple cifar-10 model") |
35 | 1 | { |
36 | 1 | ccv_cnnp_model_t* const sequential0 = simple_cifar_10(); |
37 | 1 | ccv_cnnp_model_t* const sequential = ccv_cnnp_model_copy(sequential0, 1); |
38 | 1 | ccv_cnnp_model_free(sequential0); |
39 | 1 | const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3); |
40 | 1 | ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD()); |
41 | 1 | ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0); |
42 | 1 | dsfmt_t dsfmt; |
43 | 1 | int i; |
44 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
45 | 2.88k | for (i = 0; i < 31 * 31 * 3; i++)
46 | 2.88k | input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
47 | 1 | ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0); |
48 | 1 | memset(output_tensor->data.f32, 0, sizeof(float) * 10); |
49 | 1 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
50 | 1 | .is_test = 1 |
51 | 1 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
52 | 1 | int t = 0; |
53 | 1 | float max = output_tensor->data.f32[0]; |
54 | 10 | for (i = 1; i < 10; i++)
55 | 9 | if (output_tensor->data.f32[i] > max) |
56 | 2 | max = output_tensor->data.f32[i], t = i; |
57 | 1 | const int target = (t + 1) % 10; |
58 | 1 | REQUIRE_NOT_EQ(target, t, "should not fit"); |
59 | | // Doing training. |
60 | 1 | ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
61 | 1 | fit_tensor->data.f32[0] = target; |
62 | 101 | for (i = 0; i < 100; i++)
63 | 100 | ccv_cnnp_model_fit(sequential, TENSOR_LIST(input_tensor), TENSOR_LIST(fit_tensor), TENSOR_LIST(output_tensor), 0, 0); |
64 | 1 | memset(output_tensor->data.f32, 0, sizeof(float) * 10); |
65 | | // After training, it should fit. |
66 | 1 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
67 | 1 | .is_test = 1 |
68 | 1 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
69 | 1 | t = 0; |
70 | 1 | max = output_tensor->data.f32[0]; |
71 | 10 | for (i = 1; i < 10; i++)
72 | 9 | if (output_tensor->data.f32[i] > max) |
73 | 5 | max = output_tensor->data.f32[i], t = i; |
74 | 1 | REQUIRE_EQ(target, t, "should fit"); |
75 | 1 | remove("/tmp/compile_simple_cifar_10_model.checkpoint"); |
76 | 1 | ccv_cnnp_model_write_to_file(sequential, "/tmp/compile_simple_cifar_10_model.checkpoint", 0); |
77 | 1 | CNNP_MODEL_GEN(sequential, CCV_NNC_LONG_DOT_GRAPH); |
78 | 1 | ccv_cnnp_model_free(sequential); |
79 | 1 | ccv_cnnp_model_t* const sequential2 = simple_cifar_10(); |
80 | 1 | ccv_cnnp_model_compile(sequential2, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD()); |
81 | | // Load from the checkpoint file. |
82 | 1 | ccv_cnnp_model_read_from_file("/tmp/compile_simple_cifar_10_model.checkpoint", 0, sequential2); |
83 | 1 | remove("/tmp/compile_simple_cifar_10_model.checkpoint"); |
84 | 1 | memset(output_tensor->data.f32, 0, sizeof(float) * 10); |
85 | 1 | ccv_cnnp_model_evaluate(sequential2, (ccv_cnnp_evaluate_param_t){ |
86 | 1 | .is_test = 1 |
87 | 1 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
88 | 1 | t = 0; |
89 | 1 | max = output_tensor->data.f32[0]; |
90 | 10 | for (i = 1; i < 10; i++)
91 | 9 | if (output_tensor->data.f32[i] > max) |
92 | 5 | max = output_tensor->data.f32[i], t = i; |
93 | 1 | REQUIRE_EQ(target, t, "should fit"); |
94 | 1 | ccv_cnnp_model_free(sequential2); |
95 | 1 | ccv_nnc_tensor_free(input_tensor); |
96 | 1 | ccv_nnc_tensor_free(fit_tensor); |
97 | 1 | ccv_nnc_tensor_free(output_tensor); |
98 | 1 | } |
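| | // The argmax scan above recurs throughout this file. A minimal helper with
| | // the same behavior would look like this (hypothetical, for illustration
| | // only; it is not part of the test or the library):
| | static int _argmax(const float* const f, const int n)
| | {
| | 	int t = 0, i;
| | 	for (i = 1; i < n; i++)
| | 		if (f[i] > f[t]) // strictly greater, so ties keep the earliest index
| | 			t = i;
| | 	return t;
| | }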
99 | | |
100 | | static int _ccv_cnnp_model_notified = 0; |
101 | | |
102 | | static void _ccv_cnnp_model_hook(const ccv_cnnp_model_t* const model, const int tag, void* const payload, void* const context) |
103 | 3 | { |
104 | 3 | if (payload) |
105 | 3 | ++_ccv_cnnp_model_notified; |
106 | 3 | } |
107 | | |
108 | | TEST_CASE("inception layer for model") |
109 | 1 | { |
110 | 1 | const ccv_cnnp_model_io_t x = ccv_cnnp_input(); |
111 | 1 | _ccv_cnnp_model_notified = 0; |
112 | 1 | ccv_cnnp_model_t* const conv_1 = ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0); |
113 | 1 | ccv_cnnp_model_notify_hook(conv_1, _ccv_cnnp_model_hook, 0); |
114 | 1 | ccv_cnnp_model_io_t tower_1 = ccv_cnnp_model_apply(conv_1, MODEL_IO_LIST(x)); |
115 | 1 | ccv_cnnp_model_t* const relu_1 = ccv_cnnp_relu(0); |
116 | 1 | ccv_cnnp_model_notify_hook(relu_1, _ccv_cnnp_model_hook, 0); |
117 | 1 | tower_1 = ccv_cnnp_model_apply(relu_1, MODEL_IO_LIST(tower_1)); |
118 | 1 | tower_1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, 1, 0), MODEL_IO_LIST(tower_1)); |
119 | 1 | tower_1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_1)); |
120 | | |
121 | 1 | ccv_cnnp_model_io_t tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(x)); |
122 | 1 | tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2)); |
123 | 1 | tower_2 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), MODEL_IO_LIST(tower_2)); |
124 | 1 | tower_2 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_2)); |
125 | | |
126 | 1 | ccv_cnnp_model_io_t tower_3 = ccv_cnnp_model_apply(ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((1, 1), (1, 1)), 0), MODEL_IO_LIST(x)); |
127 | 1 | tower_3 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(tower_3)); |
128 | 1 | tower_3 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(tower_3)); |
129 | 1 | ccv_cnnp_model_t* const add_1 = ccv_cnnp_sum(0); |
130 | 1 | ccv_cnnp_model_notify_hook(add_1, _ccv_cnnp_model_hook, 0); |
131 | 1 | ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(add_1, MODEL_IO_LIST(tower_1, tower_2, tower_3)); |
132 | 1 | REQUIRE_EQ(0, _ccv_cnnp_model_notified, "haven't notified"); |
133 | 1 | ccv_cnnp_model_t* const inception0 = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(output), 1, 0); |
134 | 1 | ccv_cnnp_model_notify(inception0, 0, inception0); |
135 | 1 | ccv_cnnp_model_t* const inception = ccv_cnnp_model_copy(inception0, 1); |
136 | 1 | REQUIRE_EQ(3, _ccv_cnnp_model_notified, "3 models changed owner"); |
137 | 1 | ccv_cnnp_model_free(inception0); |
138 | 1 | const ccv_nnc_tensor_param_t input = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256); |
139 | 1 | ccv_cnnp_model_compile(inception, &input, 1, CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD()); |
140 | 1 | CNNP_MODEL_GEN(inception, CCV_NNC_LONG_DOT_GRAPH); |
141 | 1 | ccv_cnnp_model_free(inception); |
142 | 1 | } |
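| | // For ccv_cnnp_sum to combine the three towers above, they must agree on
| | // shape: with a 256x256 input, the 3x3 convolution (padding 1), the 5x5
| | // convolution (padding 2), and the stride-1 3x3 max pool (padding 1) all
| | // preserve the 256x256 spatial size, and every tower ends with 64 channels,
| | // so the sum sees three 1x64x256x256 tensors.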
143 | | |
144 | | static ccv_cnnp_model_t* _ccv_multiple_outputs_functional_model(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context) |
145 | 1 | { |
146 | 1 | ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
147 | 1 | ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
148 | 1 | ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(input0)); |
149 | 1 | output0 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output0)); |
150 | 1 | ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, 1, 0), MODEL_IO_LIST(input1)); |
151 | 1 | output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1)); |
152 | 1 | ccv_cnnp_model_t* model0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(output0, output1), 1, 0); |
153 | 1 | input0 = ccv_cnnp_input(); |
154 | 1 | input1 = ccv_cnnp_input(); |
155 | 1 | output0 = ccv_cnnp_model_apply(model0, MODEL_IO_LIST(input0, input1)); |
156 | 1 | ccv_cnnp_model_io_t input2 = ccv_cnnp_input(); |
157 | 1 | output1 = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), MODEL_IO_LIST(input2)); |
158 | 1 | output1 = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(output1)); |
159 | 1 | ccv_cnnp_model_t* interim = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0, output1), 1, 0); |
160 | 1 | input0 = ccv_cnnp_input(); |
161 | 1 | input1 = ccv_cnnp_input(); |
162 | 1 | input2 = ccv_cnnp_input(); |
163 | 1 | output0 = ccv_cnnp_model_apply(interim, MODEL_IO_LIST(input0, input1, input2)); |
164 | 1 | output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0)); |
165 | 1 | return ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2), MODEL_IO_LIST(output0), 1, 0); |
166 | 1 | } |
167 | | |
168 | | TEST_CASE("functional model's IO can represent multiple outputs") |
169 | 1 | { |
170 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_dynamic_new(_ccv_multiple_outputs_functional_model, 0, 0); |
171 | 1 | const ccv_nnc_tensor_param_t a0 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256); |
172 | 1 | const ccv_nnc_tensor_param_t a1 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256); |
173 | 1 | const ccv_nnc_tensor_param_t a2 = GPU_TENSOR_NCHW(000, 32F, 1, 3, 256, 256); |
174 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, a2), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD()); |
175 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
176 | 1 | ccv_cnnp_model_free(final); |
177 | 1 | } |
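| | // ccv_cnnp_dynamic_new takes a builder callback; judging from the builder's
| | // signature (it receives the input tensor parameters), construction is
| | // deferred until the model is compiled and those parameters are known. The
| | // test passes 0 for the builder context since the builder ignores it.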
178 | | |
179 | | TEST_CASE("functional model's IO outputs can be non-terminal") |
180 | 1 | { |
181 | 1 | ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
182 | 1 | ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
183 | 1 | ccv_cnnp_model_io_t input2 = ccv_cnnp_input(); |
184 | 1 | ccv_cnnp_model_io_t input3 = ccv_cnnp_input(); |
185 | 1 | ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(input0, input1)); |
186 | 1 | ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_mul(1, 0), MODEL_IO_LIST(output0, input2)); |
187 | 1 | output1 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output1, input3)); |
188 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2, input3), MODEL_IO_LIST(output0, output1), 1, 0); |
189 | 1 | const ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1); |
190 | 1 | const ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1); |
191 | 1 | const ccv_nnc_tensor_param_t a2 = CPU_TENSOR_NCHW(32F, 1); |
192 | 1 | const ccv_nnc_tensor_param_t a3 = CPU_TENSOR_NCHW(32F, 1); |
193 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, a2, a3), CMD_NOOP(), CMD_NOOP()); |
194 | 1 | ccv_nnc_tensor_t* const a0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
195 | 1 | ccv_nnc_tensor_t* const a1_tensor = ccv_nnc_tensor_new(0, a1, 0); |
196 | 1 | ccv_nnc_tensor_t* const a2_tensor = ccv_nnc_tensor_new(0, a2, 0); |
197 | 1 | ccv_nnc_tensor_t* const a3_tensor = ccv_nnc_tensor_new(0, a3, 0); |
198 | 1 | ccv_nnc_tensor_t* const b0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
199 | 1 | ccv_nnc_tensor_t* const b1_tensor = ccv_nnc_tensor_new(0, a0, 0); |
200 | 1 | a0_tensor->data.f32[0] = 0.5; |
201 | 1 | a1_tensor->data.f32[0] = 0.75; |
202 | 1 | a2_tensor->data.f32[0] = 1.75; |
203 | 1 | a3_tensor->data.f32[0] = 2.5; |
204 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
205 | 1 | .is_test = 1 |
206 | 1 | }, TENSOR_LIST(a0_tensor, a1_tensor, a2_tensor, a3_tensor), TENSOR_LIST(b0_tensor, b1_tensor), 0, 0); |
207 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
208 | 1 | REQUIRE_EQ_WITH_TOLERANCE(b0_tensor->data.f32[0], 0.5 + 0.75, 1e-5, "should match the intermediate result"); |
209 | 1 | REQUIRE_EQ_WITH_TOLERANCE(b1_tensor->data.f32[0], (0.5 + 0.75) * 1.75 + 2.5, 1e-5, "should match the final result"); |
210 | 1 | ccv_cnnp_model_free(final); |
211 | 1 | ccv_nnc_tensor_free(a0_tensor); |
212 | 1 | ccv_nnc_tensor_free(a1_tensor); |
213 | 1 | ccv_nnc_tensor_free(a2_tensor); |
214 | 1 | ccv_nnc_tensor_free(a3_tensor); |
215 | 1 | ccv_nnc_tensor_free(b0_tensor); |
216 | 1 | ccv_nnc_tensor_free(b1_tensor); |
217 | 1 | } |
218 | | |
219 | | TEST_CASE("functional model's IO can introduce non-functional dependencies") |
220 | 1 | { |
221 | 1 | ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
222 | 1 | ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
223 | 1 | ccv_cnnp_model_io_t input2 = ccv_cnnp_input(); |
224 | 1 | ccv_cnnp_model_io_t input3 = ccv_cnnp_input(); |
225 | 1 | ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(input0, input1)); |
226 | 1 | ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(ccv_cnnp_mul(1, 0), MODEL_IO_LIST(input2, input3)); |
227 | | // non-functional dependency. |
228 | 1 | ccv_cnnp_model_add_dependencies(output1, MODEL_IO_LIST(output0)); |
229 | 1 | output1 = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0, output1)); |
230 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, input2, input3), MODEL_IO_LIST(output0, output1), 1, 0); |
231 | 1 | const ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1); |
232 | 1 | const ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1); |
233 | 1 | const ccv_nnc_tensor_param_t a2 = CPU_TENSOR_NCHW(32F, 1); |
234 | 1 | const ccv_nnc_tensor_param_t a3 = CPU_TENSOR_NCHW(32F, 1); |
235 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, a2, a3), CMD_NOOP(), CMD_NOOP()); |
236 | 1 | ccv_nnc_tensor_t* const a0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
237 | 1 | ccv_nnc_tensor_t* const a1_tensor = ccv_nnc_tensor_new(0, a1, 0); |
238 | 1 | ccv_nnc_tensor_t* const a2_tensor = ccv_nnc_tensor_new(0, a2, 0); |
239 | 1 | ccv_nnc_tensor_t* const a3_tensor = ccv_nnc_tensor_new(0, a3, 0); |
240 | 1 | ccv_nnc_tensor_t* const b0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
241 | 1 | ccv_nnc_tensor_t* const b1_tensor = ccv_nnc_tensor_new(0, a0, 0); |
242 | 1 | a0_tensor->data.f32[0] = 0.5; |
243 | 1 | a1_tensor->data.f32[0] = 0.75; |
244 | 1 | a2_tensor->data.f32[0] = 1.75; |
245 | 1 | a3_tensor->data.f32[0] = 2.5; |
246 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
247 | 1 | .is_test = 1 |
248 | 1 | }, TENSOR_LIST(a0_tensor, a1_tensor, a2_tensor, a3_tensor), TENSOR_LIST(b0_tensor, b1_tensor), 0, 0); |
249 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
250 | 1 | REQUIRE_EQ_WITH_TOLERANCE(b0_tensor->data.f32[0], 0.5 + 0.75, 1e-5, "should match the intermediate result"); |
251 | 1 | REQUIRE_EQ_WITH_TOLERANCE(b1_tensor->data.f32[0], (0.5 + 0.75) + (1.75 * 2.5), 1e-5, "should match the final result"); |
252 | 1 | ccv_cnnp_model_free(final); |
253 | 1 | ccv_nnc_tensor_free(a0_tensor); |
254 | 1 | ccv_nnc_tensor_free(a1_tensor); |
255 | 1 | ccv_nnc_tensor_free(a2_tensor); |
256 | 1 | ccv_nnc_tensor_free(a3_tensor); |
257 | 1 | ccv_nnc_tensor_free(b0_tensor); |
258 | 1 | ccv_nnc_tensor_free(b1_tensor); |
259 | 1 | } |
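| | // ccv_cnnp_model_add_dependencies only adds an execution-order edge: output0
| | // runs before output1 even though no tensor flows between them. The
| | // arithmetic is unchanged, which is what the checks above verify:
| | // b0 = a0 + a1 = 1.25 and b1 = (a0 + a1) + a2 * a3 = 1.25 + 4.375 = 5.625.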
260 | | |
261 | | TEST_CASE("make sure reuse model enables share weights") |
262 | 1 | { |
263 | 1 | ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
264 | 1 | ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
265 | 1 | ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0, 1, 0); |
266 | 1 | ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0)); |
267 | 1 | ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1)); |
268 | 1 | ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(output0, output1)); |
269 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(final_output), 1, 0); |
270 | 1 | ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1); |
271 | 1 | ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1); |
272 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1), CMD_SGD_FORWARD(1, 0.001, 1, 0.99, 0.9, 0), CMD_CATEGORICAL_CROSSENTROPY_FORWARD()); |
273 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
274 | 1 | ccv_cnnp_model_free(final); |
275 | 1 | } |
276 | | |
277 | | TEST_CASE("train model with share weights and L2 loss") |
278 | 1 | { |
279 | 1 | ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
280 | 1 | ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
281 | 1 | ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0, 1, 0); |
282 | 1 | ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0)); |
283 | 1 | ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1)); |
284 | 1 | ccv_cnnp_model_io_t fit0 = ccv_cnnp_input(); |
285 | 1 | ccv_cnnp_model_io_t fit1 = ccv_cnnp_input(); |
286 | | // Because we don't have L2 loss function available yet, manually create L2 loss. |
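| | // In equations: diff = output - fit (ADD with coefficients 1 and -1) and
| | // sqr = diff * diff (EWPROD), so the summed output below is
| | // (out0 - fit0)^2 + (out1 - fit1)^2.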
287 | 1 | ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply( |
288 | 1 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
289 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
290 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
291 | 1 | MODEL_IO_LIST(output0, fit0)); |
292 | 1 | ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply( |
293 | 1 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
294 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
295 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
296 | 1 | MODEL_IO_LIST(diff0, diff0)); |
297 | 1 | ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply( |
298 | 1 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
299 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
300 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
301 | 1 | MODEL_IO_LIST(output1, fit1)); |
302 | 1 | ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply( |
303 | 1 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
304 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
305 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
306 | 1 | MODEL_IO_LIST(diff1, diff1)); |
307 | 1 | ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1)); |
308 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 1, 0); |
309 | 1 | ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1); |
310 | 1 | ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1); |
311 | 1 | ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1); |
312 | 1 | ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1); |
313 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
314 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
315 | 1 | ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
316 | 1 | ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0); |
317 | 1 | ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0); |
318 | 1 | ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0); |
319 | 1 | ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
320 | 1 | a0_tensor->data.f32[0] = 1; |
321 | 1 | a1_tensor->data.f32[0] = 3; |
322 | 1 | b0_tensor->data.f32[0] = 2; |
323 | 1 | b1_tensor->data.f32[0] = 3; |
324 | 1 | int i; |
325 | 11 | for (i = 0; i < 10; i++)
326 | 10 | ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0); |
327 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0); |
328 | 101 | for (i = 0; i < 100; i++)
329 | 100 | ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0); |
330 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0); |
331 | 1.00k | for (i = 0; i < 1000; i++)
332 | 1.00k | ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0); |
333 | 1 | a0_tensor->data.f32[0] = 2; |
334 | 1 | a1_tensor->data.f32[0] = 2; // Both predictions will be 2.5.
335 | 1 | b0_tensor->data.f32[0] = 2; // diff is 0.5 |
336 | 1 | b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5. |
337 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
338 | 1 | .is_test = 1 |
339 | 1 | }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0); |
340 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "We should have linearly regressed this.");
341 | 1 | ccv_nnc_tensor_free(a0_tensor); |
342 | 1 | ccv_nnc_tensor_free(a1_tensor); |
343 | 1 | ccv_nnc_tensor_free(b0_tensor); |
344 | 1 | ccv_nnc_tensor_free(b1_tensor); |
345 | 1 | ccv_nnc_tensor_free(o0_tensor); |
346 | 1 | ccv_cnnp_model_free(final); |
347 | 1 | } |
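| | // Why the expected 0.5 above: the shared dense layer solves
| | // w * 1 + bias = 2 and w * 3 + bias = 3 exactly, giving w = 0.5 and
| | // bias = 1.5. At a0 = a1 = 2 both predictions are 2.5, each diff is 0.5,
| | // and the loss is 0.5^2 + 0.5^2 = 0.5.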
348 | | |
349 | | static ccv_cnnp_model_t* simple_cifar_10_no_softmax(void) |
350 | 2 | { |
351 | 2 | return ccv_cnnp_sequential_new(MODEL_LIST( |
352 | 2 | ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), |
353 | 2 | ccv_cnnp_relu(0), |
354 | 2 | ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0), |
355 | 2 | ccv_cnnp_convolution(1, 32, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), |
356 | 2 | ccv_cnnp_relu(0), |
357 | 2 | ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0), |
358 | 2 | ccv_cnnp_convolution(1, 64, DIM_ALLOC(5, 5), DIM_ALLOC(), 0, HINT((1, 1), (2, 2)), 0, 1, 0), |
359 | 2 | ccv_cnnp_relu(0), |
360 | 2 | ccv_cnnp_average_pool(DIM_ALLOC(3, 3), HINT((2, 2), (0, 0)), 0), |
361 | 2 | ccv_cnnp_flatten(0), |
362 | 2 | ccv_cnnp_dense(256, 0, 0, 1, 0), |
363 | 2 | ccv_cnnp_relu(0), |
364 | 2 | ccv_cnnp_dense(10, 0, 0, 1, 0) |
365 | 2 | ), 1, 0); |
366 | 2 | } |
367 | | |
368 | | TEST_CASE("evaluate cifar-10 model in multi-stage mode") |
369 | 1 | { |
370 | 1 | ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax(); |
371 | 1 | const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3); |
372 | 1 | ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), CMD_NOOP()); |
373 | 1 | ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0); |
374 | 1 | dsfmt_t dsfmt; |
375 | 1 | int i; |
376 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
377 | 2.88k | for (i = 0; i < 31 * 31 * 3; i++)
378 | 2.88k | input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
379 | 1 | ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0); |
380 | 1 | memset(output_tensor->data.f32, 0, sizeof(float) * 10); |
381 | 1 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
382 | 1 | .is_test = 1 |
383 | 1 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
384 | 1 | int t = 0; |
385 | 1 | float max = output_tensor->data.f32[0]; |
386 | 10 | for (i = 1; i < 10; i++)
387 | 9 | if (output_tensor->data.f32[i] > max) |
388 | 3 | max = output_tensor->data.f32[i], t = i; |
389 | 1 | const int target = (t + 1) % 10; |
390 | 1 | REQUIRE_NOT_EQ(target, t, "should not fit"); |
391 | | // Doing training. |
392 | 1 | ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
393 | 1 | fit_tensor->data.f32[0] = target; |
394 | 1 | ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0); |
395 | 1 | ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
396 | 1 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0); |
397 | 101 | for (i = 0; i < 100; i++)
398 | 100 | { |
399 | 100 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
400 | 100 | .requires_grad = 1 |
401 | 100 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
402 | 100 | ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0); |
403 | 100 | ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0); |
404 | 100 | ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0); |
405 | 100 | ccv_cnnp_model_apply_gradients(sequential, 0); |
406 | 100 | } |
407 | 1 | memset(output_tensor->data.f32, 0, sizeof(float) * 10); |
408 | | // After training, it should fit. |
409 | 1 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
410 | 1 | .is_test = 1 |
411 | 1 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
412 | 1 | t = 0; |
413 | 1 | max = output_tensor->data.f32[0]; |
414 | 10 | for (i = 1; i < 10; i++)
415 | 9 | if (output_tensor->data.f32[i] > max) |
416 | 4 | max = output_tensor->data.f32[i], t = i; |
417 | 1 | REQUIRE_EQ(target, t, "should fit"); |
418 | 1 | ccv_nnc_tensor_free(ingrad_tensor); |
419 | 1 | ccv_nnc_tensor_free(fit_tensor); |
420 | 1 | ccv_nnc_tensor_free(softmax_tensor); |
421 | 1 | ccv_nnc_tensor_free(loss_tensor); |
422 | 1 | ccv_nnc_tensor_free(input_tensor); |
423 | 1 | ccv_nnc_tensor_free(output_tensor); |
424 | 1 | ccv_cnnp_model_free(sequential); |
425 | 1 | } |
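| | // The loop above is the manual counterpart of ccv_cnnp_model_fit: evaluate
| | // with .requires_grad = 1, compute the loss and its input gradient outside
| | // the model, feed that gradient to ccv_cnnp_model_backward, then commit the
| | // update with ccv_cnnp_model_apply_gradients.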
426 | | |
427 | | TEST_CASE("evaluate cifar-10 model in multi-stage mode with gradient accumulated") |
428 | 1 | { |
429 | 1 | ccv_cnnp_model_t* const sequential = simple_cifar_10_no_softmax(); |
430 | 1 | const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 31, 31, 3); |
431 | 1 | ccv_cnnp_model_compile(sequential, &input, 1, CMD_SGD_FORWARD(0, 0.00033, 1, 0.99, 0.9, 0.9), CMD_NOOP()); |
432 | 1 | ccv_nnc_tensor_t* const input_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 31, 31, 3), 0); |
433 | 1 | dsfmt_t dsfmt; |
434 | 1 | int i; |
435 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
436 | 2.88k | for (i = 0; i < 31 * 31 * 3; i++2.88k ) |
437 | 2.88k | input_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
438 | 1 | ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0); |
439 | 1 | memset(output_tensor->data.f32, 0, sizeof(float) * 10); |
440 | 1 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
441 | 1 | .is_test = 1 |
442 | 1 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
443 | 1 | int t = 0; |
444 | 1 | float max = output_tensor->data.f32[0]; |
445 | 10 | for (i = 1; i < 10; i++)
446 | 9 | if (output_tensor->data.f32[i] > max) |
447 | 4 | max = output_tensor->data.f32[i], t = i; |
448 | 1 | const int target = (t + 1) % 10; |
449 | 1 | REQUIRE_NOT_EQ(target, t, "should not fit"); |
450 | | // Doing training. |
451 | 1 | ccv_nnc_tensor_t* const fit_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
452 | 1 | fit_tensor->data.f32[0] = target; |
453 | 1 | ccv_nnc_tensor_t* const softmax_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0); |
454 | 1 | ccv_nnc_tensor_t* const loss_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
455 | 1 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 10), 0); |
456 | 101 | for (i = 0; i < 100; i++)
457 | 100 | { |
458 | 100 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
459 | 100 | .requires_grad = 1 |
460 | 100 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
461 | 100 | ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(output_tensor, fit_tensor), TENSOR_LIST(loss_tensor, softmax_tensor), 0); |
462 | 100 | ccv_nnc_cmd_exec(CMD_SOFTMAX_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, output_tensor, fit_tensor, loss_tensor, softmax_tensor), TENSOR_LIST(ingrad_tensor), 0); |
463 | 100 | ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0); |
464 | | // Backward again to accumulate gradient. |
465 | 100 | if (i % 2 == 0) |
466 | 50 | { |
467 | 50 | ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0); |
468 | | // Backward again to accumulate gradient. |
469 | 50 | if (i % 3 == 0) |
470 | 17 | ccv_cnnp_model_backward(sequential, TENSOR_LIST(ingrad_tensor), 0, 0, 0, 0); |
471 | 50 | } |
472 | 100 | ccv_cnnp_model_apply_gradients(sequential, 0); |
473 | 100 | } |
474 | 1 | memset(output_tensor->data.f32, 0, sizeof(float) * 10); |
475 | | // After training, it should fit. |
476 | 1 | ccv_cnnp_model_evaluate(sequential, (ccv_cnnp_evaluate_param_t){ |
477 | 1 | .is_test = 1 |
478 | 1 | }, TENSOR_LIST(input_tensor), TENSOR_LIST(output_tensor), 0, 0); |
479 | 1 | t = 0; |
480 | 1 | max = output_tensor->data.f32[0]; |
481 | 10 | for (i = 1; i < 10; i++)
482 | 9 | if (output_tensor->data.f32[i] > max) |
483 | 4 | max = output_tensor->data.f32[i], t = i; |
484 | 1 | REQUIRE_EQ(target, t, "should fit"); |
485 | 1 | ccv_nnc_tensor_free(ingrad_tensor); |
486 | 1 | ccv_nnc_tensor_free(fit_tensor); |
487 | 1 | ccv_nnc_tensor_free(softmax_tensor); |
488 | 1 | ccv_nnc_tensor_free(loss_tensor); |
489 | 1 | ccv_nnc_tensor_free(input_tensor); |
490 | 1 | ccv_nnc_tensor_free(output_tensor); |
491 | 1 | ccv_cnnp_model_free(sequential); |
492 | 1 | } |
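| | // Calling ccv_cnnp_model_backward several times before
| | // ccv_cnnp_model_apply_gradients accumulates gradients, so a step here
| | // applies a 1x, 2x, or 3x gradient; the smaller learning rate (0.00033
| | // versus 0.001 in the previous test) presumably compensates for that.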
493 | | |
494 | | TEST_CASE("train model with share weights and L2 loss and check out gradients") |
495 | 1 | { |
496 | 1 | ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
497 | 1 | ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
498 | 1 | ccv_cnnp_model_t* const dense = ccv_cnnp_dense(1, 0, 0, 1, 0); |
499 | 1 | ccv_cnnp_model_io_t output0 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input0)); |
500 | 1 | ccv_cnnp_model_io_t output1 = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(input1)); |
501 | 1 | ccv_cnnp_model_io_t fit0 = ccv_cnnp_input(); |
502 | 1 | ccv_cnnp_model_io_t fit1 = ccv_cnnp_input(); |
503 | | // Because we don't have L2 loss function available yet, manually create L2 loss. |
504 | 1 | ccv_cnnp_model_io_t diff0 = ccv_cnnp_model_apply( |
505 | 1 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
506 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
507 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
508 | 1 | MODEL_IO_LIST(output0, fit0)); |
509 | 1 | ccv_cnnp_model_io_t sqr0 = ccv_cnnp_model_apply( |
510 | 1 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
511 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
512 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
513 | 1 | MODEL_IO_LIST(diff0, diff0)); |
514 | 1 | ccv_cnnp_model_io_t diff1 = ccv_cnnp_model_apply( |
515 | 1 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
516 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
517 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
518 | 1 | MODEL_IO_LIST(output1, fit1)); |
519 | 1 | ccv_cnnp_model_io_t sqr1 = ccv_cnnp_model_apply( |
520 | 1 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
521 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
522 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
523 | 1 | MODEL_IO_LIST(diff1, diff1)); |
524 | 1 | ccv_cnnp_model_io_t final_output = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(sqr0, sqr1)); |
525 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1, fit0, fit1), MODEL_IO_LIST(final_output), 1, 0); |
526 | 1 | ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1, 1); |
527 | 1 | ccv_nnc_tensor_param_t a1 = CPU_TENSOR_NCHW(32F, 1, 1); |
528 | 1 | ccv_nnc_tensor_param_t b0 = CPU_TENSOR_NCHW(32F, 1, 1); |
529 | 1 | ccv_nnc_tensor_param_t b1 = CPU_TENSOR_NCHW(32F, 1, 1); |
530 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0, a1, b0, b1), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
531 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
532 | 1 | ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
533 | 1 | ccv_nnc_tensor_t* a1_tensor = ccv_nnc_tensor_new(0, a1, 0); |
534 | 1 | ccv_nnc_tensor_t* b0_tensor = ccv_nnc_tensor_new(0, b0, 0); |
535 | 1 | ccv_nnc_tensor_t* b1_tensor = ccv_nnc_tensor_new(0, b1, 0); |
536 | 1 | ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
537 | | // It should fit to 1*0.5+1.5=2, 3*0.5+1.5=3 |
538 | 1 | a0_tensor->data.f32[0] = 1; |
539 | 1 | a1_tensor->data.f32[0] = 3; |
540 | 1 | b0_tensor->data.f32[0] = 2; |
541 | 1 | b1_tensor->data.f32[0] = 3; |
542 | 1 | int i; |
543 | 11 | for (i = 0; i < 10; i++)
544 | 10 | ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0); |
545 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0); |
546 | 101 | for (i = 0; i < 100; i++)
547 | 100 | ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0); |
548 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0); |
549 | 1.00k | for (i = 0; i < 1000; i++)
550 | 1.00k | ccv_cnnp_model_fit(final, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), 0, 0, TENSOR_LIST(o0_tensor), 0, 0); |
551 | 1 | a0_tensor->data.f32[0] = 2; |
552 | 1 | a1_tensor->data.f32[0] = 2; // Both predictions will be 2.5.
553 | 1 | b0_tensor->data.f32[0] = 2; // diff is 0.5 |
554 | 1 | b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5. |
555 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
556 | 1 | .is_test = 1 |
557 | 1 | }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0); |
558 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 0.5, 2 * 1e-2, "We should have linearly regressed this.");
559 | | // Figure out the actual weight and bias term in the model. |
560 | 1 | a0_tensor->data.f32[0] = 0; |
561 | 1 | a1_tensor->data.f32[0] = 0; |
562 | 1 | b0_tensor->data.f32[0] = 0; |
563 | 1 | b1_tensor->data.f32[0] = 0; |
564 | | // The output will be 2*bias^2 |
565 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
566 | 1 | .is_test = 1 |
567 | 1 | }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0); |
568 | 1 | const float bias = sqrtf(o0_tensor->data.f32[0] * 0.5); |
569 | 1 | a0_tensor->data.f32[0] = 1; |
570 | 1 | a1_tensor->data.f32[0] = 1; |
571 | 1 | b0_tensor->data.f32[0] = 0; |
572 | 1 | b1_tensor->data.f32[0] = 0; |
573 | | // The output will be 2*(w+bias)^2 |
574 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
575 | 1 | .is_test = 1 |
576 | 1 | }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0); |
577 | 1 | const float w = sqrt(o0_tensor->data.f32[0] * 0.5) - bias; |
578 | | // Compute the out gradient to verify. |
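| | // With o = (w * a0 + bias - b0)^2 + (w * a1 + bias - b1)^2 and an incoming
| | // gradient of 1, the chain rule gives da_i = 2 * w * (w * a_i + bias - b_i)
| | // and db_i = -2 * (w * a_i + bias - b_i), which the checks below assert.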
579 | 1 | a0_tensor->data.f32[0] = 2; |
580 | 1 | a1_tensor->data.f32[0] = 2; // The final result should be 4. |
581 | 1 | b0_tensor->data.f32[0] = 2; // diff is 0.5 |
582 | 1 | b1_tensor->data.f32[0] = 3; // diff is 0.5, and 0.5^2 + 0.5^2 = 0.5. |
583 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
584 | 1 | .requires_grad = 1, |
585 | 1 | }, TENSOR_LIST(a0_tensor, a1_tensor, b0_tensor, b1_tensor), TENSOR_LIST(o0_tensor), 0, 0); |
586 | | // Note that I have to use new tensors and keep them around, since they were bound to the model during evaluation.
587 | 1 | ccv_nnc_tensor_t* da0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
588 | 1 | ccv_nnc_tensor_t* da1_tensor = ccv_nnc_tensor_new(0, a1, 0); |
589 | 1 | ccv_nnc_tensor_t* db0_tensor = ccv_nnc_tensor_new(0, b0, 0); |
590 | 1 | ccv_nnc_tensor_t* db1_tensor = ccv_nnc_tensor_new(0, b1, 0); |
591 | 1 | ccv_nnc_tensor_t* do0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
592 | 1 | do0_tensor->data.f32[0] = 1; |
593 | 1 | ccv_cnnp_model_backward(final, TENSOR_LIST(do0_tensor), TENSOR_LIST(da0_tensor, da1_tensor, db0_tensor, db1_tensor), 0, 0); |
594 | 1 | REQUIRE_EQ_WITH_TOLERANCE(da0_tensor->data.f32[0], 2 * w * (w * 2 + bias - 2), 1e-5, "da0=2*w*(w*a0+bias-b0), thus, 0.5"); |
595 | 1 | REQUIRE_EQ_WITH_TOLERANCE(da1_tensor->data.f32[0], 2 * w * (w * 2 + bias - 3), 1e-5, "da1=2*w*(w*a1+bias-b1), thus, -0.5"); |
596 | 1 | REQUIRE_EQ_WITH_TOLERANCE(db0_tensor->data.f32[0], -2 * (w * 2 + bias - 2), 1e-5, "db0=-2*(w*a0+bias-b0), thus, -1"); |
597 | 1 | REQUIRE_EQ_WITH_TOLERANCE(db1_tensor->data.f32[0], -2 * (w * 2 + bias - 3), 1e-5, "db1=-2*(w*a1+bias-b1), thus, 1"); |
598 | 1 | ccv_nnc_tensor_free(a0_tensor); |
599 | 1 | ccv_nnc_tensor_free(a1_tensor); |
600 | 1 | ccv_nnc_tensor_free(b0_tensor); |
601 | 1 | ccv_nnc_tensor_free(b1_tensor); |
602 | 1 | ccv_nnc_tensor_free(o0_tensor); |
603 | 1 | ccv_nnc_tensor_free(da0_tensor); |
604 | 1 | ccv_nnc_tensor_free(da1_tensor); |
605 | 1 | ccv_nnc_tensor_free(db0_tensor); |
606 | 1 | ccv_nnc_tensor_free(db1_tensor); |
607 | 1 | ccv_nnc_tensor_free(do0_tensor); |
608 | 1 | ccv_cnnp_model_free(final); |
609 | 1 | } |
610 | | |
611 | | TEST_CASE("apply functional model as forward pass") |
612 | 1 | { |
613 | 1 | ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
614 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), |
615 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))), |
616 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul"); |
617 | 1 | ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
618 | 1 | ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(input)); |
619 | 1 | output = ccv_cnnp_model_apply(mul, MODEL_IO_LIST(output)); |
620 | 1 | ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
621 | 1 | b->data.f32[0] = -1; |
622 | 1 | ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0, |
623 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), |
624 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))), |
625 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add"); |
626 | 1 | output = ccv_cnnp_model_apply(add, MODEL_IO_LIST(output)); |
627 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 1, "final"); |
628 | 1 | ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1); |
629 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
630 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
631 | 1 | ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
632 | 1 | ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
633 | 1 | a0_tensor->data.f32[0] = 1.12; |
634 | 1 | o0_tensor->data.f32[0] = 0; |
635 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
636 | 1 | .is_test = 1 |
637 | 1 | }, TENSOR_LIST(a0_tensor), TENSOR_LIST(o0_tensor), 0, 0); |
638 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1"); |
639 | 1 | ccv_nnc_tensor_free(a0_tensor); |
640 | 1 | ccv_nnc_tensor_free(b); |
641 | 1 | ccv_nnc_tensor_free(o0_tensor); |
642 | 1 | ccv_cnnp_model_free(final); |
643 | 1 | } |
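| | // Both applications of `mul` above share one CCV_CNNP_INIT_SHARED_TENSOR
| | // constant (set to 2.12 by CMD_SET_FORWARD), and `add` folds in the copied
| | // constant b = -1, hence the expected output 1.12 * 2.12 * 2.12 - 1.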
644 | | |
645 | | TEST_CASE("apply sequential model as forward pass") |
646 | 1 | { |
647 | 1 | ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
648 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), |
649 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_set_by(CMD_SET_FORWARD(2.12), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1)))), |
650 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul"); |
651 | 1 | ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
652 | 1 | b->data.f32[0] = -1; |
653 | 1 | ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0, |
654 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), |
655 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))), |
656 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add"); |
657 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_sequential_new(MODEL_LIST(mul, mul, add), 1, "seq"); |
658 | 1 | ccv_nnc_tensor_param_t a0 = CPU_TENSOR_NCHW(32F, 1); |
659 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a0), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
660 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
661 | 1 | ccv_nnc_tensor_t* a0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
662 | 1 | ccv_nnc_tensor_t* o0_tensor = ccv_nnc_tensor_new(0, a0, 0); |
663 | 1 | a0_tensor->data.f32[0] = 1.12; |
664 | 1 | o0_tensor->data.f32[0] = 0; |
665 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
666 | 1 | .is_test = 1 |
667 | 1 | }, TENSOR_LIST(a0_tensor), TENSOR_LIST(o0_tensor), 0, 0); |
668 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o0_tensor->data.f32[0], 1.12 * 2.12 * 2.12 - 1, 1e-5, "all the model building is to compute 1.12 * 2.12 * 2.12 - 1"); |
669 | 1 | ccv_nnc_tensor_free(a0_tensor); |
670 | 1 | ccv_nnc_tensor_free(b); |
671 | 1 | ccv_nnc_tensor_free(o0_tensor); |
672 | 1 | ccv_cnnp_model_free(final); |
673 | 1 | } |
674 | | |
675 | | ccv_cnnp_model_t* _math_2_x_1_1_10(const ccv_nnc_tensor_t* const b) |
676 | 6 | { |
677 | 6 | ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
678 | 6 | MODEL_CMD_EXEC_IO_MAP( |
679 | 6 | KV(CCV_CNNP_IO), |
680 | 6 | KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_set_by(CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, CPU_TENSOR_NCHW(32F, 1))), |
681 | 6 | ), |
682 | 6 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul"); |
683 | 6 | ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0, |
684 | 6 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), |
685 | 6 | KV(CCV_CNNP_INIT_SHARED_TENSOR, ccv_cnnp_cmd_exec_io_copy(b))), |
686 | 6 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add"); |
687 | 6 | ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add, add), 1, "seq"); |
688 | 6 | ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
689 | 6 | ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input)); |
690 | 6 | ccv_cnnp_model_io_t fit = ccv_cnnp_input(); |
691 | | // Because we don't have L2 loss function available yet, manually create L2 loss. |
692 | 6 | ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply( |
693 | 6 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
694 | 6 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
695 | 6 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
696 | 6 | MODEL_IO_LIST(left_out, fit)); |
697 | 6 | ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply( |
698 | 6 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
699 | 6 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
700 | 6 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
701 | 6 | MODEL_IO_LIST(diff, diff)); |
702 | 6 | return ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0); |
703 | 6 | } |
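| | // The returned model computes (t * a + 1 + 1 - fit)^2 with a single
| | // trainable scalar t (initialized uniformly in [-1, 1]). The tests below
| | // feed a = 2 and fit = 10, so the loss is minimized at t = 4, the "x = 4"
| | // the test names refer to.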
704 | | |
705 | | TEST_CASE("learn simple math of 2 * x + 1 + 1 = 10, x = 4") |
706 | 1 | { |
707 | 1 | ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
708 | 1 | b->data.f32[0] = 1; |
709 | 1 | ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b); |
710 | 1 | const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1); |
711 | 1 | const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1); |
712 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
713 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
714 | 1 | ccv_nnc_tensor_param_t o = {}; |
715 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
716 | 1 | ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0); |
717 | 1 | ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0); |
718 | 1 | ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0); |
719 | 1 | ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0); |
720 | 1 | ccv_nnc_tensor_t* outgrad0 = ccv_nnc_tensor_new(0, a, 0); |
721 | 1 | ccv_nnc_tensor_t* outgrad1 = ccv_nnc_tensor_new(0, f, 0); |
722 | 1 | ingrad->data.f32[0] = 1; |
723 | 1 | a_tensor->data.f32[0] = 2; |
724 | 1 | f_tensor->data.f32[0] = 10; |
725 | 1 | int i; |
726 | 1 | float old_o = 10; |
727 | 11 | for (i = 0; i < 10; i++)
728 | 10 | { |
729 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
730 | 10 | .requires_grad = 1, |
731 | 10 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
732 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
733 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
734 | 10 | } |
735 | 1 | REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be different"); |
736 | 1 | old_o = o_tensor->data.f32[0]; |
737 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0, 0, 0), 0, 0, 0); // No decay. |
738 | 1 | ingrad->data.f32[0] = 0; // ingrad is 0, no update at all. |
739 | 11 | for (i = 0; i < 10; i++)
740 | 10 | { |
741 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
742 | 10 | .requires_grad = 1, |
743 | 10 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
744 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(outgrad0, outgrad1), 0, 0); |
745 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
746 | 10 | } |
747 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad is 0");
748 | 1 | old_o = o_tensor->data.f32[0]; |
749 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, 0, 0); |
750 | 101 | for (i = 0; i < 100; i++)
751 | 100 | { |
752 | 100 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
753 | 100 | .requires_grad = 1, |
754 | 100 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
755 | 100 | ccv_cnnp_model_backward(final, TENSOR_LIST(0), TENSOR_LIST(outgrad0, outgrad1), 0, 0); |
756 | 100 | ccv_cnnp_model_apply_gradients(final, 0); |
757 | 100 | } |
758 | 1 | REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 100 iterations, output should be different"); |
759 | 1 | old_o = o_tensor->data.f32[0]; |
760 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0, 0, 0), 0, 0, 0); // No decay. |
761 | | // Note we still use the old ingrad which is 0. |
762 | 11 | for (i = 0; i < 10; i++)
763 | 10 | { |
764 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
765 | 10 | .requires_grad = 1, |
766 | 10 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
767 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0); |
768 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
769 | 10 | } |
770 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 10 iterations, output should be the same because the ingrad is 0");
771 | 1 | ingrad->data.f32[0] = 1; // ingrad reset to 1. |
772 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, 0, 0); |
773 | 1.00k | for (i = 0; i < 1000; i++)
774 | 1.00k | { |
775 | 1.00k | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
776 | 1.00k | .requires_grad = 1, |
777 | 1.00k | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
778 | 1.00k | ccv_cnnp_model_backward(final, TENSOR_LIST(ingrad), TENSOR_LIST(), 0, 0); |
779 | 1.00k | ccv_cnnp_model_apply_gradients(final, 0); |
780 | 1.00k | } |
781 | 1 | REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], old_o, 1e-5, "after 1000 iterations, output should be different"); |
782 | 1 | o_tensor->data.f32[0] = 10; |
783 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
784 | 1 | .is_test = 1, |
785 | 1 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
786 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "(2 * x + 1 + 1 - 10) ^ 2 should equal 0");
787 | 1 | ccv_nnc_tensor_free(a_tensor); |
788 | 1 | ccv_nnc_tensor_free(b); |
789 | 1 | ccv_nnc_tensor_free(f_tensor); |
790 | 1 | ccv_nnc_tensor_free(o_tensor); |
791 | 1 | ccv_nnc_tensor_free(ingrad); |
792 | 1 | ccv_nnc_tensor_free(outgrad0); |
793 | 1 | ccv_nnc_tensor_free(outgrad1); |
794 | 1 | ccv_cnnp_model_free(final); |
795 | 1 | } |
796 | | |
797 | | static int _ccv_cnnp_model_clip_grad_norm_reduce_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
798 | 2 | { |
799 | 2 | ccv_nnc_tensor_t* const old_norm2 = outputs[1]; |
800 | 2 | ccv_nnc_tensor_t* const norm2 = outputs[2]; |
801 | 2 | ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(), hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(norm2), stream_context); |
802 | 2 | ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 1), hint, flags, TENSOR_LIST(old_norm2, norm2), TENSOR_LIST(old_norm2), stream_context); |
803 | 2 | return CCV_NNC_EXEC_SUCCESS; |
804 | 2 | } |
805 | | |
806 | | static ccv_nnc_cmd_vtab_t clip_grad_norm_reduce_norm2_vtab = { |
807 | | .exec = _ccv_cnnp_model_clip_grad_norm_reduce_norm2 |
808 | | }; |
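| | // Mapped over the parameter gradients below, each invocation of this custom
| | // command receives one gradient as inputs[0], writes its L2 norm into
| | // outputs[2] (norm2), and accumulates it into outputs[1] (old_norm2) via
| | // ADD(1, 1); the tests then assert norm2 stays under max_norm = 0.5 after
| | // ccv_cnnp_model_parameters_clip_grad_norm.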
809 | | |
810 | | TEST_CASE("learn simple math of 2 * x + 1 + 1 = 10, x = 4 and clip grad to max_norm = 0.5") |
811 | 1 | { |
812 | 1 | ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
813 | 1 | b->data.f32[0] = 1; |
814 | 1 | ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b); |
815 | 1 | const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1); |
816 | 1 | const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1); |
817 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
818 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
819 | 1 | ccv_nnc_tensor_param_t o = {}; |
820 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
821 | 1 | ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0); |
822 | 1 | ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0); |
823 | 1 | ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0); |
824 | 1 | ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0); |
825 | 1 | ccv_nnc_tensor_t* outgrad0 = ccv_nnc_tensor_new(0, a, 0); |
826 | 1 | ccv_nnc_tensor_t* outgrad1 = ccv_nnc_tensor_new(0, f, 0); |
827 | 1 | ingrad->data.f32[0] = 1; |
828 | 1 | a_tensor->data.f32[0] = 2; |
829 | 1 | f_tensor->data.f32[0] = 10; |
830 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
831 | 1 | .requires_grad = 1, |
832 | 1 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
833 | 1 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
834 | 1 | ccv_cnnp_model_parameters_clip_grad_norm(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), 2, 0.5, 0); |
835 | 1 | ccv_nnc_tensor_t* old_norm2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
836 | 1 | ccv_nnc_tensor_t* norm2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
837 | 1 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(old_norm2), 0); |
838 | 1 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(norm2), 0); |
839 | 1 | ccv_cnnp_model_apply_gradients(final, 0); |
840 | 1 | ccv_nnc_cmd_t reduce_cmd = { |
841 | 1 | .cmd = CCV_NNC_CUSTOM_FORWARD, |
842 | 1 | .isa = &clip_grad_norm_reduce_norm2_vtab, |
843 | 1 | }; |
844 | 1 | ccv_cnnp_model_parameter_gradients_map(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(old_norm2, norm2), 0); |
845 | 1 | REQUIRE(norm2->data.f32[0] < 0.5 + 1e-5, "norm2 should be smaller than max_norm"); |
846 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
847 | 1 | .requires_grad = 1, |
848 | 1 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
849 | 1 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
850 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
851 | 1 | .requires_grad = 1, |
852 | 1 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
853 | 1 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
854 | 1 | ccv_cnnp_model_parameters_clip_grad_norm(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), 2, 0.5, 0); |
855 | 1 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(old_norm2), 0); |
856 | 1 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(norm2), 0); |
857 | 1 | ccv_cnnp_model_parameter_gradients_map(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(old_norm2, norm2), 0); |
858 | 1 | REQUIRE(norm2->data.f32[0] < 0.5 + 1e-5, "norm2 should be smaller than max_norm"); |
859 | 1 | ccv_cnnp_model_apply_gradients(final, 0); |
860 | 1 | ccv_nnc_tensor_free(a_tensor); |
861 | 1 | ccv_nnc_tensor_free(b); |
862 | 1 | ccv_nnc_tensor_free(f_tensor); |
863 | 1 | ccv_nnc_tensor_free(o_tensor); |
864 | 1 | ccv_nnc_tensor_free(ingrad); |
865 | 1 | ccv_nnc_tensor_free(outgrad0); |
866 | 1 | ccv_nnc_tensor_free(outgrad1); |
867 | 1 | ccv_cnnp_model_free(final); |
868 | 1 | ccv_nnc_tensor_free(old_norm2); |
869 | 1 | ccv_nnc_tensor_free(norm2); |
870 | 1 | } |
871 | | |
872 | | TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and copy parameter to a new model entirely") |
873 | 1 | { |
874 | 1 | ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
875 | 1 | b->data.f32[0] = 1; |
876 | 1 | ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b); |
877 | 1 | const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1); |
878 | 1 | const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1); |
879 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
880 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
881 | 1 | ccv_nnc_tensor_param_t o = {}; |
882 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
883 | 1 | ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0); |
884 | 1 | ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0); |
885 | 1 | ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0); |
886 | 1 | ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0); |
887 | 1 | ingrad->data.f32[0] = 1; |
888 | 1 | a_tensor->data.f32[0] = 2; |
889 | 1 | f_tensor->data.f32[0] = 10; |
890 | 1 | int i; |
891 | 11 | for (i = 0; i < 10; i++)
892 | 10 | { |
893 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
894 | 10 | .requires_grad = 1, |
895 | 10 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
896 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
897 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
898 | 10 | } |
899 | 1 | const float o_final = o_tensor->data.f32[0]; |
900 | 1 | ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b); |
901 | 1 | ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
902 | 1 | ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
903 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
904 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output"); |
905 | 1 | ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0); |
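 | | // Setting the parameters to 0 makes x = 0, so the output is (2 * 0 + 1 + 1 - 10)^2 = 64.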
906 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
907 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 64, 1e-5, "should match the output when x is 0"); |
908 | 1 | ccv_cnnp_model_t* const final3 = ccv_cnnp_model_copy(final, 1); |
909 | 1 | ccv_cnnp_model_set_parameters(final3, ccv_cnnp_model_parameters(final3, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
910 | 1 | ccv_cnnp_model_evaluate(final3, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
911 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output"); |
912 | 1 | ccv_nnc_tensor_free(a_tensor); |
913 | 1 | ccv_nnc_tensor_free(b); |
914 | 1 | ccv_nnc_tensor_free(f_tensor); |
915 | 1 | ccv_nnc_tensor_free(o_tensor); |
916 | 1 | ccv_nnc_tensor_free(ingrad); |
917 | 1 | ccv_cnnp_model_free(final); |
918 | 1 | ccv_cnnp_model_free(final2); |
919 | 1 | ccv_cnnp_model_free(final3); |
920 | 1 | } |
921 | | |
922 | | TEST_CASE("train a simple math 2 * x + 1 + 1 = 10, x = 4 and merge parameters with a model") |
923 | 1 | { |
924 | 1 | ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
925 | 1 | b->data.f32[0] = 1; |
926 | 1 | ccv_cnnp_model_t* const final = _math_2_x_1_1_10(b); |
927 | 1 | const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1); |
928 | 1 | const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1); |
929 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
930 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
931 | 1 | ccv_nnc_tensor_param_t o = {}; |
932 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
933 | 1 | ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0); |
934 | 1 | ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0); |
935 | 1 | ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0); |
936 | 1 | ccv_nnc_tensor_t* ingrad = ccv_nnc_tensor_new(0, o, 0); |
937 | 1 | ingrad->data.f32[0] = 1; |
938 | 1 | a_tensor->data.f32[0] = 2; |
939 | 1 | f_tensor->data.f32[0] = 10; |
940 | 1 | int i; |
941 | 11 | for (i = 0; i < 10; i++10 ) |
942 | 10 | { |
943 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
944 | 10 | .requires_grad = 1, |
945 | 10 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
946 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
947 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
948 | 10 | } |
949 | 1 | const float o_final = o_tensor->data.f32[0]; |
950 | 1 | ccv_cnnp_model_t* const final2 = _math_2_x_1_1_10(b); |
951 | 1 | ccv_cnnp_model_compile(final2, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
952 | 1 | ccv_cnnp_model_set_parameters(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
953 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
954 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], o_final, 1e-5, "should match the previous output"); |
955 | 1 | ccv_cnnp_model_parameters_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0); |
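 | | // Setting the parameters to 1 makes x = 1, so the output is (2 * 1 + 1 + 1 - 10)^2 = 36.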
956 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
957 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 36, 1e-5, "should match the output when x is 1"); |
958 | 1 | ccv_cnnp_model_parameters_zip_map(final2, ccv_cnnp_model_parameters(final2, ALL_PARAMETERS, ALL_PARAMETERS), CMD_ADD_FORWARD(0.6, 0.4), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
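 | | // zip_map applies CMD_ADD_FORWARD(0.6, 0.4) over paired parameters: 0.6 * final2's value (just set to 1) + 0.4 * final's trained value, which is exactly the x_final computed below.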
959 | 1 | ccv_cnnp_model_evaluate(final2, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
960 | 1 | ccv_nnc_tensor_t* x_tensor = ccv_nnc_tensor_new(0, a, 0); |
961 | 1 | const ccv_nnc_tensor_param_t params = ccv_cnnp_model_parameter_tensor_params(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS)); |
962 | 1 | REQUIRE_EQ(1, params.dim[0], "should match parameter shape"); |
963 | 1 | REQUIRE_EQ(0, params.dim[1], "should match parameter shape"); |
964 | 1 | ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(final, ALL_PARAMETERS, ALL_PARAMETERS), x_tensor); |
965 | 1 | const float x_final = x_tensor->data.f32[0] * 0.4 + 1 * 0.6; |
966 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], (x_final * 2 + 1 + 1 - 10) * (x_final * 2 + 1 + 1 - 10), 1e-5, "should match the previous output"); |
967 | 1 | ccv_nnc_tensor_free(a_tensor); |
968 | 1 | ccv_nnc_tensor_free(b); |
969 | 1 | ccv_nnc_tensor_free(f_tensor); |
970 | 1 | ccv_nnc_tensor_free(o_tensor); |
971 | 1 | ccv_nnc_tensor_free(x_tensor); |
972 | 1 | ccv_nnc_tensor_free(ingrad); |
973 | 1 | ccv_cnnp_model_free(final); |
974 | 1 | ccv_cnnp_model_free(final2); |
975 | 1 | } |
976 | | |
977 | | TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate convergence") |
978 | 1 | { |
979 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
980 | 1 | x->data.f32[0] = 1; |
981 | 1 | ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
982 | 1 | MODEL_CMD_EXEC_IO_MAP( |
983 | 1 | KV(CCV_CNNP_IO), |
984 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))), |
985 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul"); |
986 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
987 | 1 | y->data.f32[0] = 2; |
988 | 1 | ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0, |
989 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), |
990 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))), |
991 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add"); |
992 | 1 | ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), 1, "seq"); |
993 | 1 | ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
994 | 1 | ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input)); |
995 | 1 | ccv_cnnp_model_io_t fit = ccv_cnnp_input(); |
996 | | // Because we don't have an L2 loss function available yet, manually construct the L2 loss.
997 | 1 | ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply( |
998 | 1 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
999 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
1000 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
1001 | 1 | MODEL_IO_LIST(left_out, fit)); |
1002 | 1 | ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply( |
1003 | 1 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
1004 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
1005 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
1006 | 1 | MODEL_IO_LIST(diff, diff)); |
1007 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0); |
1008 | 1 | const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1); |
1009 | 1 | const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1); |
1010 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.1, 1, 0.1, 0, 0), CMD_NOOP()); |
1011 | | // Train add exclusively. |
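 | | // With mul frozen at x = 1, SGD on the loss (2 * x + y - 12)^2 pushes y toward 10; mul is then trained in the next phase to finish driving the loss to 0.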
1012 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS))); |
1013 | 1 | ccv_nnc_tensor_param_t o = {}; |
1014 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
1015 | 1 | ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0); |
1016 | 1 | ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0); |
1017 | 1 | ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0); |
1018 | 1 | a_tensor->data.f32[0] = 2; |
1019 | 1 | f_tensor->data.f32[0] = 12; |
1020 | 1 | o_tensor->data.f32[0] = 12; |
1021 | 1 | int i; |
1022 | 11 | for (i = 0; i < 10; i++10 ) |
1023 | 10 | { |
1024 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1025 | 10 | .requires_grad = 1, |
1026 | 10 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
1027 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
1028 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
1029 | 10 | } |
1030 | 1 | REQUIRE_NOT_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 12, 1e-5, "after 10 iterations, output should not be the original"); |
1031 | | // Switch to train mul exclusively. |
1032 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS))); |
1033 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS))); |
1034 | 1 | float old_o = o_tensor->data.f32[0]; |
1035 | 11 | for (i = 0; i < 10; i++10 ) |
1036 | 10 | { |
1037 | 10 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1038 | 10 | .requires_grad = 1, |
1039 | 10 | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
1040 | 10 | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
1041 | 10 | ccv_cnnp_model_apply_gradients(final, 0); |
1042 | 10 | } |
1043 | 1 | REQUIRE(o_tensor->data.f32[0] < old_o, "we should be closer to 0 at this point"); |
1044 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.001, 1, 0.001, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS))); |
1045 | 1.00k | for (i = 0; i < 1000; i++1.00k ) |
1046 | 1.00k | { |
1047 | 1.00k | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1048 | 1.00k | .requires_grad = 1, |
1049 | 1.00k | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
1050 | 1.00k | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
1051 | 1.00k | ccv_cnnp_model_apply_gradients(final, 0); |
1052 | 1.00k | } |
1053 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 1e-5, "the mean squared error should be 0 at this point"); |
1054 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1055 | 1 | ccv_nnc_tensor_free(a_tensor); |
1056 | 1 | ccv_nnc_tensor_free(o_tensor); |
1057 | 1 | ccv_nnc_tensor_free(f_tensor); |
1058 | 1 | ccv_nnc_tensor_free(x); |
1059 | 1 | ccv_nnc_tensor_free(y); |
1060 | 1 | ccv_cnnp_model_free(final); |
1061 | 1 | } |
1062 | | |
1063 | | TEST_CASE("learn 2 * x + y = 12, first learn x, and then learn y, evaluate learn-ability") |
1064 | 1 | { |
1065 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
1066 | 1 | x->data.f32[0] = 1; |
1067 | 1 | ccv_cnnp_model_t* mul = ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
1068 | 1 | MODEL_CMD_EXEC_IO_MAP( |
1069 | 1 | KV(CCV_CNNP_IO), |
1070 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(x))), |
1071 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "mul"); |
1072 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0); |
1073 | 1 | y->data.f32[0] = 2; |
1074 | 1 | ccv_cnnp_model_t* add = ccv_cnnp_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0, |
1075 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), |
1076 | 1 | KV(CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, ccv_cnnp_cmd_exec_io_copy(y))), |
1077 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, "add"); |
1078 | 1 | ccv_cnnp_model_t* const left = ccv_cnnp_sequential_new(MODEL_LIST(mul, add), 1, "seq"); |
1079 | 1 | ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1080 | 1 | ccv_cnnp_model_io_t left_out = ccv_cnnp_model_apply(left, MODEL_IO_LIST(input)); |
1081 | 1 | ccv_cnnp_model_io_t fit = ccv_cnnp_input(); |
1082 | | // Because we don't have an L2 loss function available yet, manually construct the L2 loss.
1083 | 1 | ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply( |
1084 | 1 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
1085 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
1086 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
1087 | 1 | MODEL_IO_LIST(left_out, fit)); |
1088 | 1 | ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply( |
1089 | 1 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
1090 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
1091 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
1092 | 1 | MODEL_IO_LIST(diff, diff)); |
1093 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0); |
1094 | 1 | const ccv_nnc_tensor_param_t a = CPU_TENSOR_NCHW(32F, 1); |
1095 | 1 | const ccv_nnc_tensor_param_t f = CPU_TENSOR_NCHW(32F, 1); |
1096 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(a, f), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP()); |
1097 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(mul, 0, 0), x); |
1098 | | // Train add exclusively. |
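 | | // With x frozen at 1, (2 * 1 + y - 12)^2 is minimized at y = 10; after add is reset to 2, (2 * x + 2 - 12)^2 is minimized at x = 5. The parameter copies below verify both fixed points.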
1099 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS))); |
1100 | 1 | ccv_nnc_tensor_param_t o = {}; |
1101 | 1 | ccv_cnnp_model_tensor_auto(final, &o, 1); |
1102 | 1 | ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_new(0, a, 0); |
1103 | 1 | ccv_nnc_tensor_t* f_tensor = ccv_nnc_tensor_new(0, f, 0); |
1104 | 1 | ccv_nnc_tensor_t* o_tensor = ccv_nnc_tensor_new(0, o, 0); |
1105 | 1 | a_tensor->data.f32[0] = 2; |
1106 | 1 | f_tensor->data.f32[0] = 12; |
1107 | 1 | o_tensor->data.f32[0] = 12; |
1108 | 1 | int i; |
1109 | 1.00k | for (i = 0; i < 1000; i++1.00k ) |
1110 | 1.00k | { |
1111 | 1.00k | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1112 | 1.00k | .requires_grad = 1, |
1113 | 1.00k | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
1114 | 1.00k | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
1115 | 1.00k | ccv_cnnp_model_apply_gradients(final, 0); |
1116 | 1.00k | } |
1117 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point"); |
1118 | 1 | ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(add, 0, 0), x); |
1119 | 1 | REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 10, 1e-1, "the weight on add should be 10"); |
1120 | | // Switch to train mul exclusively. Reset add's parameter to its initial value.
1121 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(add, 0, 0), y); |
1122 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(mul, ALL_PARAMETERS, ALL_PARAMETERS))); |
1123 | 1 | ccv_cnnp_model_set_minimizer(final, CMD_NOOP(), 0, MODEL_IO_LIST(ccv_cnnp_model_parameters(add, ALL_PARAMETERS, ALL_PARAMETERS))); |
1124 | 1.00k | for (i = 0; i < 1000; i++1.00k ) |
1125 | 1.00k | { |
1126 | 1.00k | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1127 | 1.00k | .requires_grad = 1, |
1128 | 1.00k | }, TENSOR_LIST(a_tensor, f_tensor), TENSOR_LIST(o_tensor), 0, 0); |
1129 | 1.00k | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
1130 | 1.00k | ccv_cnnp_model_apply_gradients(final, 0); |
1131 | 1.00k | } |
1132 | 1 | REQUIRE_EQ_WITH_TOLERANCE(o_tensor->data.f32[0], 0, 5e-3, "the mean squared error should be 0 at this point"); |
1133 | 1 | ccv_cnnp_model_parameter_copy(final, ccv_cnnp_model_parameters(mul, 0, 0), x); |
1134 | 1 | REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[0], 5, 1e-2, "the weight on mul should be 5");
1135 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1136 | 1 | ccv_nnc_tensor_free(a_tensor); |
1137 | 1 | ccv_nnc_tensor_free(o_tensor); |
1138 | 1 | ccv_nnc_tensor_free(f_tensor); |
1139 | 1 | ccv_nnc_tensor_free(x); |
1140 | 1 | ccv_nnc_tensor_free(y); |
1141 | 1 | ccv_cnnp_model_free(final); |
1142 | 1 | } |
1143 | | |
1144 | | TEST_CASE("a compiled model absorbs a new model with slightly different configuration") |
1145 | 1 | { |
1146 | 1 | ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST( |
1147 | 1 | ccv_cnnp_dense(2, 0, 0, 1, 0), |
1148 | 1 | ccv_cnnp_dense(2, 0, 0, 1, 0), |
1149 | 1 | ccv_cnnp_dense(1, 0, 0, 1, 0) |
1150 | 1 | ), 1, "multi_layer"); |
1151 | 1 | ccv_nnc_tensor_param_t x = CPU_TENSOR_NHWC(32F, 2, 2); |
1152 | 1 | ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(x), CMD_SGD_FORWARD(0, 0.01, 1, 0.01, 0, 0), CMD_NOOP()); |
1153 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, x, 0); |
1154 | 1 | dsfmt_t dsfmt; |
1155 | 1 | int i; |
1156 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
1157 | 5 | for (i = 0; i < 4; i++4 ) |
1158 | 4 | x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
1159 | 1 | ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0); |
1160 | 1 | ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){ |
1161 | 1 | .requires_grad = 1, |
1162 | 1 | }, TENSOR_LIST(x_tensor), TENSOR_LIST(y_tensor), 0, 0); |
1163 | 1 | ccv_cnnp_model_t* const small_model = ccv_cnnp_sequential_new(MODEL_LIST( |
1164 | 1 | ccv_cnnp_dense(2, 0, 0, 1, 0), |
1165 | 1 | ccv_cnnp_dense(2, 0, 0, 1, 0), |
1166 | 1 | ccv_cnnp_dense(1, 0, 0, 1, 0) |
1167 | 1 | ), 1, "multi_layer"); |
1168 | 1 | x = CPU_TENSOR_NHWC(32F, 1, 2); |
1169 | 1 | ccv_cnnp_model_absorb(multi_layer, small_model, TENSOR_PARAM_LIST(x)); |
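 | | // Absorb recompiles multi_layer against the new input geometry while keeping the trained parameters, so evaluating the first row below should reproduce the earlier output.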
1170 | 1 | ccv_nnc_tensor_t* const small_x = ccv_nnc_tensor_new(0, x, 0); |
1171 | 1 | ccv_nnc_tensor_t* const small_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0); |
1172 | 1 | memcpy(small_x->data.f32, x_tensor->data.f32, sizeof(float) * 2); |
1173 | 1 | ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){ |
1174 | 1 | .requires_grad = 1, |
1175 | 1 | }, TENSOR_LIST(small_x), TENSOR_LIST(small_y), 0, 0); |
1176 | 1 | REQUIRE_EQ_WITH_TOLERANCE(small_y->data.f32[0], y_tensor->data.f32[0], 1e-5, "the parameters are retained, so the output should be too");
1177 | 1 | ccv_cnnp_model_t* const large_model = ccv_cnnp_sequential_new(MODEL_LIST( |
1178 | 1 | ccv_cnnp_dense(2, 0, 0, 1, 0), |
1179 | 1 | ccv_cnnp_dense(2, 0, 0, 1, 0), |
1180 | 1 | ccv_cnnp_dense(1, 0, 0, 1, 0) |
1181 | 1 | ), 1, "multi_layer"); |
1182 | 1 | x = CPU_TENSOR_NHWC(32F, 4, 2); |
1183 | 1 | ccv_cnnp_model_absorb(multi_layer, large_model, TENSOR_PARAM_LIST(x)); |
1184 | 1 | ccv_nnc_tensor_t* const large_x = ccv_nnc_tensor_new(0, x, 0); |
1185 | 1 | ccv_nnc_tensor_t* const large_y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 1), 0); |
1186 | 1 | memcpy(large_x->data.f32, x_tensor->data.f32, sizeof(float) * 4); |
1187 | 5 | for (i = 4; i < 8; i++4 ) |
1188 | 4 | large_x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
1189 | 1 | ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){ |
1190 | 1 | .requires_grad = 1, |
1191 | 1 | }, TENSOR_LIST(large_x), TENSOR_LIST(large_y), 0, 0); |
1192 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, large_y->data.f32, y_tensor->data.f32, 2, 1e-5, "the parameters are retained, so the output should be too");
1193 | 1 | ccv_nnc_tensor_free(y_tensor); |
1194 | 1 | ccv_nnc_tensor_free(x_tensor); |
1195 | 1 | ccv_nnc_tensor_free(small_y); |
1196 | 1 | ccv_nnc_tensor_free(small_x); |
1197 | 1 | ccv_nnc_tensor_free(large_y); |
1198 | 1 | ccv_nnc_tensor_free(large_x); |
1199 | 1 | ccv_cnnp_model_free(multi_layer); |
1200 | 1 | } |
1201 | | |
1202 | | TEST_CASE("use linear model's parameter as the input for more computation") |
1203 | 1 | { |
1204 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 0, 0, 1, 0); |
1205 | 1 | ccv_cnnp_model_t* const multi_layer = ccv_cnnp_sequential_new(MODEL_LIST( |
1206 | 1 | linear, |
1207 | 1 | ), 1, "multi_layer"); |
1208 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1209 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(multi_layer, MODEL_IO_LIST(input)); |
1210 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, NO_TRANSPOSE, 0, 0), MODEL_IO_LIST(out, ccv_cnnp_model_parameters(linear, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0))); |
1211 | 1 | ccv_cnnp_model_io_t fit = ccv_cnnp_input(); |
1212 | | // Because we don't have an L2 loss function available yet, manually construct the L2 loss.
1213 | 1 | ccv_cnnp_model_io_t diff = ccv_cnnp_model_apply( |
1214 | 1 | ccv_cnnp_cmd_exec(CMD_ADD_FORWARD(1, -1), ccv_nnc_no_hint, 0, |
1215 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
1216 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
1217 | 1 | MODEL_IO_LIST(out, fit)); |
1218 | 1 | ccv_cnnp_model_io_t sqr = ccv_cnnp_model_apply( |
1219 | 1 | ccv_cnnp_cmd_exec(CMD_EWPROD_FORWARD(), ccv_nnc_no_hint, 0, |
1220 | 1 | MODEL_CMD_EXEC_IO_MAP(KV(CCV_CNNP_IO), KV(CCV_CNNP_IO)), |
1221 | 1 | MODEL_CMD_EXEC_IO_LIST(CCV_CNNP_IO), 1, 0), |
1222 | 1 | MODEL_IO_LIST(diff, diff)); |
1223 | 1 | ccv_cnnp_model_t* const model = ccv_cnnp_model_new(MODEL_IO_LIST(input, fit), MODEL_IO_LIST(sqr), 1, 0); |
1224 | 1 | const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32F, 1); |
1225 | 1 | const ccv_nnc_tensor_param_t t_params = CPU_TENSOR_NHWC(32F, 1); |
1226 | 1 | ccv_cnnp_model_compile(model, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP()); |
1227 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_copy(model, 1); |
1228 | 1 | ccv_cnnp_model_free(model); |
1229 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_params, t_params), CMD_SGD_FORWARD(0, 0.05, 1, 0, 0, 0), CMD_NOOP()); |
1230 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1231 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1232 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1233 | 1 | x->data.f32[0] = 1.4; |
1234 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), x); |
1235 | 1 | x->data.f32[0] = 0; |
1236 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(final, CCV_CNNP_PARAMETER_SELECT_BIAS, 0), x); |
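 | | // The graph computes y = ((w * x + b) * w - t)^2, i.e. the weight multiplies twice. The alternating targets (1, 3) and (2, 4) give w^2 + b * w = 3 and 2 * w^2 + b * w = 4, solved exactly at w = 1, b = 2, so the loss can reach 0.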
1237 | 1 | int i; |
1238 | 1.00k | for (i = 0; i < 1000; i++1.00k ) |
1239 | 1.00k | { |
1240 | 1.00k | if (i % 2 == 0) |
1241 | 500 | { |
1242 | 500 | x->data.f32[0] = 1; |
1243 | 500 | t->data.f32[0] = 3; |
1244 | 500 | } else { |
1245 | 500 | x->data.f32[0] = 2; |
1246 | 500 | t->data.f32[0] = 4; |
1247 | 500 | } |
1248 | 1.00k | float lr = 0.05; |
1249 | 1.00k | if (i >= 500)
1250 | 500 | lr = 0.001;
1251 | 500 | else if (i >= 100)
1252 | 400 | lr = 0.01;
1253 | 1.00k | ccv_cnnp_model_set_minimizer(final, CMD_SGD_FORWARD(0, lr, 1, 0, 0, 0), 0, 0, 0); |
1254 | 1.00k | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1255 | 1.00k | .requires_grad = 1, |
1256 | 1.00k | }, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0); |
1257 | 1.00k | ccv_cnnp_model_backward(final, TENSOR_LIST(), TENSOR_LIST(), 0, 0); |
1258 | 1.00k | ccv_cnnp_model_apply_gradients(final, 0); |
1259 | 1.00k | } |
1260 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1261 | 1 | x->data.f32[0] = 1; |
1262 | 1 | t->data.f32[0] = 3; |
1263 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, t), TENSOR_LIST(y), 0, 0); |
1264 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 0, 1e-2, "the mean squared error should be 0 at this point"); |
1265 | 1 | ccv_nnc_tensor_free(x); |
1266 | 1 | ccv_nnc_tensor_free(t); |
1267 | 1 | ccv_nnc_tensor_free(y); |
1268 | 1 | ccv_cnnp_model_free(final); |
1269 | 1 | } |
1270 | | |
1271 | | TEST_CASE("model can have multiple outputs and some of them can be used in the computation") |
1272 | 1 | { |
1273 | 1 | ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0, 1, 0); |
1274 | 1 | ccv_cnnp_model_t* const linear2 = ccv_cnnp_dense(1, 1, 0, 1, 0); |
1275 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1276 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input)); |
1277 | 1 | ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(linear2, MODEL_IO_LIST(out1)); |
1278 | 1 | ccv_cnnp_model_t* const multi_layer = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out1, out2), 1, 0); |
1279 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1280 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1281 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1282 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1); |
1283 | 1 | ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1284 | 1 | t->data.f32[0] = 2.4; |
1285 | 1 | ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear1, ALL_PARAMETERS, 0), t); |
1286 | 1 | t->data.f32[0] = -1.5; |
1287 | 1 | ccv_cnnp_model_set_parameter(multi_layer, ccv_cnnp_model_parameters(linear2, ALL_PARAMETERS, 0), t); |
1288 | 1 | x->data.f32[0] = 10; |
1289 | 1 | ccv_cnnp_model_evaluate(multi_layer, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(t, y), 0, 0); |
1290 | 1 | REQUIRE_EQ_WITH_TOLERANCE(t->data.f32[0], 10 * 2.4, 1e-5, "should be equal to expected value"); |
1291 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], -10 * 2.4 * 1.5, 1e-5, "should be equal to expected value"); |
1292 | 1 | ccv_nnc_tensor_free(x); |
1293 | 1 | ccv_nnc_tensor_free(t); |
1294 | 1 | ccv_nnc_tensor_free(y); |
1295 | 1 | ccv_cnnp_model_free(multi_layer); |
1296 | 1 | } |
1297 | | |
1298 | | TEST_CASE("index select model can select a part from vocabulary") |
1299 | 1 | { |
1300 | 1 | ccv_cnnp_model_t* const index_select = ccv_cnnp_index_select(0); |
1301 | 1 | const ccv_nnc_tensor_param_t v_params = CPU_TENSOR_NHWC(32F, 10, 8); |
1302 | 1 | ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, v_params, 0); |
1303 | 1 | dsfmt_t dsfmt; |
1304 | 1 | int i; |
1305 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
1306 | 81 | for (i = 0; i < 10 * 8; i++80 ) |
1307 | 80 | v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
1308 | 1 | const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32S, 3); |
1309 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, x_params, 0); |
1310 | 1 | ccv_cnnp_model_compile(index_select, TENSOR_PARAM_LIST(v_params, x_params), CMD_NOOP(), CMD_NOOP()); |
1311 | 1 | x->data.i32[0] = 1; |
1312 | 1 | x->data.i32[1] = 0; |
1313 | 1 | x->data.i32[2] = 5; |
1314 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0); |
1315 | 1 | ccv_cnnp_model_evaluate(index_select, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(v, x), TENSOR_LIST(y), 0, 0); |
1316 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 1 * 8, y->data.f32, 8, 1e-5, "index 1st vector"); |
1317 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 0 * 8, y->data.f32 + 8, 8, 1e-5, "index 0th vector"); |
1318 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 5 * 8, y->data.f32 + 8 * 2, 8, 1e-5, "index 5th vector"); |
1319 | 1 | ccv_nnc_tensor_free(x); |
1320 | 1 | ccv_nnc_tensor_free(y); |
1321 | 1 | ccv_nnc_tensor_free(v); |
1322 | 1 | ccv_cnnp_model_free(index_select); |
1323 | 1 | } |
1324 | | |
1325 | | TEST_CASE("embedding model can generate vector embedding") |
1326 | 1 | { |
1327 | 1 | ccv_cnnp_model_t* const embedding = ccv_cnnp_embedding(CCV_32F, 10, 8, 1, 0); |
1328 | 1 | const ccv_nnc_tensor_param_t x_params = CPU_TENSOR_NHWC(32S, 3); |
1329 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, x_params, 0); |
1330 | 1 | ccv_cnnp_model_compile(embedding, TENSOR_PARAM_LIST(x_params), CMD_NOOP(), CMD_NOOP()); |
1331 | 1 | x->data.i32[0] = 1; |
1332 | 1 | x->data.i32[1] = 0; |
1333 | 1 | x->data.i32[2] = 5; |
1334 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0); |
1335 | 1 | ccv_cnnp_model_evaluate(embedding, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0); |
1336 | 1 | ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 8), 0); |
1337 | 1 | ccv_cnnp_model_parameter_copy(embedding, ccv_cnnp_model_parameters(embedding, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0), v); |
1338 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 1 * 8, y->data.f32, 8, 1e-5, "index 1st vector"); |
1339 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 0 * 8, y->data.f32 + 8, 8, 1e-5, "index 0th vector"); |
1340 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, v->data.f32 + 5 * 8, y->data.f32 + 8 * 2, 8, 1e-5, "index 5th vector"); |
1341 | 1 | ccv_nnc_tensor_free(x); |
1342 | 1 | ccv_nnc_tensor_free(y); |
1343 | 1 | ccv_nnc_tensor_free(v); |
1344 | 1 | ccv_cnnp_model_free(embedding); |
1345 | 1 | } |
1346 | | |
1347 | | TEST_CASE("model to get the internal name for parameters") |
1348 | 1 | { |
1349 | 1 | ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1350 | 1 | ccv_cnnp_model_t* const linear2 = ccv_cnnp_dense(1, 1, 0, 1, 0); |
1351 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1352 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input)); |
1353 | 1 | ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(linear2, MODEL_IO_LIST(out1)); |
1354 | 1 | ccv_cnnp_model_t* const multi_layer = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out1, out2), 1, 0); |
1355 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1); |
1356 | 1 | ccv_cnnp_model_compile(multi_layer, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1357 | 1 | const char* linear1p = "t-linear-0-0"; |
1358 | 1 | REQUIRE(memcmp(linear1p, ccv_cnnp_model_parameter_name(multi_layer, ccv_cnnp_model_parameters(linear1, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0)), strlen(linear1p) + 1) == 0, "should be equal"); |
1359 | 1 | const char* linear2p = "t-0-0"; |
1360 | 1 | REQUIRE(memcmp(linear2p, ccv_cnnp_model_parameter_name(multi_layer, ccv_cnnp_model_parameters(linear2, CCV_CNNP_PARAMETER_SELECT_WEIGHT, 0)), strlen(linear2p) + 1) == 0, "should be equal"); |
1361 | 1 | ccv_cnnp_model_free(multi_layer); |
1362 | 1 | } |
1363 | | |
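 | | // A bottleneck block in the ResNet v1d style: 1x1 reduce, 3x3 carrying the stride, 1x1 expand by the expansion factor, summed with the shortcut; the projection shortcut average-pools first and then applies a 1x1 conv instead of a strided 1x1.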
1364 | | static ccv_cnnp_model_t* _resnet_block_new(const int filters, const int expansion, const int strides, const int projection_shortcut) |
1365 | 16 | { |
1366 | 16 | ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1367 | 16 | ccv_cnnp_model_io_t shortcut = input; |
1368 | 16 | if (projection_shortcut) |
1369 | 4 | { |
1370 | 4 | ccv_cnnp_model_t* const avgdown = ccv_cnnp_average_pool(DIM_ALLOC(strides, strides), HINT((strides, strides), (0, 0)), 0); |
1371 | 4 | shortcut = ccv_cnnp_model_apply(avgdown, MODEL_IO_LIST(input)); |
1372 | 4 | ccv_cnnp_model_t* const conv0 = ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), DIM_ALLOC(), 1, HINT((1, 1), (0, 0)), 0, 1, 0); |
1373 | 4 | shortcut = ccv_cnnp_model_apply(conv0, MODEL_IO_LIST(shortcut)); |
1374 | 4 | } |
1375 | 16 | ccv_cnnp_model_t* const conv1 = ccv_cnnp_sequential_new(MODEL_LIST( |
1376 | 16 | ccv_cnnp_convolution(1, filters, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), |
1377 | 16 | ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0), |
1378 | 16 | ccv_cnnp_relu(0) |
1379 | 16 | ), 1, 0); |
1380 | 16 | ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(conv1, MODEL_IO_LIST(input)); |
1381 | 16 | ccv_cnnp_model_t* const conv2 = ccv_cnnp_sequential_new(MODEL_LIST( |
1382 | 16 | ccv_cnnp_convolution(1, filters, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((strides, strides), (1, 1)), 0, 1, 0), |
1383 | 16 | ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0), |
1384 | 16 | ccv_cnnp_relu(0) |
1385 | 16 | ), 1, 0); |
1386 | 16 | output = ccv_cnnp_model_apply(conv2, MODEL_IO_LIST(output)); |
1387 | 16 | ccv_cnnp_model_t* const conv3 = ccv_cnnp_sequential_new(MODEL_LIST( |
1388 | 16 | ccv_cnnp_convolution(1, filters * expansion, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), |
1389 | 16 | ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0) |
1390 | 16 | ), 1, 0); |
1391 | 16 | output = ccv_cnnp_model_apply(conv3, MODEL_IO_LIST(output)); |
1392 | 16 | ccv_cnnp_model_t* const add = ccv_cnnp_sum(0); |
1393 | 16 | output = ccv_cnnp_model_apply(add, MODEL_IO_LIST(output, shortcut)); |
1394 | 16 | ccv_cnnp_model_t* const relu = ccv_cnnp_relu(0); |
1395 | 16 | output = ccv_cnnp_model_apply(relu, MODEL_IO_LIST(output)); |
1396 | 16 | return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 1, 0); |
1397 | 16 | } |
1398 | | |
1399 | | static ccv_cnnp_model_t* _resnet_block_layer_new(const int filters, const int expansion, const int strides, const int blocks) |
1400 | 4 | { |
1401 | 4 | ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1402 | 4 | ccv_cnnp_model_t* first_block = _resnet_block_new(filters, expansion, strides, 1); |
1403 | 4 | ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(first_block, MODEL_IO_LIST(input)); |
1404 | 4 | int i; |
1405 | 16 | for (i = 1; i < blocks; i++12 ) |
1406 | 12 | { |
1407 | 12 | ccv_cnnp_model_t* block = _resnet_block_new(filters, expansion, 1, 0); |
1408 | 12 | output = ccv_cnnp_model_apply(block, MODEL_IO_LIST(output)); |
1409 | 12 | } |
1410 | 4 | return ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(output), 1, 0); |
1411 | 4 | } |
1412 | | |
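 | | // Standard FPN top-down path: project the coarsest feature map with a 1x1 conv, then for each finer level add a 1x1 lateral projection to the 2x bilinear-upsampled running output and smooth the sum with a 3x3 conv.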
1413 | | static void _fpn(const int d, const ccv_cnnp_model_io_t* const c, const int c_size, ccv_cnnp_model_io_t* const p) |
1414 | 1 | { |
1415 | 1 | int i; |
1416 | 1 | ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(c[c_size - 1])); |
1417 | 1 | p[c_size - 1] = output; |
1418 | 4 | for (i = c_size - 2; i >= 0; i--3 ) |
1419 | 3 | { |
1420 | 3 | const ccv_cnnp_model_io_t lateral = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, 0), MODEL_IO_LIST(c[i])); |
1421 | 3 | const ccv_cnnp_model_io_t up = ccv_cnnp_model_apply(ccv_cnnp_upsample(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0, 0), MODEL_IO_LIST(output)); |
1422 | 3 | const ccv_cnnp_model_io_t sum = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(lateral, up)); |
1423 | 3 | output = ccv_cnnp_model_apply(ccv_cnnp_convolution(1, d, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((1, 1), (1, 1)), 0, 1, 0), MODEL_IO_LIST(sum)); |
1424 | 3 | p[i] = output; |
1425 | 3 | } |
1426 | 1 | } |
1427 | | |
1428 | | ccv_cnnp_model_t* _imagenet_resnet50_v1d_fpn(void) |
1429 | 1 | { |
1430 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1431 | 1 | ccv_cnnp_model_t* init_conv = ccv_cnnp_sequential_new(MODEL_LIST( |
1432 | 1 | ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((2, 2), (1, 1)), 0, 1, 0), |
1433 | 1 | ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0), |
1434 | 1 | ccv_cnnp_relu(0), |
1435 | 1 | ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((1, 1), (1, 1)), 0, 1, 0), |
1436 | 1 | ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0), |
1437 | 1 | ccv_cnnp_relu(0), |
1438 | 1 | ccv_cnnp_convolution(1, 64, DIM_ALLOC(3, 3), DIM_ALLOC(), 1, HINT((1, 1), (1, 1)), 0, 1, 0), |
1439 | 1 | ccv_cnnp_batch_norm(0.9, 1e-4, 1, 0), |
1440 | 1 | ccv_cnnp_relu(0), |
1441 | 1 | ccv_cnnp_max_pool(DIM_ALLOC(3, 3), HINT((2, 2), (1, 1)), 0) |
1442 | 1 | ), 1, 0); |
1443 | 1 | ccv_cnnp_model_io_t output = ccv_cnnp_model_apply(init_conv, MODEL_IO_LIST(input)); |
1444 | 1 | output = ccv_cnnp_model_apply(_resnet_block_layer_new(64, 4, 1, 3), MODEL_IO_LIST(output)); |
1445 | 1 | const ccv_cnnp_model_io_t c2 = output; |
1446 | 1 | output = ccv_cnnp_model_apply(_resnet_block_layer_new(128, 4, 2, 4), MODEL_IO_LIST(output)); |
1447 | 1 | const ccv_cnnp_model_io_t c3 = output; |
1448 | 1 | output = ccv_cnnp_model_apply(_resnet_block_layer_new(256, 4, 2, 6), MODEL_IO_LIST(output)); |
1449 | 1 | const ccv_cnnp_model_io_t c4 = output; |
1450 | 1 | output = ccv_cnnp_model_apply(_resnet_block_layer_new(512, 4, 2, 3), MODEL_IO_LIST(output)); |
1451 | 1 | const ccv_cnnp_model_io_t c5 = output; |
1452 | 1 | const ccv_cnnp_model_io_t c[] = { c2, c3, c4, c5 }; |
1453 | 1 | ccv_cnnp_model_io_t p[5]; |
1454 | 1 | _fpn(256, c, 4, p); |
1455 | 1 | p[4] = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(2, 2), HINT((2, 2), (0, 0)), 0), MODEL_IO_LIST(p[3])); |
1456 | | // 3 aspect ratios (1:2, 1:1, 2:1), each with 4 + 2 channels (x, y, w, h, object, non-object): 3 * 6 = 18 total.
1457 | 1 | ccv_cnnp_model_t* const rpn_proposals = ccv_cnnp_convolution(1, 18, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, "rpn"); |
1458 | 1 | ccv_cnnp_model_io_t proposals[5]; |
1459 | 1 | int i; |
1460 | 6 | for (i = 0; i < 5; i++5 ) |
1461 | 5 | proposals[i] = ccv_cnnp_model_apply(rpn_proposals, MODEL_IO_LIST(p[i])); |
1462 | 1 | return ccv_cnnp_model_new(MODEL_IO_LIST(input), proposals, 5, 1, 0); |
1463 | 1 | } |
1464 | | |
1465 | | TEST_CASE("FPN-RPN use cnnp model with multiple outputs") |
1466 | 1 | { |
1467 | 1 | ccv_cnnp_model_t* rpn = _imagenet_resnet50_v1d_fpn(); |
1468 | 1 | ccv_nnc_tensor_param_t input_params = GPU_TENSOR_NCHW(000, 32F, 4, 3, 835, 1146); |
1469 | 1 | ccv_cnnp_model_compile(rpn, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1470 | 1 | ccv_nnc_tensor_param_t output_params[5]; |
1471 | 1 | ccv_cnnp_model_tensor_auto(rpn, output_params, 5); |
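 | | // Expected sizes follow floor((n + 2 * pad - k) / stride) + 1: height 835 -> 418 (stride-2 conv) -> 209 (stride-2 max pool), then 105, 53, 27 through the stride-2 stages, and 13 after the 2x2 average pool; width 1146 -> 573 -> 287, then 144, 72, 36, 18.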
1472 | 1 | REQUIRE_EQ(output_params[0].dim[2], 209, "should be equal"); |
1473 | 1 | REQUIRE_EQ(output_params[0].dim[3], 287, "should be equal"); |
1474 | 1 | REQUIRE_EQ(output_params[1].dim[2], 105, "should be equal"); |
1475 | 1 | REQUIRE_EQ(output_params[1].dim[3], 144, "should be equal"); |
1476 | 1 | REQUIRE_EQ(output_params[2].dim[2], 53, "should be equal"); |
1477 | 1 | REQUIRE_EQ(output_params[2].dim[3], 72, "should be equal"); |
1478 | 1 | REQUIRE_EQ(output_params[3].dim[2], 27, "should be equal"); |
1479 | 1 | REQUIRE_EQ(output_params[3].dim[3], 36, "should be equal"); |
1480 | 1 | REQUIRE_EQ(output_params[4].dim[2], 13, "should be equal"); |
1481 | 1 | REQUIRE_EQ(output_params[4].dim[3], 18, "should be equal"); |
1482 | 1 | ccv_cnnp_model_free(rpn); |
1483 | 1 | } |
1484 | | |
1485 | | TEST_CASE("extract one output each feed into different feed-forward") |
1486 | 1 | { |
1487 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1488 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1489 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input)); |
1490 | 1 | ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid"); |
1491 | 1 | ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1)); |
1492 | 1 | ccv_cnnp_model_t* tiny = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out1, out2), 1, "tiny"); |
1493 | 1 | const ccv_cnnp_model_io_t i0 = ccv_cnnp_input(); |
1494 | 1 | ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(tiny, MODEL_IO_LIST(i0)); |
1495 | 1 | ccv_cnnp_model_io_t o00 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(o0)); |
1496 | 1 | ccv_cnnp_model_io_t o01 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(o0)); |
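 | | // tiny has two outputs: the dense output and its sigmoid. ccv_cnnp_extract(i, ...) picks the i-th output of o0 so each can feed its own dense layer.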
1497 | 1 | ccv_cnnp_model_t* const l0 = ccv_cnnp_dense(1, 1, 0, 1, "l0"); |
1498 | 1 | ccv_cnnp_model_io_t o10 = ccv_cnnp_model_apply(l0, MODEL_IO_LIST(o00)); |
1499 | 1 | ccv_cnnp_model_t* const l1 = ccv_cnnp_dense(1, 1, 0, 1, "l1"); |
1500 | 1 | ccv_cnnp_model_io_t o11 = ccv_cnnp_model_apply(l1, MODEL_IO_LIST(o01)); |
1501 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(i0), MODEL_IO_LIST(o10, o11), 1, "final"); |
1502 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1503 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1504 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1505 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1); |
1506 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1507 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1508 | 1 | t->data.f32[0] = 2.4; |
1509 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t); |
1510 | 1 | t->data.f32[0] = -1.5; |
1511 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(l0, ALL_PARAMETERS, 0), t); |
1512 | 1 | t->data.f32[0] = 1.7; |
1513 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(l1, ALL_PARAMETERS, 0), t); |
1514 | 1 | x->data.f32[0] = 10; |
1515 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(t, y), 0, 0); |
1516 | 1 | REQUIRE_EQ_WITH_TOLERANCE(t->data.f32[0], 10 * 2.4 * -1.5, 1e-5, "should be equal to expected value"); |
1517 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) * 1.7, 1e-5, "should be equal to expected value"); |
1518 | 1 | ccv_nnc_tensor_free(x); |
1519 | 1 | ccv_nnc_tensor_free(t); |
1520 | 1 | ccv_nnc_tensor_free(y); |
1521 | 1 | ccv_cnnp_model_free(final); |
1522 | 1 | } |
1523 | | |
1524 | | TEST_CASE("use parameter for values") |
1525 | 1 | { |
1526 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1527 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1528 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input)); |
1529 | 1 | ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid"); |
1530 | 1 | ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1)); |
1531 | 1 | ccv_cnnp_model_t* const value = ccv_cnnp_parameter(CPU_TENSOR_NCHW(32F, 1), 0, 1, "value"); |
1532 | 1 | ccv_cnnp_model_io_t out3 = ccv_cnnp_model_apply(value, 0, 0); |
1533 | 1 | ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum"); |
1534 | 1 | ccv_cnnp_model_io_t out4 = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out2, out3)); |
1535 | 1 | ccv_cnnp_model_t* final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out4), 1, "tiny"); |
1536 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1537 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1538 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1539 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1); |
1540 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1541 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1542 | 1 | t->data.f32[0] = 2.4; |
1543 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t); |
1544 | 1 | t->data.f32[0] = -1.5; |
1545 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(value, ALL_PARAMETERS, 0), t); |
1546 | 1 | x->data.f32[0] = 10; |
1547 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0); |
1548 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) - 1.5, 1e-5, "should be equal to expected value"); |
1549 | 1 | ccv_nnc_tensor_free(x); |
1550 | 1 | ccv_nnc_tensor_free(t); |
1551 | 1 | ccv_nnc_tensor_free(y); |
1552 | 1 | ccv_cnnp_model_free(final); |
1553 | 1 | } |
1554 | | |
1555 | | TEST_CASE("use scalar for values") |
1556 | 1 | { |
1557 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1558 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1559 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input)); |
1560 | 1 | ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid"); |
1561 | 1 | ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1)); |
1562 | 1 | ccv_cnnp_model_io_t value = ccv_cnnp_model_apply(ccv_cnnp_scalar(CCV_TENSOR_CPU_MEMORY, CCV_TENSOR_FORMAT_NHWC, CCV_32F, 1.5, "value"), 0, 0); |
1563 | 1 | ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum"); |
1564 | 1 | ccv_cnnp_model_io_t out4 = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out2, value)); |
1565 | 1 | ccv_cnnp_model_t* final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out4), 1, "tiny"); |
1566 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1567 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1568 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1569 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1); |
1570 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1571 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1572 | 1 | t->data.f32[0] = 2.4; |
1573 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t); |
1574 | 1 | x->data.f32[0] = 10; |
1575 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0); |
1576 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) + 1.5, 1e-5, "should be equal to expected value"); |
1577 | 1 | ccv_nnc_tensor_free(x); |
1578 | 1 | ccv_nnc_tensor_free(t); |
1579 | 1 | ccv_nnc_tensor_free(y); |
1580 | 1 | ccv_cnnp_model_free(final); |
1581 | 1 | } |
1582 | | |
1583 | | TEST_CASE("use scalar for values and copy types from other inputs") |
1584 | 1 | { |
1585 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1586 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1587 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input)); |
1588 | 1 | ccv_cnnp_model_t* const sigmoid = ccv_cnnp_sigmoid("sigmoid"); |
1589 | 1 | ccv_cnnp_model_io_t out2 = ccv_cnnp_model_apply(sigmoid, MODEL_IO_LIST(out1)); |
1590 | 1 | ccv_cnnp_model_io_t value = ccv_cnnp_model_apply(ccv_cnnp_scalar(0, 0, 0, 1.5, "value"), MODEL_IO_LIST(input)); |
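 | | // Passing 0 for memory type, tensor format, and datatype makes the scalar copy all three from its input.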
1591 | 1 | ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum"); |
1592 | 1 | ccv_cnnp_model_io_t out4 = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out2, value)); |
1593 | 1 | ccv_cnnp_model_t* final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out4), 1, "tiny"); |
1594 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1595 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1596 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1597 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1); |
1598 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1599 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1600 | 1 | t->data.f32[0] = 2.4; |
1601 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t); |
1602 | 1 | x->data.f32[0] = 10; |
1603 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0); |
1604 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y->data.f32[0], 1 / (1 + exp(-10 * 2.4)) + 1.5, 1e-5, "should be equal to expected value"); |
1605 | 1 | ccv_nnc_tensor_free(x); |
1606 | 1 | ccv_nnc_tensor_free(t); |
1607 | 1 | ccv_nnc_tensor_free(y); |
1608 | 1 | ccv_cnnp_model_free(final); |
1609 | 1 | } |
1610 | | |
1611 | | TEST_CASE("LoRA fine-tuning GEMM set is_trainable to false") |
1612 | 1 | { |
1613 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1614 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(10, 1, 0, -1, "linear"); |
1615 | 1 | ccv_cnnp_model_t* const down = ccv_cnnp_dense(2, 1, 0, 1, "down"); |
1616 | 1 | ccv_cnnp_model_t* const up = ccv_cnnp_dense(10, 1, 0, 1, "up"); |
1617 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input)); |
1618 | 1 | ccv_cnnp_model_io_t out_down = ccv_cnnp_model_apply(down, MODEL_IO_LIST(input)); |
1619 | 1 | ccv_cnnp_model_io_t out_up = ccv_cnnp_model_apply(up, MODEL_IO_LIST(out_down)); |
1620 | 1 | ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum"); |
1621 | 1 | ccv_cnnp_model_io_t out_final = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out, out_up)); |
1622 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out_final), 0, "tiny"); |
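 | | // LoRA wiring: linear passes is_trainable = -1 to inherit the enclosing model's 0, so the base GEMM is frozen; only down and up (is_trainable = 1) receive updates, and up's weights are zeroed below so training starts exactly from the frozen base's output.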
1623 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1624 | 1 | ccv_nnc_tensor_t* const tlinear = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0); |
1625 | 1 | int i; |
1626 | 101 | for (i = 0; i < 10 * 10; i++100 ) |
1627 | 100 | tlinear->data.f32[i] = (i / 10 == i % 10) ? 1 : 0;
1628 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 2), 0); |
1629 | 21 | for (i = 0; i < 10 * 2; i++20 ) |
1630 | 20 | t->data.f32[i] = 0; |
1631 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1632 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10); |
1633 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_SGD_FORWARD(1, 0.01, 1, 0.1, 0, 0), CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN)); |
1634 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), tlinear); |
1635 | 1 | ccv_nnc_tensor_free(tlinear); |
1636 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(up, ALL_PARAMETERS, 0), t); |
1637 | 1 | ccv_nnc_tensor_free(t); |
1638 | 11 | for (i = 0; i < 10; i++10 ) |
1639 | 10 | x->data.f32[i] = i; |
1640 | 1 | ccv_nnc_tensor_t* const target = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1641 | 11 | for (i = 0; i < 10; i++10 ) |
1642 | 10 | target->data.f32[i] = 10 - i; |
1643 | 11 | for (i = 0; i < 10; i++10 ) |
1644 | 10 | ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0); |
1645 | 1 | ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0); |
1646 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1647 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, y->data.f32, target->data.f32, 10, 1e-2, "should match the target after fine-tuning"); |
1648 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(final), 0, "should be marked as not trainable"); |
1649 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(down), 1, "should be marked as trainable"); |
1650 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(up), 1, "should be marked as trainable"); |
1651 | 1 | ccv_nnc_tensor_free(x); |
1652 | 1 | ccv_nnc_tensor_free(target); |
1653 | 1 | ccv_nnc_tensor_free(y); |
1654 | 1 | ccv_cnnp_model_free(final); |
1655 | 1 | } |
1656 | | |
1657 | | TEST_CASE("LoRA fine-tuning convolution set is_trainable to false") |
1658 | 1 | { |
1659 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1660 | 1 | ccv_cnnp_model_t* const conv = ccv_cnnp_convolution(1, 32, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, -1, "conv"); |
1661 | 1 | ccv_cnnp_model_t* const down = ccv_cnnp_convolution(1, 4, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, 1, "down"); |
1662 | 1 | ccv_cnnp_model_t* const up = ccv_cnnp_convolution(1, 32, DIM_ALLOC(1, 1), DIM_ALLOC(), 0, HINT((1, 1), (0, 0)), 0, 1, "up"); |
1663 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(conv, MODEL_IO_LIST(input)); |
1664 | 1 | ccv_cnnp_model_io_t out_down = ccv_cnnp_model_apply(down, MODEL_IO_LIST(input)); |
1665 | 1 | ccv_cnnp_model_io_t out_up = ccv_cnnp_model_apply(up, MODEL_IO_LIST(out_down)); |
1666 | 1 | ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum"); |
1667 | 1 | ccv_cnnp_model_io_t out_final = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out, out_up)); |
1668 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out_final), 0, "tiny"); |
1669 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 5, 10), 0); |
1670 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 5, 32), 0); |
1671 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 5, 5, 10); |
1672 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1673 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1674 | 1 | .requires_grad = 1, |
1675 | 1 | }, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0); |
1676 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(final), 0, "should be marked as not trainable"); |
1677 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(down), 1, "should be marked as trainable"); |
1678 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(up), 1, "should be marked as trainable"); |
1679 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1680 | 1 | ccv_nnc_tensor_free(x); |
1681 | 1 | ccv_nnc_tensor_free(y); |
1682 | 1 | ccv_cnnp_model_free(final); |
1683 | 1 | } |
1684 | | |
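 | | // A namer that returns the source parameter name unchanged (truncated to the provided buffer), so parameters in the two models resolve by identical names.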
1685 | | static int _ccv_nnc_same_namer(void* context, const char* src_name, char* updated_name, const size_t provided_size) |
1686 | 3 | { |
1687 | 3 | const size_t src_len = ccv_min(strnlen(src_name, provided_size - 1), provided_size - 1); |
1688 | 3 | memcpy(updated_name, src_name, src_len); |
1689 | 3 | updated_name[src_len] = '\0'; |
1690 | 3 | return 0; |
1691 | 3 | } |
1692 | | |
1693 | | TEST_CASE("two models share the same parameters") |
1694 | 1 | { |
1695 | 1 | const ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
1696 | 1 | ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(10, 1, 0, -1, "linear"); |
1697 | 1 | ccv_cnnp_model_t* const down0 = ccv_cnnp_dense(2, 1, 0, 1, "down"); |
1698 | 1 | ccv_cnnp_model_t* const up0 = ccv_cnnp_dense(10, 1, 0, 1, "up"); |
1699 | 1 | ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0)); |
1700 | 1 | ccv_cnnp_model_io_t out0_down = ccv_cnnp_model_apply(down0, MODEL_IO_LIST(input0)); |
1701 | 1 | ccv_cnnp_model_io_t out0_up = ccv_cnnp_model_apply(up0, MODEL_IO_LIST(out0_down)); |
1702 | 1 | ccv_cnnp_model_t* const add0 = ccv_cnnp_sum("sum"); |
1703 | 1 | ccv_cnnp_model_io_t out0_final = ccv_cnnp_model_apply(add0, MODEL_IO_LIST(out0, out0_up)); |
1704 | 1 | ccv_cnnp_model_t* const final0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0), MODEL_IO_LIST(out0_final), 0, "tiny0"); |
1705 | | |
1706 | 1 | const ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
1707 | 1 | ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(10, 1, 0, -1, "linear"); |
1708 | 1 | ccv_cnnp_model_t* const down1 = ccv_cnnp_dense(2, 1, 0, 1, "down"); |
1709 | 1 | ccv_cnnp_model_t* const up1 = ccv_cnnp_dense(10, 1, 0, 1, "up"); |
1710 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input1)); |
1711 | 1 | ccv_cnnp_model_io_t out1_down = ccv_cnnp_model_apply(down1, MODEL_IO_LIST(input1)); |
1712 | 1 | ccv_cnnp_model_io_t out1_up = ccv_cnnp_model_apply(up1, MODEL_IO_LIST(out1_down)); |
1713 | 1 | ccv_cnnp_model_t* const add1 = ccv_cnnp_sum("sum"); |
1714 | 1 | ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(add1, MODEL_IO_LIST(out1, out1_up)); |
1715 | 1 | ccv_cnnp_model_t* const final1 = ccv_cnnp_model_new(MODEL_IO_LIST(input1), MODEL_IO_LIST(out1_final), 0, "tiny1"); |
1716 | | |
1717 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1718 | 1 | dsfmt_t dsfmt; |
1719 | 1 | int i; |
1720 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
1721 | 11 | for (i = 0; i < 10; i++10 ) |
1722 | 10 | x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
1723 | 1 | ccv_nnc_tensor_t* const y0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1724 | 1 | ccv_nnc_tensor_t* const y1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1725 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10); |
1726 | 1 | ccv_cnnp_model_compile(final0, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1727 | 1 | ccv_cnnp_model_evaluate(final0, (ccv_cnnp_evaluate_param_t){ |
1728 | 1 | .requires_grad = 0, |
1729 | 1 | }, TENSOR_LIST(x), TENSOR_LIST(y0), 0, 0); |
1730 | 1 | ccv_cnnp_model_compile(final1, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1731 | 1 | ccv_cnnp_model_share_parameters(final1, ccv_cnnp_model_parameters(final1, ALL_PARAMETERS, ALL_PARAMETERS), final0, ccv_cnnp_model_parameters(final0, ALL_PARAMETERS, ALL_PARAMETERS), 0, 0); |
1732 | 1 | ccv_cnnp_model_evaluate(final1, (ccv_cnnp_evaluate_param_t){ |
1733 | 1 | .requires_grad = 0, |
1734 | 1 | }, TENSOR_LIST(x), TENSOR_LIST(y1), 0, 0); |
1735 | 1 | REQUIRE_TENSOR_EQ(y0, y1, "the two models now share the weights and should produce the same result");
1736 | 1 | CNNP_MODEL_GEN(final0, CCV_NNC_LONG_DOT_GRAPH); |
1737 | 1 | ccv_nnc_tensor_free(x); |
1738 | 1 | ccv_nnc_tensor_free(y0); |
1739 | 1 | ccv_nnc_tensor_free(y1); |
1740 | 1 | ccv_cnnp_model_free(final0); |
1741 | 1 | ccv_cnnp_model_free(final1); |
1742 | 1 | } |
1743 | | |
1744 | | TEST_CASE("two models, one with LoRA, one with not, share the same parameters") |
1745 | 1 | { |
1746 | 1 | const ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
1747 | 1 | ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(10, 1, 0, -1, "linear"); |
1748 | 1 | ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0)); |
1749 | 1 | ccv_cnnp_model_t* const final0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0), MODEL_IO_LIST(out0), 0, "tiny"); |
1750 | | |
1751 | 1 | const ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
1752 | 1 | ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(10, 1, 0, -1, "linear"); |
1753 | 1 | ccv_cnnp_model_t* const down1 = ccv_cnnp_dense(2, 1, 0, 1, "down"); |
1754 | 1 | ccv_cnnp_model_t* const up1 = ccv_cnnp_dense(10, 1, 0, 1, "up"); |
1755 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input1)); |
1756 | 1 | ccv_cnnp_model_io_t out1_down = ccv_cnnp_model_apply(down1, MODEL_IO_LIST(input1)); |
1757 | 1 | ccv_cnnp_model_io_t out1_up = ccv_cnnp_model_apply(up1, MODEL_IO_LIST(out1_down)); |
1758 | 1 | ccv_cnnp_model_t* const add1 = ccv_cnnp_sum("sum"); |
1759 | 1 | ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(add1, MODEL_IO_LIST(out1, out1_up)); |
1760 | 1 | ccv_cnnp_model_t* const final1 = ccv_cnnp_model_new(MODEL_IO_LIST(input1), MODEL_IO_LIST(out1_final), 0, "tiny"); |
1761 | | |
1762 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1763 | 1 | dsfmt_t dsfmt; |
1764 | 1 | int i; |
1765 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
1766 | 11 | for (i = 0; i < 10; i++10 ) |
1767 | 10 | x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
1768 | 1 | ccv_nnc_tensor_t* const y0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1769 | 1 | ccv_nnc_tensor_t* const y1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
1770 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10); |
1771 | 1 | ccv_cnnp_model_compile(final0, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
1772 | 1 | ccv_cnnp_model_evaluate(final0, (ccv_cnnp_evaluate_param_t){ |
1773 | 1 | .requires_grad = 0, |
1774 | 1 | }, TENSOR_LIST(x), TENSOR_LIST(y0), 0, 0); |
1775 | 1 | ccv_cnnp_model_compile(final1, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP()); |
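    | | 	// Zero the LoRA up-projection so the low-rank branch contributes nothing:
    | | 	// y = W x + U (D x) with U = 0 reduces to y = W x, making final1 match final0.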
1776 | 1 | ccv_nnc_tensor_t* const up_weights = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 10), 0); |
1777 | 21 | for (i = 0; i < 2 * 10; i++20 ) |
1778 | 20 | up_weights->data.f32[i] = 0; |
1779 | 1 | ccv_cnnp_model_set_parameter(final1, ccv_cnnp_model_parameters(up1, ALL_PARAMETERS, ALL_PARAMETERS), up_weights); |
1780 | 1 | ccv_nnc_tensor_free(up_weights); |
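    | | 	// Share by name through the identity namer: "linear" picks up final0's
    | | 	// weights, while the LoRA-only "down" / "up" parameters have no counterpart
    | | 	// in final0 and presumably keep their own values.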
1781 | 1 | ccv_cnnp_model_share_parameters(final1, ccv_cnnp_model_parameters(final1, ALL_PARAMETERS, ALL_PARAMETERS), final0, ccv_cnnp_model_parameters(final0, ALL_PARAMETERS, ALL_PARAMETERS), _ccv_nnc_same_namer, 0); |
1782 | 1 | ccv_cnnp_model_evaluate(final1, (ccv_cnnp_evaluate_param_t){ |
1783 | 1 | .requires_grad = 0, |
1784 | 1 | }, TENSOR_LIST(x), TENSOR_LIST(y1), 0, 0); |
1785 | 1 | REQUIRE_TENSOR_EQ(y0, y1, "the two models now share the weights and should produce the same result");
1786 | 1 | CNNP_MODEL_GEN(final0, CCV_NNC_LONG_DOT_GRAPH); |
1787 | 1 | ccv_nnc_tensor_free(x); |
1788 | 1 | ccv_nnc_tensor_free(y0); |
1789 | 1 | ccv_nnc_tensor_free(y1); |
1790 | 1 | ccv_cnnp_model_free(final0); |
1791 | 1 | ccv_cnnp_model_free(final1); |
1792 | 1 | } |
1793 | | |
1794 | | TEST_CASE("pad a tensor with padding") |
1795 | 1 | { |
1796 | 1 | const ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
1797 | 1 | const ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
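    | | 	// Zero-pad the NHWC input: begin padding (0, 2, 2, 0) plus end padding
    | | 	// (0, 1, 2, 1) grows a 1x3x3x10 tensor to 1x6x7x11, i.e. 3+2+1 rows,
    | | 	// 3+2+2 columns and 10+0+1 channels.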
1798 | 1 | ccv_cnnp_model_t* const pad = ccv_cnnp_pad(CCV_NNC_PAD_ZERO, DIM_ALLOC(0, 2, 2, 0), DIM_ALLOC(0, 1, 2, 1), "pad"); |
1799 | 1 | ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(pad, MODEL_IO_LIST(input0)); |
1800 | 1 | ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum"); |
1801 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out0, input1)); |
1802 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(out), 0, "tiny"); |
1803 | | |
1804 | 1 | ccv_nnc_tensor_t* const x0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 3, 3, 10), 0); |
1805 | 1 | dsfmt_t dsfmt; |
1806 | 1 | int i; |
1807 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
1808 | 91 | for (i = 0; i < 3 * 3 * 10; i++90 ) |
1809 | 90 | x0->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
1810 | 1 | ccv_nnc_tensor_t* const x1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 6, 7, 11), 0); |
1811 | 463 | for (i = 0; i < 6 * 7 * 11; i++462 ) |
1812 | 462 | x1->data.f32[i] = 1; |
1813 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 6, 7, 11), 0); |
1814 | 1 | ccv_nnc_tensor_param_t input0_params = CPU_TENSOR_NHWC(32F, 1, 3, 3, 10); |
1815 | 1 | ccv_nnc_tensor_param_t input1_params = CPU_TENSOR_NHWC(32F, 1, 6, 7, 11); |
1816 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input0_params, input1_params), CMD_NOOP(), CMD_NOOP()); |
1817 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){ |
1818 | 1 | .requires_grad = 0, |
1819 | 1 | }, TENSOR_LIST(x0, x1), TENSOR_LIST(y), 0, 0); |
1820 | 1 | int j, k; |
1821 | 1 | ccv_nnc_tensor_t* const y0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 6, 7, 11), 0); |
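    | | 	// Reference result: inside the copied window the sum is 1 + x0's value;
    | | 	// everywhere else, including the one zero-padded channel (hence k < 10),
    | | 	// only x1's constant 1 remains.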
1822 | 7 | for (i = 0; i < 6; i++6 ) |
1823 | 48 | for (j = 0; 6 j < 7; j++42 ) |
1824 | 504 | for (k = 0; 42 k < 11; k++462 ) |
1825 | 462 | y0->data.f32[i * 7 * 11 + j * 11 + k] = (i >= 2 && i < 5308 && j >= 2231 && j < 5165 && k < 1099 ) ? 1 + x0->data.f32[(i - 2) * 3 * 10 + (j - 2) * 10 + k]90 : 1372 ;
1826 | 1 | REQUIRE_TENSOR_EQ(y, y0, "it should be padded"); |
1827 | 1 | CNNP_MODEL_GEN(pad, CCV_NNC_LONG_DOT_GRAPH); |
1828 | 1 | ccv_nnc_tensor_free(x0); |
1829 | 1 | ccv_nnc_tensor_free(x1); |
1830 | 1 | ccv_nnc_tensor_free(y); |
1831 | 1 | ccv_nnc_tensor_free(y0); |
1832 | 1 | ccv_cnnp_model_free(final); |
1833 | 1 | } |
1834 | | |
1835 | | TEST_CASE("use move semantics to write output to the empty space of the input tensor") |
1836 | 1 | { |
1837 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
1838 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1839 | 1 | ccv_cnnp_model_io_t input0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(0), DIM_ALLOC(1), "first reshape"), MODEL_IO_LIST(input)); |
1840 | 1 | ccv_cnnp_model_io_t input1 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(1), DIM_ALLOC(1), "second reshape"), MODEL_IO_LIST(input)); |
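    | | 	// input0 and input1 are 1-element views at offsets 0 and 1 of the same
    | | 	// 2-element input; the move below writes linear's output into the second
    | | 	// slot in place, which the x->data.f32[1] check at the end observes.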
1841 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input0)); |
1842 | 1 | ccv_cnnp_model_io_t move0 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out1, input1)); |
1843 | 1 | const ccv_cnnp_model_io_t input2 = ccv_cnnp_input(); |
1844 | 1 | ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(ccv_cnnp_sum("sum"), MODEL_IO_LIST(move0, input2)); |
1845 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, input2), MODEL_IO_LIST(out1_final), 0, "tiny"); |
1846 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0); |
1847 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1848 | 1 | ccv_nnc_tensor_t* const z = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1849 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 2); |
1850 | 1 | ccv_nnc_tensor_param_t input2_params = CPU_TENSOR_NHWC(32F, 1); |
1851 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params, input2_params), CMD_NOOP(), CMD_NOOP()); |
1852 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1853 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1854 | 1 | t->data.f32[0] = 2.4; |
1855 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t); |
1856 | 1 | x->data.f32[0] = 10; |
1857 | 1 | x->data.f32[1] = 0; |
1858 | 1 | y->data.f32[0] = 3; |
1859 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, y), TENSOR_LIST(z), 0, 0); |
1860 | 1 | REQUIRE_EQ_WITH_TOLERANCE(z->data.f32[0], 2.4 * 10 + 3, 1e-5, "should be equal to expected value"); |
1861 | 1 | REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[1], 2.4 * 10, 1e-5, "should be equal to expected value"); |
1862 | 1 | ccv_nnc_tensor_free(x); |
1863 | 1 | ccv_nnc_tensor_free(t); |
1864 | 1 | ccv_nnc_tensor_free(y); |
1865 | 1 | ccv_nnc_tensor_free(z); |
1866 | 1 | ccv_cnnp_model_free(final); |
1867 | 1 | } |
1868 | | |
1869 | | TEST_CASE("use variable and move semantics to co-locate input in the same tensor") |
1870 | 1 | { |
1871 | 1 | const ccv_cnnp_model_io_t input0 = ccv_cnnp_input(); |
1872 | 1 | const ccv_cnnp_model_io_t input1 = ccv_cnnp_input(); |
1873 | 1 | ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1874 | 1 | ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0)); |
1875 | 1 | ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input1)); |
1876 | 1 | ccv_cnnp_model_io_t var = ccv_cnnp_model_apply(ccv_cnnp_variable(CPU_TENSOR_NHWC(32F, 2), "var"), MODEL_IO_LIST()); |
1877 | 1 | ccv_cnnp_model_io_t var0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(0), DIM_ALLOC(1), "first reshape"), MODEL_IO_LIST(var)); |
1878 | 1 | ccv_cnnp_model_io_t var1 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(1), DIM_ALLOC(1), "second reshape"), MODEL_IO_LIST(var)); |
1879 | 1 | ccv_cnnp_model_io_t move0 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out0, var0)); |
1880 | 1 | ccv_cnnp_model_io_t move1 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out1, var1)); |
1881 | 1 | ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 0, 1, "linear"); |
1882 | 1 | ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(var)); |
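    | | 	// Nothing consumes the move outputs directly, so make linear1's read of var
    | | 	// explicitly depend on both moves having executed first.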
1883 | 1 | ccv_cnnp_model_add_dependencies(out1_final, MODEL_IO_LIST(move0, move1)); |
1884 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(out1_final), 0, "tiny"); |
1885 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1886 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1887 | 1 | ccv_nnc_tensor_t* const z = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1888 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1); |
1889 | 1 | ccv_nnc_tensor_param_t input2_params = CPU_TENSOR_NHWC(32F, 1); |
1890 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params, input2_params), CMD_NOOP(), CMD_NOOP()); |
1891 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1892 | 1 | ccv_nnc_tensor_t* const t0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
1893 | 1 | t0->data.f32[0] = 2.4; |
1894 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear0, ALL_PARAMETERS, 0), t0); |
1895 | 1 | ccv_nnc_tensor_t* const t1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0); |
1896 | 1 | t1->data.f32[0] = -1.1; |
1897 | 1 | t1->data.f32[1] = 1.2; |
1898 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear1, ALL_PARAMETERS, 0), t1); |
1899 | 1 | x->data.f32[0] = 10; |
1900 | 1 | y->data.f32[0] = 3; |
1901 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, y), TENSOR_LIST(z), 0, 0); |
1902 | 1 | REQUIRE_EQ_WITH_TOLERANCE(z->data.f32[0], -1.1 * 2.4 * 10 + 3 * 2.4 * 1.2, 1e-5, "should be equal to expected value"); |
1903 | 1 | ccv_nnc_tensor_free(x); |
1904 | 1 | ccv_nnc_tensor_free(t0); |
1905 | 1 | ccv_nnc_tensor_free(t1); |
1906 | 1 | ccv_nnc_tensor_free(y); |
1907 | 1 | ccv_nnc_tensor_free(z); |
1908 | 1 | ccv_cnnp_model_free(final); |
1909 | 1 | } |
1910 | | |
1911 | | TEST_CASE("use contiguous to make certain tensor contiguous during model inference") |
1912 | 1 | { |
1913 | 1 | const ccv_cnnp_model_io_t x = ccv_cnnp_input(); |
1914 | 1 | ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(4, 1, 0, 1, "linear"); |
1915 | 1 | ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(x)); |
1916 | | // Take the last 2 of the 4 output columns (a strided view at offset 2), then apply an elementwise op (sigmoid here), which on float32 / CPU currently requires its input to be contiguous.
1917 | 1 | ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(2, 2), DIM_ALLOC(0, 2), DIM_ALLOC(4, 1), "reshape"), MODEL_IO_LIST(y)); |
1918 | | /* Using just a data transfer (move) is not enough:
1919 | | ccv_cnnp_model_io_t moved = ccv_cnnp_model_apply(ccv_cnnp_variable(CPU_TENSOR_NHWC(32F, 2, 2), 0), MODEL_IO_LIST()); |
1920 | | ccv_cnnp_model_io_t y_copied = ccv_cnnp_model_apply(ccv_cnnp_move(0), MODEL_IO_LIST(y0, moved)); |
1921 | | ccv_cnnp_model_io_t z = ccv_cnnp_model_apply(ccv_cnnp_sigmoid("sigmoid"), MODEL_IO_LIST(y_copied)); |
1922 | | */ |
1923 | | // Instead, the contiguous model materializes the strided view into packed memory.
1924 | 1 | ccv_cnnp_model_io_t y_copied = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0)); |
1925 | 1 | ccv_cnnp_model_io_t z = ccv_cnnp_model_apply(ccv_cnnp_sigmoid("sigmoid"), MODEL_IO_LIST(y_copied)); |
1926 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0); |
1927 | 1 | ccv_nnc_tensor_t* const z_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0); |
1928 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(z), 0, "tiny"); |
1929 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP()); |
1930 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1931 | 1 | ccv_nnc_tensor_t* const t0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0); |
1932 | 1 | t0->data.f32[0] = 2.4; |
1933 | 1 | t0->data.f32[1] = -0.4; |
1934 | 1 | t0->data.f32[2] = 1.2; |
1935 | 1 | t0->data.f32[3] = -3.6; |
1936 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear0, ALL_PARAMETERS, 0), t0); |
1937 | 1 | x_tensor->data.f32[0] = 1; |
1938 | 1 | x_tensor->data.f32[1] = -1; |
1939 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(z_tensor), 0, 0); |
1940 | 1 | REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[0], 1.0 / (1.0 + exp(-1.2)), 1e-5, "should be equal to expected value"); |
1941 | 1 | REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[1], 1.0 / (1.0 + exp(3.6)), 1e-5, "should be equal to expected value"); |
1942 | 1 | REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[2], 1.0 / (1.0 + exp(1.2)), 1e-5, "should be equal to expected value"); |
1943 | 1 | REQUIRE_EQ_WITH_TOLERANCE(z_tensor->data.f32[3], 1.0 / (1.0 + exp(-3.6)), 1e-5, "should be equal to expected value"); |
1944 | 1 | ccv_nnc_tensor_free(x_tensor); |
1945 | 1 | ccv_nnc_tensor_free(t0); |
1946 | 1 | ccv_nnc_tensor_free(z_tensor); |
1947 | 1 | ccv_cnnp_model_free(final); |
1948 | 1 | } |
1949 | | |
1950 | | TEST_CASE("chunk a tensor into several smaller ones, variant 1") |
1951 | 1 | { |
1952 | 1 | const ccv_cnnp_model_io_t x = ccv_cnnp_input(); |
1953 | 1 | ccv_cnnp_model_t* const chunk = ccv_cnnp_chunk(2, 1, "chunk"); |
1954 | 1 | ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(chunk, MODEL_IO_LIST(x)); |
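    | | 	// chunk(2, axis = 1) splits the 2x4 input into two 2x2 column slices; each
    | | 	// extract selects one of chunk's outputs and contiguous packs the strided
    | | 	// slice into its own dense tensor.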
1955 | 1 | ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(y)); |
1956 | 1 | ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0)); |
1957 | 1 | ccv_cnnp_model_io_t y1 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(y)); |
1958 | 1 | ccv_cnnp_model_io_t o1 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y1)); |
1959 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(o0, o1), 0, "tiny"); |
1960 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0); |
1961 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP()); |
1962 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
1963 | 1 | x_tensor->data.f32[0] = 1; |
1964 | 1 | x_tensor->data.f32[1] = -1; |
1965 | 1 | x_tensor->data.f32[2] = 2; |
1966 | 1 | x_tensor->data.f32[3] = 3; |
1967 | 1 | x_tensor->data.f32[4] = 4; |
1968 | 1 | x_tensor->data.f32[5] = 5; |
1969 | 1 | x_tensor->data.f32[6] = 6; |
1970 | 1 | x_tensor->data.f32[7] = 7; |
1971 | 1 | ccv_nnc_tensor_t* const y0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0); |
1972 | 1 | ccv_nnc_tensor_t* const y1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0); |
1973 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(y0_tensor, y1_tensor), 0, 0); |
1974 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[0], 1, 1e-5, "should be equal to expected value"); |
1975 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[1], -1, 1e-5, "should be equal to expected value"); |
1976 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[2], 4, 1e-5, "should be equal to expected value"); |
1977 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[3], 5, 1e-5, "should be equal to expected value"); |
1978 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[0], 2, 1e-5, "should be equal to expected value"); |
1979 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[1], 3, 1e-5, "should be equal to expected value"); |
1980 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[2], 6, 1e-5, "should be equal to expected value"); |
1981 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[3], 7, 1e-5, "should be equal to expected value"); |
1982 | 1 | ccv_nnc_tensor_free(x_tensor); |
1983 | 1 | ccv_nnc_tensor_free(y0_tensor); |
1984 | 1 | ccv_nnc_tensor_free(y1_tensor); |
1985 | 1 | ccv_cnnp_model_free(final); |
1986 | 1 | } |
1987 | | |
1988 | | TEST_CASE("chunk a tensor into several smaller ones, variant 2") |
1989 | 1 | { |
1990 | 1 | const ccv_cnnp_model_io_t x = ccv_cnnp_input(); |
1991 | 1 | ccv_cnnp_model_t* const chunk = ccv_cnnp_chunk(2, 0, "chunk"); |
1992 | 1 | ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(chunk, MODEL_IO_LIST(x)); |
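    | | 	// Same as variant 1 but along axis 0: the 2x4 input becomes two 1x4 row slices.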
1993 | 1 | ccv_cnnp_model_io_t y0 = ccv_cnnp_model_apply(ccv_cnnp_extract(0, "index0"), MODEL_IO_LIST(y)); |
1994 | 1 | ccv_cnnp_model_io_t o0 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y0)); |
1995 | 1 | ccv_cnnp_model_io_t y1 = ccv_cnnp_model_apply(ccv_cnnp_extract(1, "index1"), MODEL_IO_LIST(y)); |
1996 | 1 | ccv_cnnp_model_io_t o1 = ccv_cnnp_model_apply(ccv_cnnp_contiguous(0), MODEL_IO_LIST(y1)); |
1997 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(x), MODEL_IO_LIST(o0, o1), 0, "tiny"); |
1998 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0); |
1999 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(x_tensor->info), CMD_NOOP(), CMD_NOOP()); |
2000 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
2001 | 1 | x_tensor->data.f32[0] = 1; |
2002 | 1 | x_tensor->data.f32[1] = -1; |
2003 | 1 | x_tensor->data.f32[2] = 2; |
2004 | 1 | x_tensor->data.f32[3] = 3; |
2005 | 1 | x_tensor->data.f32[4] = 4; |
2006 | 1 | x_tensor->data.f32[5] = 5; |
2007 | 1 | x_tensor->data.f32[6] = 6; |
2008 | 1 | x_tensor->data.f32[7] = 7; |
2009 | 1 | ccv_nnc_tensor_t* const y0_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 4), 0); |
2010 | 1 | ccv_nnc_tensor_t* const y1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 4), 0); |
2011 | 1 | ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x_tensor), TENSOR_LIST(y0_tensor, y1_tensor), 0, 0); |
2012 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[0], 1, 1e-5, "should be equal to expected value"); |
2013 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[1], -1, 1e-5, "should be equal to expected value"); |
2014 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[2], 2, 1e-5, "should be equal to expected value"); |
2015 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y0_tensor->data.f32[3], 3, 1e-5, "should be equal to expected value"); |
2016 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[0], 4, 1e-5, "should be equal to expected value"); |
2017 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[1], 5, 1e-5, "should be equal to expected value"); |
2018 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[2], 6, 1e-5, "should be equal to expected value"); |
2019 | 1 | REQUIRE_EQ_WITH_TOLERANCE(y1_tensor->data.f32[3], 7, 1e-5, "should be equal to expected value"); |
2020 | 1 | ccv_nnc_tensor_free(x_tensor); |
2021 | 1 | ccv_nnc_tensor_free(y0_tensor); |
2022 | 1 | ccv_nnc_tensor_free(y1_tensor); |
2023 | 1 | ccv_cnnp_model_free(final); |
2024 | 1 | } |
2025 | | |
2026 | | TEST_CASE("LoRA fine-tuning GEMM set is_trainable to false and with gradient checkpointing") |
2027 | 1 | { |
2028 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
2029 | 1 | ccv_cnnp_model_t* const linear = ccv_cnnp_dense(10, 1, 0, -1, "linear"); |
2030 | 1 | ccv_cnnp_model_t* const down = ccv_cnnp_dense(2, 1, 0, 1, "down"); |
2031 | 1 | ccv_cnnp_model_t* const up = ccv_cnnp_dense(10, 1, 0, 1, "up"); |
2032 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input)); |
2033 | 1 | ccv_cnnp_model_io_t out_down = ccv_cnnp_model_apply(down, MODEL_IO_LIST(input)); |
2034 | 1 | ccv_cnnp_model_io_t out_up = ccv_cnnp_model_apply(up, MODEL_IO_LIST(out_down)); |
2035 | 1 | ccv_cnnp_model_t* const add = ccv_cnnp_sum("sum"); |
2036 | 1 | ccv_cnnp_model_io_t out_final = ccv_cnnp_model_apply(add, MODEL_IO_LIST(out, out_up)); |
2037 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out_final), 0, "tiny"); |
2038 | 1 | ccv_cnnp_model_set_gradient_checkpointing(final, 1); |
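    | | 	// "linear" is created with is_trainable -1 (presumably inherit) and the
    | | 	// enclosing model with 0, so the base GEMM stays frozen and only the
    | | 	// explicitly trainable "down" / "up" LoRA adapters receive SGD updates.
    | | 	// Gradient checkpointing additionally recomputes intermediate activations
    | | 	// during backprop instead of keeping them, trading compute for memory.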
2039 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
2040 | 1 | ccv_nnc_tensor_t* const tlinear = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0); |
2041 | 1 | int i; |
2042 | 101 | for (i = 0; i < 10 * 10; i++100 ) |
2043 | 100 | tlinear->data.f32[i] = (i / 10 == i % 10) ? 110 : 090 ; |
2044 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 2), 0); |
2045 | 21 | for (i = 0; i < 10 * 2; i++20 ) |
2046 | 20 | t->data.f32[i] = 0; |
2047 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
2048 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10); |
2049 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_SGD_FORWARD(1, 0.01, 1, 0.1, 0, 0), CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN)); |
2050 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), tlinear); |
2051 | 1 | ccv_nnc_tensor_free(tlinear); |
2052 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(up, ALL_PARAMETERS, 0), t); |
2053 | 1 | ccv_nnc_tensor_free(t); |
2054 | 11 | for (i = 0; i < 10; i++10 ) |
2055 | 10 | x->data.f32[i] = i; |
2056 | 1 | ccv_nnc_tensor_t* const target = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
2057 | 11 | for (i = 0; i < 10; i++10 ) |
2058 | 10 | target->data.f32[i] = 10 - i; |
2059 | 11 | for (i = 0; i < 10; i++10 ) |
2060 | 10 | ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0); |
2061 | 1 | ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0); |
2062 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
2063 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, y->data.f32, target->data.f32, 10, 1e-2, "should match the target after fine-tuning"); |
2064 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(final), 0, "should be marked as not trainable"); |
2065 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(down), 1, "should be marked as trainable"); |
2066 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(up), 1, "should be marked as trainable"); |
2067 | 1 | ccv_nnc_tensor_free(x); |
2068 | 1 | ccv_nnc_tensor_free(target); |
2069 | 1 | ccv_nnc_tensor_free(y); |
2070 | 1 | ccv_cnnp_model_free(final); |
2071 | 1 | } |
2072 | | |
2073 | | TEST_CASE("LoRA fine-tuning MLP with GELU, set is_trainable to false and with gradient checkpointing") |
2074 | 1 | { |
2075 | 1 | ccv_nnc_stream_context_set_seed(0, 47); |
2076 | 1 | const ccv_cnnp_model_io_t input = ccv_cnnp_input(); |
2077 | 1 | ccv_cnnp_model_t* const fc1 = ccv_cnnp_dense(10, 1, 0, -1, "fc1"); |
2078 | 1 | ccv_cnnp_model_t* const fc2 = ccv_cnnp_dense(10, 1, 0, -1, "fc2"); |
2079 | 1 | ccv_cnnp_model_t* const down_fc1 = ccv_cnnp_dense(2, 1, 0, 1, "down_fc1"); |
2080 | 1 | ccv_cnnp_model_t* const up_fc1 = ccv_cnnp_dense(10, 1, 0, 1, "up_fc1"); |
2081 | 1 | ccv_cnnp_model_t* const down_fc2 = ccv_cnnp_dense(2, 1, 0, 1, "down_fc2"); |
2082 | 1 | ccv_cnnp_model_t* const up_fc2 = ccv_cnnp_dense(10, 1, 0, 1, "up_fc2"); |
2083 | 1 | ccv_cnnp_model_t* const fc3 = ccv_cnnp_dense(5, 1, 0, -1, "fc3"); |
2084 | 1 | ccv_cnnp_model_t* const down_fc3 = ccv_cnnp_dense(2, 1, 0, 1, "down_fc3"); |
2085 | 1 | ccv_cnnp_model_t* const up_fc3 = ccv_cnnp_dense(5, 1, 0, 1, "up_fc3"); |
2086 | 1 | ccv_cnnp_model_io_t out_fc1 = ccv_cnnp_model_apply(fc1, MODEL_IO_LIST(input)); |
2087 | 1 | ccv_cnnp_model_io_t out_fc2 = ccv_cnnp_model_apply(fc2, MODEL_IO_LIST(input)); |
2088 | 1 | ccv_cnnp_model_io_t out_down_fc1 = ccv_cnnp_model_apply(down_fc1, MODEL_IO_LIST(input)); |
2089 | 1 | ccv_cnnp_model_io_t out_up_fc1 = ccv_cnnp_model_apply(up_fc1, MODEL_IO_LIST(out_down_fc1)); |
2090 | 1 | ccv_cnnp_model_io_t out_down_fc2 = ccv_cnnp_model_apply(down_fc2, MODEL_IO_LIST(input)); |
2091 | 1 | ccv_cnnp_model_io_t out_up_fc2 = ccv_cnnp_model_apply(up_fc2, MODEL_IO_LIST(out_down_fc2)); |
2092 | 1 | ccv_cnnp_model_io_t out_sum_fc1 = ccv_cnnp_model_apply(ccv_cnnp_sum("sum_fc1"), MODEL_IO_LIST(out_fc1, out_up_fc1)); |
2093 | 1 | ccv_cnnp_model_io_t out_sum_fc2 = ccv_cnnp_model_apply(ccv_cnnp_sum("sum_fc2"), MODEL_IO_LIST(out_fc2, out_up_fc2)); |
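    | | 	// GEGLU-style gate: multiply the fc1 branch elementwise by GELU of the fc2
    | | 	// branch, then project with fc3; every branch carries its own LoRA adapter.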
2094 | 1 | ccv_cnnp_model_io_t out_gelu_fc2 = ccv_cnnp_model_apply(ccv_cnnp_gelu(0, "gelu_fc2"), MODEL_IO_LIST(out_sum_fc2)); |
2095 | 1 | ccv_cnnp_model_io_t out_mul_fc12 = ccv_cnnp_model_apply(ccv_cnnp_mul(1, "mul_fc12"), MODEL_IO_LIST(out_sum_fc1, out_gelu_fc2)); |
2096 | 1 | ccv_cnnp_model_io_t out_fc3 = ccv_cnnp_model_apply(fc3, MODEL_IO_LIST(out_mul_fc12)); |
2097 | 1 | ccv_cnnp_model_io_t out_down_fc3 = ccv_cnnp_model_apply(down_fc3, MODEL_IO_LIST(out_mul_fc12)); |
2098 | 1 | ccv_cnnp_model_io_t out_up_fc3 = ccv_cnnp_model_apply(up_fc3, MODEL_IO_LIST(out_down_fc3)); |
2099 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_sum("sum_fc3"), MODEL_IO_LIST(out_fc3, out_up_fc3)); |
2100 | 1 | ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input), MODEL_IO_LIST(out), 0, "tiny"); |
2101 | 1 | ccv_cnnp_model_set_gradient_checkpointing(final, 1); |
2102 | 1 | ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0); |
2103 | 1 | ccv_nnc_tensor_t* const tlinear = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 10), 0); |
2104 | 1 | int i; |
2105 | 101 | for (i = 0; i < 10 * 10; i++100 ) |
2106 | 100 | tlinear->data.f32[i] = (i / 10 == i % 10) ? 110 : 090 ; |
2107 | 1 | ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 2), 0); |
2108 | 21 | for (i = 0; i < 10 * 2; i++20 ) |
2109 | 20 | t->data.f32[i] = 0; |
2110 | 1 | ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5), 0); |
2111 | 1 | ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10); |
2112 | 1 | ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params), CMD_SGD_FORWARD(1, 0.001, 1, 0.1, 0, 0), CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN)); |
2113 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(fc1, ALL_PARAMETERS, 0), tlinear); |
2114 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(fc2, ALL_PARAMETERS, 0), tlinear); |
2115 | 1 | ccv_nnc_tensor_free(tlinear); |
2116 | 1 | ccv_nnc_tensor_t* const tlinear_fc3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 10), 0); |
2117 | 51 | for (i = 0; i < 5 * 10; i++50 ) |
2118 | 50 | tlinear_fc3->data.f32[i] = (i / 10 == i % 10) ? 15 : 045 ; |
2119 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(fc3, ALL_PARAMETERS, 0), tlinear_fc3); |
2120 | 1 | ccv_nnc_tensor_free(tlinear_fc3); |
2121 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(up_fc1, ALL_PARAMETERS, 0), t); |
2122 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(up_fc2, ALL_PARAMETERS, 0), t); |
2123 | 1 | ccv_nnc_tensor_free(t); |
2124 | 1 | ccv_nnc_tensor_t* const t_fc3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 2), 0); |
2125 | 11 | for (i = 0; i < 5 * 2; i++10 ) |
2126 | 10 | t_fc3->data.f32[i] = 0; |
2127 | 1 | ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(up_fc3, ALL_PARAMETERS, 0), t_fc3); |
2128 | 1 | ccv_nnc_tensor_free(t_fc3); |
2129 | 11 | for (i = 0; i < 10; i++10 ) |
2130 | 10 | x->data.f32[i] = i; |
2131 | 1 | ccv_nnc_tensor_t* const target = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5), 0); |
2132 | 6 | for (i = 0; i < 5; i++5 ) |
2133 | 5 | target->data.f32[i] = 5 - i; |
2134 | 101 | for (i = 0; i < 100; i++100 ) |
2135 | 100 | ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0); |
2136 | 1 | ccv_cnnp_model_fit(final, TENSOR_LIST(x), TENSOR_LIST(target), TENSOR_LIST(y), 0, 0); |
2137 | 1 | CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH); |
2138 | 1 | REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, y->data.f32, target->data.f32, 5, 1e-1, "should match the target after fine-tuning"); |
2139 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(final), 0, "should be marked as not trainable"); |
2140 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(down_fc1), 1, "should be marked as trainable"); |
2141 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(up_fc1), 1, "should be marked as trainable"); |
2142 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(down_fc2), 1, "should be marked as trainable"); |
2143 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(up_fc2), 1, "should be marked as trainable"); |
2144 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(down_fc3), 1, "should be marked as trainable"); |
2145 | 1 | REQUIRE_EQ(ccv_cnnp_model_is_trainable(up_fc3), 1, "should be marked as trainable"); |
2146 | 1 | ccv_nnc_tensor_free(x); |
2147 | 1 | ccv_nnc_tensor_free(target); |
2148 | 1 | ccv_nnc_tensor_free(y); |
2149 | 1 | ccv_cnnp_model_free(final); |
2150 | 1 | } |
2151 | | |
2152 | | #include "case_main.h" |