Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 576, column 1
Potential leak of memory pointed to by 'trainable_pos'
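
Suggested remediation (a minimal sketch, not taken from the source): the analyzer path below shows 'trainable_pos' being allocated with cccalloc() at line 548 of _ccv_cnnp_model_set_minimizer_setter; each per-trainable ccv_array_t stored in it is freed at line 574, but the index array itself is never released before the function returns at line 576. Assuming ccfree() is the matching deallocator for cccalloc() (as used elsewhere in this file), the final loop could release it:

 for (i = 0; i < trainable_size; i++)
 {
  ccv_array_t* const trainable_indexes = trainable_pos[((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, i))->d];
  const ccv_nnc_cmd_t cmd = minimizer_setter(model, (ccv_cnnp_trainable_index_t*)ccv_array_get(trainable_indexes, 0), trainable_indexes->rnum, context);
  _ccv_cnnp_model_graph_exec_symbol_set(model, update_nodes[i], cmd);
  ccv_array_free(trainable_indexes); // Frees each per-trainable index array, but not trainable_pos itself.
 }
 ccfree(trainable_pos); // Release the index array allocated at line 548; this addresses the reported leak.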

Annotated Source Code


clang -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model static -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -target-feature +sse2 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -resource-dir /usr/local/lib/clang/8.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_UCONTEXT -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -I /usr/local/include -internal-isystem /usr/local/include -internal-isystem /usr/local/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir /home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fmessage-length 0 -fblocks -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -o /home/liu/buildslave/public_html/analyze/2019-07-03-215927-77989-1 -x c ccv_cnnp_model.c -faddrsig
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6
7#pragma mark - Level-5 API
8
9static const ccv_cnnp_model_vtab_t ccv_cnnp_input_isa;
10
11#define CCV_CNNP_IS_MODEL_INPUT(x) ((x)->isa == &ccv_cnnp_input_isa)
12
13struct ccv_cnnp_model_io_s {
14 int visit; // Temporary bits stored in the ccv_cnnp_model_io_t object, whoever uses it should clean it up.
15 ccv_cnnp_model_t* model; // Reference back to the model who holds it. This is required because the model is the one who holds the io.
16 ccv_array_t* incomings; // Array of ccv_cnnp_model_io_t. The order is important because it impacts the order of symbols.
17 ccv_array_t* outgoings; // Array of ccv_cnnp_model_io_t.
18 ccv_nnc_tensor_symbol_t* outputs; // This is different from the outputs from a model. A model could be reused, causing the outputs on that model to be the most recent one. This keeps the outputs of each.
19};
20
21typedef struct {
22 ccv_cnnp_model_t super;
23 int sequence_size;
24 ccv_cnnp_model_t* sequence[1];
25} ccv_cnnp_sequential_model_t;
26
27static void _ccv_cnnp_sequential_model_deinit(ccv_cnnp_model_t* const super)
28{
29 ccv_cnnp_sequential_model_t* const self = (ccv_cnnp_sequential_model_t*)super;
30 int i;
31 for (i = 0; i < self->sequence_size; i++)
32 ccv_cnnp_model_free(self->sequence[i]);
33}
34
35static void _ccv_cnnp_sequential_model_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
36{
37 ccv_cnnp_sequential_model_t* const self = (ccv_cnnp_sequential_model_t*)super;
38 int i;
39 ccv_nnc_tensor_symbol_t input = inputs[0];
40 assert(input_size == 1);
41 for (i = 0; i < self->sequence_size; i++)
42 {
43 ccv_nnc_tensor_symbol_t output;
44 ccv_cnnp_model_t* const sub_model = self->sequence[i];
45 // Go through each sub model to build the graph.
46 ccv_cnnp_model_build(sub_model, graph, &input, 1, &output, 1);
47 input = output;
48 }
49 outputs[0] = input;
50}
51
52static void _ccv_cnnp_sequential_model_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
53{
54 ccv_cnnp_sequential_model_t* const self = (ccv_cnnp_sequential_model_t*)super;
55 int i;
56 for (i = 0; i < self->sequence_size; i++)
57 ccv_cnnp_model_init_states(self->sequence[i], graph, initializer, context);
58}
59
60static void _ccv_cnnp_sequential_model_add_to_trainable(ccv_cnnp_model_t* const super, ccv_array_t* const trainables)
61{
62 ccv_cnnp_sequential_model_t* const self = (ccv_cnnp_sequential_model_t*)super;
63 int i;
64 for (i = 0; i < self->sequence_size; i++)
65 ccv_cnnp_model_add_to_trainable(self->sequence[i], trainables);
66}
67
68static void _ccv_cnnp_sequential_model_add_to_output(ccv_cnnp_model_t* const super, ccv_array_t* const outputs)
69{
70 ccv_cnnp_sequential_model_t* const self = (ccv_cnnp_sequential_model_t*)super;
71 int i;
72 for (i = 0; i < self->sequence_size; i++)
73 ccv_cnnp_model_add_to_output(self->sequence[i], outputs);
74}
75
76static void _ccv_cnnp_sequential_model_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
77{
78 ccv_cnnp_sequential_model_t* const self = (ccv_cnnp_sequential_model_t*)super;
79 int i;
80 for (i = 0; i < self->sequence_size; i++)
81 ccv_cnnp_model_set_is_test(self->sequence[i], is_test, updater, context);
82}
83
84static const ccv_cnnp_model_vtab_t ccv_cnnp_sequential_model_isa = {
85 .deinit = _ccv_cnnp_sequential_model_deinit,
86 .build = _ccv_cnnp_sequential_model_build,
87 .init_states = _ccv_cnnp_sequential_model_init_states,
88 .add_to_trainable = _ccv_cnnp_sequential_model_add_to_trainable,
89 .add_to_output = _ccv_cnnp_sequential_model_add_to_output,
90 .set_is_test = _ccv_cnnp_sequential_model_set_is_test,
91};
92
93ccv_cnnp_model_t* ccv_cnnp_sequential_new(ccv_cnnp_model_t* const* const models, const int model_size)
94{
95 assert(model_size > 0);
96 ccv_cnnp_sequential_model_t* const sequential_model = (ccv_cnnp_sequential_model_t*)cccalloc(1, sizeof(ccv_cnnp_sequential_model_t) + sizeof(ccv_cnnp_model_t*) * (model_size - 1) + sizeof(ccv_nnc_tensor_symbol_t));
97 sequential_model->super.isa = &ccv_cnnp_sequential_model_isa;
98 sequential_model->super.input_size = 1;
99 sequential_model->super.outputs = (ccv_nnc_tensor_symbol_t*)(sequential_model->sequence + model_size);
100 sequential_model->super.output_size = 1;
101 sequential_model->sequence_size = model_size;
102 memcpy(sequential_model->sequence, models, sizeof(ccv_cnnp_model_t*) * model_size);
103 return (ccv_cnnp_model_t*)sequential_model;
104}
105
106typedef struct {
107 ccv_cnnp_model_t super;
108 // The name is similar to sequential model, but it is just topologically sorted models.
109 int sequence_size;
110 ccv_cnnp_model_io_t sequence[1];
111} ccv_cnnp_functional_model_t;
112
113static void _ccv_cnnp_functional_model_deinit(ccv_cnnp_model_t* const super)
114{
115 ccv_cnnp_functional_model_t* const self = (ccv_cnnp_functional_model_t*)super;
116 int i, j = 0, k;
117 for (i = 0; i < self->sequence_size; i++)
118 {
119 ccv_cnnp_model_t* const model = self->sequence[i]->model;
120 if (!model)
121 continue;
122 self->sequence[j++] = (ccv_cnnp_model_io_t)model;
123 // Go through all their IO to remove itself as model.
124 assert(model->io);
125 for (k = 0; k < model->io->rnum; k++)
126 {
127 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, k);
128 model_io->model = 0;
129 }
130 }
131 for (i = 0; i < j; i++)
132 ccv_cnnp_model_free((ccv_cnnp_model_t*)self->sequence[i]);
133}
134
135static void _ccv_cnnp_functional_model_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
136{
137 ccv_cnnp_functional_model_t* const self = (ccv_cnnp_functional_model_t*)super;
138 assert(self->super.input_size == input_size);
139 assert(self->super.output_size == output_size);
140 int i, j, k;
141 for (i = 0; i < self->super.input_size; i++)
142 self->sequence[i]->outputs[0] = self->sequence[i]->model->outputs[0] = inputs[i]; // Assigning the output symbol of input layer to be the input symbol.
143 ccv_array_t* input_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 1, 0);
144 for (i = self->super.input_size; i < self->sequence_size; i++)
145 {
146 ccv_cnnp_model_t* const sub_model = self->sequence[i]->model;
147 ccv_array_clear(input_symbols);
148 const ccv_array_t* const incomings = self->sequence[i]->incomings;
149 for (j = 0; j < incomings->rnum; j++)
150 {
151 const ccv_cnnp_model_io_t input = *(ccv_cnnp_model_io_t*)ccv_array_get(incomings, j);
152 for (k = 0; k < input->model->output_size; k++)
153 ccv_array_push(input_symbols, &input->outputs[k]);
154 }
155 // Go through each sub model to build the graph.
156 ccv_cnnp_model_build(sub_model, graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(input_symbols, 0), input_symbols->rnum, self->sequence[i]->outputs, sub_model->output_size);
157 }
158 ccv_array_free(input_symbols);
159 for (i = output_size, k = self->sequence_size - 1; k >= 0; k--)
160 {
161 ccv_cnnp_model_t* const sub_model = self->sequence[k]->model;
162 i -= sub_model->output_size;
163 if (i < 0)
164 break;
165 for (j = 0; j < sub_model->output_size; j++)
166 outputs[i + j] = self->sequence[k]->outputs[j];
167 }
168 assert(i <= 0);
169}
170
171static void _ccv_cnnp_functional_model_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
172{
173 ccv_cnnp_functional_model_t* const self = (ccv_cnnp_functional_model_t*)super;
174 int i;
175 for (i = self->super.input_size; i < self->sequence_size; i++)
176 ccv_cnnp_model_init_states(self->sequence[i]->model, graph, initializer, context);
177}
178
179static void _ccv_cnnp_functional_model_add_to_trainable(ccv_cnnp_model_t* const super, ccv_array_t* const trainables)
180{
181 ccv_cnnp_functional_model_t* const self = (ccv_cnnp_functional_model_t*)super;
182 int i;
183 for (i = self->super.input_size; i < self->sequence_size; i++)
184 ccv_cnnp_model_add_to_trainable(self->sequence[i]->model, trainables);
185}
186
187static void _ccv_cnnp_functional_model_add_to_output(ccv_cnnp_model_t* const super, ccv_array_t* const outputs)
188{
189 ccv_cnnp_functional_model_t* const self = (ccv_cnnp_functional_model_t*)super;
190 int i;
191 for (i = self->super.input_size; i < self->sequence_size; i++)
192 ccv_cnnp_model_add_to_output(self->sequence[i]->model, outputs);
193}
194
195static void _ccv_cnnp_functional_model_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
196{
197 ccv_cnnp_functional_model_t* const self = (ccv_cnnp_functional_model_t*)super;
198 int i;
199 for (i = self->super.input_size; i < self->sequence_size; i++)
200 ccv_cnnp_model_set_is_test(self->sequence[i]->model, is_test, updater, context);
201}
202
203static const ccv_cnnp_model_vtab_t ccv_cnnp_functional_model_isa = {
204 .deinit = _ccv_cnnp_functional_model_deinit,
205 .build = _ccv_cnnp_functional_model_build,
206 .init_states = _ccv_cnnp_functional_model_init_states,
207 .add_to_trainable = _ccv_cnnp_functional_model_add_to_trainable,
208 .add_to_output = _ccv_cnnp_functional_model_add_to_output,
209 .set_is_test = _ccv_cnnp_functional_model_set_is_test,
210};
211
212ccv_cnnp_model_t* ccv_cnnp_model_new(const ccv_cnnp_model_io_t* const inputs, const int input_size, const ccv_cnnp_model_io_t* const outputs, const int output_size)
213{
214 assert(output_size > 0);
215 // Do topological sort.
216 ccv_array_t* const reverse_top = ccv_array_new(sizeof(ccv_cnnp_model_io_t), output_size, 0);
217 ccv_array_resize(reverse_top, output_size);
218 memcpy(ccv_array_get(reverse_top, 0), outputs, sizeof(ccv_cnnp_model_io_t) * output_size);
219 // Go from the output, until we meet inputs.
220 int i, j, input_count = 0;
221 int tensor_output_size = 0; // io can be mapped to multiple tensor outputs, therefore, need to compute the exact tensor output size.
222 for (i = 0; i < output_size; i++)
223 tensor_output_size += outputs[i]->model->output_size;
224 for (i = 0; i < reverse_top->rnum; i++)
225 {
226 const ccv_cnnp_model_io_t output = *(ccv_cnnp_model_io_t*)ccv_array_get(reverse_top, i);
227 assert(!CCV_CNNP_IS_MODEL_INPUT(output->model));
228 // If it is input, push it here.
229 if (output->incomings)
230 for (j = 0; j < output->incomings->rnum; j++)
231 {
232 const ccv_cnnp_model_io_t input = *(ccv_cnnp_model_io_t*)ccv_array_get(output->incomings, j);
233 ++input->visit; // Mark it as visited.
234 if (input->visit != input->outgoings->rnum) // Not all dependencies visited.
235 continue;
236 if (!CCV_CNNP_IS_MODEL_INPUT(input->model))
237 ccv_array_push(reverse_top, &input);
238 else
239 ++input_count;
240 }
241 }
242 for (i = 0; i < reverse_top->rnum; i++)
243 {
244 const ccv_cnnp_model_io_t output = *(ccv_cnnp_model_io_t*)ccv_array_get(reverse_top, i);
245 output->visit = 0; // Clean the visit back.
246 }
247 for (i = 0; i < input_size; i++)
248 inputs[i]->visit = 0; // Clean the visit back.
249 assert(input_count == input_size); // Assuming they all match.
250 const int sequence_size = reverse_top->rnum + input_size;
251 ccv_cnnp_functional_model_t* const functional_model = (ccv_cnnp_functional_model_t*)cccalloc(1, sizeof(ccv_cnnp_functional_model_t) + sizeof(ccv_cnnp_model_t*) * (sequence_size - 1) + sizeof(ccv_nnc_tensor_symbol_t) * tensor_output_size);
252 functional_model->super.isa = &ccv_cnnp_functional_model_isa;
253 functional_model->super.outputs = (ccv_nnc_tensor_symbol_t*)(functional_model->sequence + sequence_size);
254 functional_model->super.output_size = tensor_output_size;
255 functional_model->super.input_size = input_size;
256 functional_model->sequence_size = sequence_size;
257 memcpy(functional_model->sequence, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
258 for (i = 0; i < reverse_top->rnum; i++)
259 functional_model->sequence[input_size + i] = *(ccv_cnnp_model_io_t*)ccv_array_get(reverse_top, reverse_top->rnum - 1 - i);
260 ccv_array_free(reverse_top);
261 return (ccv_cnnp_model_t*)functional_model;
262}
263
264ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
265{
266 assert(input_size > 0);
267 if (!model->io)
268 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
269 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
270 model_io->visit = 0;
271 model_io->model = model;
272 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
273 model_io->outgoings = 0;
274 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
275 ccv_array_push(model->io, &model_io);
276 int i;
277 ccv_array_resize(model_io->incomings, input_size);
278 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
279 for (i = 0; i < input_size; i++)
280 {
281 if (!inputs[i]->outgoings)
282 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
283 ccv_array_push(inputs[i]->outgoings, &model_io);
284 }
285 return model_io;
286}
287
288static void _ccv_nnc_array_dedup_tensor_symbols(ccv_array_t* const tensor_symbols)
289{
290 int i, j;
291 for (i = 0; i < tensor_symbols->rnum; i++)
292 {
293 ccv_nnc_tensor_symbol_t* const tensor_symbol = (ccv_nnc_tensor_symbol_t*)ccv_array_get(tensor_symbols, i);
294 // Check whether this tensor symbol has any duplicate.
295 for (j = i + 1; j < tensor_symbols->rnum;)
296 {
297 ccv_nnc_tensor_symbol_t* const other_symbol = (ccv_nnc_tensor_symbol_t*)ccv_array_get(tensor_symbols, j);
298 // If there is a same tensor symbol, remove it.
299 if (other_symbol->d == tensor_symbol->d && other_symbol->graph == tensor_symbol->graph)
300 {
301 if (j + 1 < tensor_symbols->rnum)
302 *other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(tensor_symbols, tensor_symbols->rnum - 1);
303 --tensor_symbols->rnum;
304 continue;
305 }
306 ++j;
307 }
308 }
309}
310
311static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
312{
313 int i, j;
314 for (i = 0; i < graph_exec_symbol_size; i++)
315 {
316 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
317 // Check whether this tensor symbol has any duplicate.
318 for (j = i + 1; j < graph_exec_symbol_size;)
319 {
320 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
321 // If there is a same tensor symbol, remove it.
322 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
323 {
324 if (j + 1 < graph_exec_symbol_size)
325 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
326 --graph_exec_symbol_size;
327 continue;
328 }
329 ++j;
330 }
331 }
332 return graph_exec_symbol_size;
333}
334
335void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
336{
337 assert(input_size == model->input_size);
338 if (!model->graph) // The graph is not compiled yet.
339 {
340 model->graph = ccv_nnc_symbolic_graph_new();
341 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
342 int i;
343 for (i = 0; i < input_size; i++)
344 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
345 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
346 ccv_array_t* const trainables = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
347 ccv_cnnp_model_add_to_trainable(model, trainables);
348 _ccv_nnc_array_dedup_tensor_symbols(trainables);
349 // Assert no trainable is alias.
350 for (i = 0; i < trainables->rnum; i++)
351 {
352 const ccv_nnc_tensor_symbol_t trainable = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(trainables, i);
353 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(trainable.graph, trainable);
354 assert(alias_to.graph == 0); // Cannot find the one alias to.
355 }
356 ccv_array_t* const retainables = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
357 ccv_cnnp_model_add_to_output(model, retainables);
358 _ccv_nnc_array_dedup_tensor_symbols(retainables);
359 // Assert no retainable is alias.
360 for (i = 0; i < retainables->rnum; i++)
361 {
362 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(retainables, i);
363 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(retained.graph, retained);
364 assert(alias_to.graph == 0); // Cannot find the one alias to.
365 }
366 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
367 ccv_nnc_symbolic_graph_simplify(model->graph,
368 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
369 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
370 CCV_NNC_SIMPLIFY_OPS_FUSION,
371 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
372 model->outputs, model->output_size,
373 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
374 int flag = 0;
375 for (i = 0; !flag && i < input_size; i++)
376 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
377 model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (model->output_size - 1));
378 // If inputs are from GPU, stream type is GPU.
379 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
380 model->compiled_data->trainables = trainables;
381 model->compiled_data->retainables = retainables;
382 model->compiled_data->minimize.minimizer = minimizer;
383 model->compiled_data->loss = loss;
384 }
385}
386
387void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
388{
389 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
390 assert(compiled_data);
391 if (workspace_size == compiled_data->workspace_size)
392 return;
393 compiled_data->workspace_size = workspace_size;
394 if (compiled_data->graph)
395 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
396}
397
398void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
399{
400 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
401 assert(compiled_data);
402 assert(!compiled_data->graph);
403 if (parallel == 0)
404 compiled_data->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
405 else
406 compiled_data->parallel_count = parallel;
407}
408
409typedef struct {
410 int parallel_count;
411 ccv_nnc_symbolic_graph_t* graph;
412 ccv_nnc_tensor_arena_t* tensor_arena;
413} ccv_nnc_tensor_init_states_t;
414
415static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
416{
417 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
418 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
419 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
420 if (!output_tensor)
421 return;
422 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
423 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
424 const int parallel_count = tensor_init_states->parallel_count;
425 int i;
426 for (i = 1; i < parallel_count; i++)
427 {
428 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
429 if (copy)
430 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
431 }
432}
433
434typedef struct {
435 int parallel_count;
436 ccv_nnc_symbolic_graph_t* graph;
437 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
438} ccv_nnc_graph_exec_update_t;
439
440static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
441{
442 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
443 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
444 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
445 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
446 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
447 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
448 const int parallel_count = graph_exec_update->parallel_count;
449 int i;
450 for (i = 1; i < parallel_count; i++)
451 {
452 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
453 if (!CCV_NO_GRAPH_EXEC(copy))
454 {
455 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
456 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
457 }
458 }
459}
460
461static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
462{
463 assert(model->graph);
464 assert(model->compiled_data);
465 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
466 assert(compiled_data->rewindables);
467 int i;
468 for (i = 0; i < compiled_data->rewindables->rnum; i++)
469 {
470 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
471 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
472 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
473 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
474 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
475 }
476 ccv_array_clear(compiled_data->rewindables);
477 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
478}
479
480
481static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
482{
483 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
484 .type = CCV_CNNP_REWIND_TENSOR,
485 .tensor = symbol
486 };
487 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
488 ccv_array_push(rewind_symbols, &rewind_symbol);
489}
490
491 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
492{
493 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
494 .type = CCV_CNNP_REWIND_TENSOR,
495 .tensor = symbol
496 };
497 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
498 ccv_array_push(rewind_symbols, &rewind_symbol);
499}
500
501static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
502{
503 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
504 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
505 .graph_exec = symbol
506 };
507 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
508 ccv_array_push(rewind_symbols, &rewind_symbol);
509}
510
511static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_cnnp_model_t* const model, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
512{
513 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
514 assert(compiled_data);
515 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
516 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
517 assert(symbolic_graph);
518 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
519 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
520 if (!graph_exec_arena)
521 return;
522 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
523 if (!CCV_NO_GRAPH_EXEC(update_exec))
524 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
525 int i;
526 for (i = 1; i < parallel_count; i++)
527 {
528 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
529 if (copy_symbol.graph)
530 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
531 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
532 if (!CCV_NO_GRAPH_EXEC(copy))
533 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
534 }
535}
536
537static void _ccv_cnnp_model_set_minimizer_setter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_minimizer_set_f minimizer_setter, const void* const context)
538{
539 int i;
540 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
541 assert(compiled_data);
12
Taking true branch
542 assert(minimizer_setter);
13
Taking true branch
543 const int trainable_size = compiled_data->trainables->rnum;
544 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
545 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
546 // Collect which trainable exists at which node.
547 const int tensor_symbol_count = ccv_nnc_tensor_symbol_count(symbolic_graph);
548 ccv_array_t** const trainable_pos = (ccv_array_t**)cccalloc(tensor_symbol_count, sizeof(ccv_array_t*));
14
Memory is allocated
549 for (i = 0; i < trainable_size; i++)
15
Loop condition is false. Execution continues on line 551
550 trainable_pos[((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, i))->d] = ccv_array_new(sizeof(ccv_cnnp_trainable_index_t), 0, 0);
551 ccv_nnc_symbolic_graph_iter_t* const iter = ccv_nnc_symbolic_graph_iter_new(symbolic_graph, 0, 0, 0, 0);
552 while (ccv_nnc_symbolic_graph_iter_next(iter)) {
16
Loop condition is false. Execution continues on line 568
553 ccv_nnc_tensor_symbol_t* inputs;
554 int input_size;
555 ccv_nnc_tensor_symbol_io_from_iter(iter, &inputs, &input_size, 0, 0);
556 for (i = 0; i < input_size; i++)
557 if (inputs[i].d >= 0 && trainable_pos[inputs[i].d])
558 {
559 ccv_nnc_cmd_t cmd;
560 ccv_nnc_graph_exec_symbol_from_iter(iter, &cmd, 0, 0, 0);
561 const ccv_cnnp_trainable_index_t trainable_index = (ccv_cnnp_trainable_index_t){
562 .cmd = cmd,
563 .index = i,
564 };
565 ccv_array_push(trainable_pos[inputs[i].d], &trainable_index);
566 }
567 }
568 ccv_nnc_symbolic_graph_iter_free(iter);
569 for (i = 0; i < trainable_size; i++)
17
Loop condition is false. Execution continues on line 569
570 {
571 ccv_array_t* const trainable_indexes = trainable_pos[((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, i))->d];
572 const ccv_nnc_cmd_t cmd = minimizer_setter(model, (ccv_cnnp_trainable_index_t*)ccv_array_get(trainable_indexes, 0), trainable_indexes->rnum, context);
573 _ccv_cnnp_model_graph_exec_symbol_set(model, update_nodes[i], cmd);
574 ccv_array_free(trainable_indexes);
575 }
576}
18
Potential leak of memory pointed to by 'trainable_pos'
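Note: the cccalloc() at line 548 has no matching ccfree() on this path before the function returns; see the remediation sketch under Bug Summary above.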
577
578static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, ccv_nnc_tensor_t* const* const fits, const int fit_size)
579{
580 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
581 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
582 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
583 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
584 assert(evaluate_to_size > 0);
585 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
586 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
587 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
588 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
589 if (!compiled_data->rewindables)
590 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
591 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables);
592 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables);
593 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables);
594 int i, j;
595 const int output_size = model->output_size;
596 assert(!fits || fit_size == output_size * parallel_count);
597 ccv_nnc_tensor_symbol_t f[output_size];
598 if (compiled_data->loss.cmd == CCV_NNC_NOOP)
599 {
600 // If no loss function provided, there is no fits.
601 for (i = 0; i < output_size; i++)
602 {
603 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
604 f[i] = model->outputs[i];
605 }
606 } else {
607 for (i = 0; i < output_size; i++)
608 {
609 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, fits[i]->info, 0);
610 f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
611 ccv_nnc_graph_exec_symbol_new(model->graph, compiled_data->loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(f[i]), 0);
612 }
613 }
614 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
615 ccv_nnc_symbolic_graph_simplify(model->graph,
616 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
617 f, model->output_size,
618 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
619 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
620 const int trainable_size = compiled_data->trainables->rnum;
621 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * trainable_size + sizeof(ccv_nnc_tensor_symbol_t) * trainable_size + sizeof(ccv_nnc_graph_exec_symbol_t) * trainable_size);
622 compiled_data->updated_trainables = (ccv_nnc_tensor_symbol_t*)(compiled_data->saved_aux + saved_aux_size * trainable_size);
623 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_trainables + trainable_size);
624 const int trainable_size_maybe_more = gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ? trainable_size : trainable_size + model->input_size;
625 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * trainable_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * trainable_size_maybe_more * parallel_count);
626 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + trainable_size_maybe_more);
627 compiled_data->backward.to_size = trainable_size_maybe_more;
628 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES)
629 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, 0), trainable_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_trainables, compiled_data->saved_aux, compiled_data->update_nodes);
630 else // Compute minimize with gradients including inputs.
631 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, 0), trainable_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_trainables, compiled_data->saved_aux, compiled_data->update_nodes);
632 if (compiled_data->minimize.setter)
633 _ccv_cnnp_model_set_minimizer_setter(model, compiled_data->minimize.setter, compiled_data->minimize.context);
634 for (i = 0; i < output_size; i++)
635 {
636 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, f[i]);
637 // Init this to 1 so we can backprop.
638 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
639 }
640 for (i = 0; i < trainable_size_maybe_more; i++)
641 compiled_data->backward.tos[i] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
642 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
643 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, trainable_size);
644 if (parallel_count > 1)
645 {
646 ccv_nnc_symbolic_graph_data_parallel(model->graph, compiled_data->parallel_count,
647 0, 0,
648 compiled_data->gradients, trainable_size_maybe_more,
649 0, 0,
650 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
651 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
652 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
653 for (i = 0; i < evaluate_to_size; i++)
654 for (j = 1; j < parallel_count; j++)
655 {
656 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
657 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
658 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
659 }
660 for (i = 0; i < trainable_size_maybe_more; i++)
661 for (j = 1; j < parallel_count; j++)
662 {
663 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
664 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
665 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
666 }
667 }
668 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
669 compiled_data->gradient_mode = gradient_mode;
670}
671
672void ccv_cnnp_model_tensors_init(const ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data)
673{
674 assert(!compiled_data->tensors.trainables);
675 const int trainable_size = compiled_data->trainables->rnum;
676 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
677 const int retainable_size = compiled_data->retainables->rnum;
678 compiled_data->tensors.trainables = (ccv_nnc_tensor_t**)ccmalloc((sizeof(ccv_nnc_tensor_t*) * trainable_size + sizeof(ccv_nnc_tensor_t*) * retainable_size) * parallel_count);
679 compiled_data->tensors.retainables = compiled_data->tensors.trainables + trainable_size * parallel_count;
680 int i, j;
681 for (i = 0; i < trainable_size; i++)
682 {
683 const ccv_nnc_tensor_symbol_t trainable = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, i);
684 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(trainable.graph, trainable);
685 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
686 compiled_data->tensors.trainables[i] = ccv_nnc_tensor_new(0, info, 0);
687 for (j = 1; j < parallel_count; j++)
688 {
689 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
690 compiled_data->tensors.trainables[i + j * trainable_size] = ccv_nnc_tensor_new(0, info, 0);
691 }
692 }
693 for (i = 0; i < retainable_size; i++)
694 {
695 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->retainables, i);
696 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
697 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
698 compiled_data->tensors.retainables[i] = ccv_nnc_tensor_new(0, info, 0);
699 for (j = 1; j < parallel_count; j++)
700 {
701 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
702 compiled_data->tensors.retainables[i + j * retainable_size] = ccv_nnc_tensor_new(0, info, 0);
703 }
704 }
705}
706
707static void _ccv_cnnp_model_copy_tensors(ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
708{
709 assert(parallel_count > 0);
710 int i, j;
711 for (i = 0; i < tensor_size; i++)
712 {
713 if (!tensors[i])
714 continue;
715 for (j = 1; j < parallel_count; j++)
716 if (tensors[i + j * tensor_size])
717 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &tensors[i], 1, &tensors[i + j * tensor_size], 1, 0);
718 }
719}
720
721static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
722{
723 assert(parallel_count > 0);
724 int i, j;
725 for (i = 0; i < tensor_size; i++)
726 {
727 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
728 for (j = 1; j < parallel_count; j++)
729 {
730 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
731 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
732 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
733 { // We shouldn't allocate this, free it up.
734 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
735 tensors[i + j * tensor_size] = 0;
736 }
737 }
738 }
739}
740
741static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
742{
743 assert(parallel_count > 0);
744 int i, j;
745 for (i = 0; i < tensor_size; i++)
746 {
747 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
748 ccv_nnc_tensor_t* const tensor = tensors[i];
749 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
750 {
751 const ccv_nnc_tensor_bind_t retained_bind = {
752 .symbol = tensor_symbol,
753 .tensor = tensor
754 };
755 ccv_array_push(tensor_binds, &retained_bind);
756 }
757 for (j = 1; j < parallel_count; j++)
758 {
759 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
760 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
761 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
762 {
763 const ccv_nnc_tensor_bind_t bind = {
764 .symbol = copy,
765 .tensor = tensors[i + j * tensor_size]
766 };
767 ccv_array_push(tensor_binds, &bind);
768 }
769 }
770 }
771}
772
773static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
774{
775 if (compiled_data->graph)
776 ccv_nnc_graph_free(compiled_data->graph);
777 if (compiled_data->tensor_arena)
778 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
779 if (compiled_data->graph_exec_arena)
780 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
781}
782
783static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
784{
785 if (compiled_data->gradients)
786 ccfree(compiled_data->gradients);
787 if (compiled_data->saved_aux)
788 ccfree(compiled_data->saved_aux);
789 if (compiled_data->evaluate.tos)
790 ccfree(compiled_data->evaluate.tos);
791 if (compiled_data->backward.from_ops)
792 ccfree(compiled_data->backward.from_ops);
793}
794
795static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
796{
797 if (compiled_data->backward.gradients)
798 ccfree(compiled_data->backward.gradients);
799 if (compiled_data->backward.accum)
800 ccv_nnc_graph_free(compiled_data->backward.accum);
801 if (compiled_data->backward.tensor_arena)
802 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
803 if (compiled_data->backward.graph_exec_arena)
804 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
805}
806
807static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
808{
809 if (compiled_data->apply_gradients.graph)
810 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
811 if (compiled_data->apply_gradients.tensor_arena)
812 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
813 if (compiled_data->apply_gradients.graph_exec_arena)
814 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
815}
816
817// Compile the graph to run ccv_cnnp_model_fit
818static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
819{
820 int i, j;
821 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
822 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
823 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
824 const int parallel_count = ccv_max(compiled_data->parallel_count, 1)({ typeof (compiled_data->parallel_count) _a = (compiled_data
->parallel_count); typeof (1) _b = (1); (_a > _b) ? _a :
_b; })
;
825 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 825, __extension__ __PRETTY_FUNCTION__)
; }))
;
826 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 826
, __extension__ __PRETTY_FUNCTION__); }))
;
827 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 827, __extension__ __PRETTY_FUNCTION__)
; }))
;
828 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
829 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, fits, fit_size);
830 else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
831 _ccv_cnnp_model_rewind_graph(model);
832 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
833 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
834 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, fits, fit_size);
835 }
836 const int tensors_init = !!compiled_data->tensors.trainables;
837 if (!tensors_init)
838 ccv_cnnp_model_tensors_init(model->graph, compiled_data);
839 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
840 assert((input_size % parallel_count) == 0);
841 assert((output_size % parallel_count) == 0);
842 assert((fit_size % parallel_count) == 0);
843 const int input_size_per_p = input_size / parallel_count;
844 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
845 const int output_size_per_p = output_size / parallel_count;
846 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
847 const int fit_size_per_p = fit_size / parallel_count;
848 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
849 const int trainable_size = compiled_data->trainables->rnum;
850 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, 0), compiled_data->tensors.trainables, trainable_size, parallel_count, tensor_binds);
851 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_trainables, compiled_data->tensors.trainables, trainable_size, parallel_count, tensor_binds);
852 const int retainable_size = compiled_data->retainables->rnum;
853 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->retainables, 0), compiled_data->tensors.retainables, retainable_size, parallel_count);
854 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->retainables, 0), compiled_data->tensors.retainables, retainable_size, parallel_count, tensor_binds);
855 ccv_nnc_symbolic_graph_compile(model->graph, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
856 ccv_array_free(tensor_binds);
857 // If tensor is not init'ed, we need to init states first.
858 if (!tensors_init)
859 {
860 ccv_nnc_tensor_init_states_t tensor_init_states = {
861 .parallel_count = parallel_count,
862 .graph = model->graph,
863 .tensor_arena = compiled_data->tensor_arena
864 };
865 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
866 } else if (parallel_count > 1)
867 _ccv_cnnp_model_copy_tensors(compiled_data->tensors.trainables, compiled_data->trainables->rnum, parallel_count);
868 compiled_data->is_test = 0;
869 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
870 // No need to set because it is default to training mode.
871 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, compiled_data->graph_exec_arena);
872 for (i = 0; i < saved_aux_size * trainable_size; i++)
873 {
874 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
875 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
876 for (j = 1; j < parallel_count; j++)
877 {
878 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
879 if (copy)
880 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
881 }
882 }
883 const int evaluate_to_size = compiled_data->evaluate.to_size;
884 compiled_data->evaluate.to_op_size = 0;
885 for (i = 0; i < evaluate_to_size; i++)
886 {
887 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
888 if (to.graph)
889 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
890 }
891 ccv_nnc_graph_static_schedule(compiled_data->graph, compiled_data->stream_type);
892 ccv_nnc_graph_autotune(compiled_data->graph, compiled_data->workspace_size, 0, TRAVERSE_FULL);
893}
894
895ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
896{
897 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
898 if (!compiled_data || !compiled_data->graph)
899 return 0;
900 return ccv_nnc_graph_default_stream(compiled_data->graph);
901}
902
903uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
904{
905 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
906 if (!compiled_data || !compiled_data->tensor_arena)
907 return 0;
908 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
909}
910
911static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
912{
913 int i, j;
914 for (i = 0; i < tensor_size; i++)
915 {
916 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbols[i], tensors[i]);
917 for (j = 1; j < parallel_count; j++)
918 {
919 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbols[i], j);
920 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
921 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
922 }
923 }
924}
925
926void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
927{
928 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
929 assert(compiled_data);
930 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
931 assert(output_size == model->output_size * parallel_count);
932 assert(input_size == model->input_size * parallel_count);
933 assert(!fits || fit_size == output_size);
934 assert(model->graph);
935 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
936 {
937 _ccv_cnnp_compiled_data_graph_free(compiled_data);
938 _ccv_cnnp_compiled_data_backward_free(compiled_data);
939 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
940 // Compile the symbolic graph down only when needed.
941 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
942 } else {
943 assert((input_size % parallel_count) == 0);
944 assert((output_size % parallel_count) == 0);
945 assert((fit_size % parallel_count) == 0);
946 const int input_size_per_p = input_size / parallel_count;
947 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
948 const int output_size_per_p = output_size / parallel_count;
949 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
950 const int fit_size_per_p = fit_size / parallel_count;
951 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
952 }
953 if (compiled_data->is_test)
954 {
955 compiled_data->is_test = 0;
956 ccv_nnc_graph_exec_update_t update = {
957 .parallel_count = parallel_count,
958 .graph = model->graph,
959 .graph_exec_arena = compiled_data->graph_exec_arena,
960 };
961 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
962 }
963 ccv_nnc_graph_run(compiled_data->graph, 0, stream_context, 0, TRAVERSE_FULL);
964}
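// --- Editor's sketch (not part of ccv_cnnp_model.c) ---
// A minimal, hedged example of driving the FIT_MODE path above. All names
// below are hypothetical; the model is assumed to be already compiled with a
// loss via ccv_cnnp_model_compile, and the tensors are assumed to match the
// model's input/output/fit sizes with parallel_count == 1.
#include "ccv_nnc.h"

extern ccv_cnnp_model_t* model;   // hypothetical, compiled elsewhere
extern ccv_nnc_tensor_t* input;   // hypothetical input batch
extern ccv_nnc_tensor_t* fit;     // hypothetical target (label) tensor
extern ccv_nnc_tensor_t* output;  // hypothetical output / loss tensor

static void train_one_batch_fit_mode(void)
{
	ccv_nnc_tensor_t* inputs[] = { input };
	ccv_nnc_tensor_t* fits[] = { fit };
	ccv_nnc_tensor_t* outputs[] = { output };
	// The first call triggers _ccv_cnnp_model_fit_jit; subsequent calls only
	// rebind the input/output/fit tensors into the existing tensor arena.
	ccv_cnnp_model_fit(model, inputs, 1, fits, 1, outputs, 1, 0);
}
// --- End editor's sketch ---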
965
966// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
967static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
968{
969 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
970 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
971 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
972 assert(output_size == model->output_size * parallel_count);
973 assert(output_size > 0);
974 const int tensors_init = !!compiled_data->tensors.trainables;
975 if (!tensors_init)
976 ccv_cnnp_model_tensors_init(model->graph, compiled_data);
977 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
978 assert((input_size % parallel_count) == 0);
979 assert((output_size % parallel_count) == 0);
980 const int input_size_per_p = input_size / parallel_count;
981 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
982 const int output_size_per_p = output_size / parallel_count;
983 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
984 const int trainable_size = compiled_data->trainables->rnum;
985 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, 0), compiled_data->tensors.trainables, trainable_size, parallel_count, tensor_binds);
986 const int retainable_size = compiled_data->retainables->rnum;
987 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->retainables, 0), compiled_data->tensors.retainables, retainable_size, parallel_count);
988 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->retainables, 0), compiled_data->tensors.retainables, retainable_size, parallel_count, tensor_binds);
989 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
990 if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
991 ccv_nnc_symbolic_graph_compile(model->graph, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
992 else {
993 assert(compiled_data->parallel_count <= 1); // I don't know how to handle parallel_count larger than 1.
994 ccv_nnc_symbolic_graph_compile(model->graph, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
995 }
996 ccv_array_free(tensor_binds);
997 // If tensor is not init'ed, we need to init states first.
998 if (!tensors_init)
999 {
1000 ccv_nnc_tensor_init_states_t tensor_init_states = {
1001 .parallel_count = parallel_count,
1002 .graph = model->graph,
1003 .tensor_arena = compiled_data->tensor_arena
1004 };
1005 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1006 } else if (parallel_count > 1)
1007 _ccv_cnnp_model_copy_tensors(compiled_data->tensors.trainables, compiled_data->trainables->rnum, parallel_count);
1008 compiled_data->is_test = 1;
1009 ccv_nnc_graph_exec_update_t update = {
1010 .parallel_count = parallel_count,
1011 .graph = model->graph,
1012 .graph_exec_arena = compiled_data->graph_exec_arena,
1013 };
1014 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1015 ccv_nnc_graph_static_schedule(compiled_data->graph, compiled_data->stream_type);
1016 ccv_nnc_graph_autotune(compiled_data->graph, compiled_data->workspace_size, 0, TRAVERSE_FULL);
1017}
1018
1019static void _ccv_cnnp_model_gradient_tensors_init(const ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data)
1020{
1021 assert(!compiled_data->tensors.gradients);
1022 const int trainable_size = compiled_data->trainables->rnum;
1023 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1024 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * trainable_size * 2 * parallel_count);
1025 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + trainable_size * parallel_count;
1026 int i, j;
1027 for (i = 0; i < trainable_size; i++)
1028 {
1029 const ccv_nnc_tensor_symbol_t trainable = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, i);
1030 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(trainable.graph, trainable);
1031 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1032 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1033 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1034 for (j = 1; j < parallel_count; j++)
1035 {
1036 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1037 compiled_data->tensors.gradients[i + j * trainable_size] = ccv_nnc_tensor_new(0, info, 0);
1038 compiled_data->tensors.accum_gradients[i + j * trainable_size] = 0;
1039 }
1040 }
1041}
1042
1043// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1044// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1045static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const int enable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1046{
1047 int i, j;
1048 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1049 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1050 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1051 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1052 assert(output_size == model->output_size * parallel_count);
1053 assert(output_size > 0);
1054 // There shouldn't be a loss function if we evaluate with multistage jit.
1055 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1056 const int target_gradient_mode = enable_outgrad ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES;
1057 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1058 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1059 else if (compiled_data->gradient_mode != target_gradient_mode) {
1060 _ccv_cnnp_model_rewind_graph(model);
1061 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1062 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1063 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1064 }
1065 const int tensors_init = !!compiled_data->tensors.trainables;
1066 if (!tensors_init)
1067 ccv_cnnp_model_tensors_init(model->graph, compiled_data);
1068 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1069 assert((input_size % parallel_count) == 0);
1070 assert((output_size % parallel_count) == 0);
1071 const int input_size_per_p = input_size / parallel_count;
1072 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1073 const int output_size_per_p = output_size / parallel_count;
1074 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1075 const int trainable_size = compiled_data->trainables->rnum;
1076 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, 0), compiled_data->tensors.trainables, trainable_size, parallel_count, tensor_binds);
1077 const int retainable_size = compiled_data->retainables->rnum;
1078 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->retainables, 0), compiled_data->tensors.retainables, retainable_size, parallel_count);
1079 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->retainables, 0), compiled_data->tensors.retainables, retainable_size, parallel_count, tensor_binds);
1080 if (!compiled_data->tensors.gradients)
1081 _ccv_cnnp_model_gradient_tensors_init(model->graph, compiled_data);
1082 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, trainable_size, parallel_count, tensor_binds);
1083 ccv_nnc_symbolic_graph_compile(model->graph, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1084 ccv_array_free(tensor_binds);
1085 // If tensor is not init'ed, we need to init states first.
1086 if (!tensors_init)
1087 {
1088 ccv_nnc_tensor_init_states_t tensor_init_states = {
1089 .parallel_count = parallel_count,
1090 .graph = model->graph,
1091 .tensor_arena = compiled_data->tensor_arena
1092 };
1093 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1094 } else if (parallel_count > 1)
1095 _ccv_cnnp_model_copy_tensors(compiled_data->tensors.trainables, compiled_data->trainables->rnum, parallel_count);
1096 compiled_data->is_test = is_test;
1097 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, compiled_data->graph_exec_arena);
1098 const int evaluate_to_size = compiled_data->evaluate.to_size;
1099 compiled_data->evaluate.to_op_size = 0;
1100 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1101 for (i = 0; i < evaluate_to_size; i++)
1102 {
1103 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1104 if (to_op.graph)
1105 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1106 const int* tos;
1107 int to_size;
1108 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1109 for (j = 0; j < to_size; j++)
1110 {
1111 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1112 .d = tos[j],
1113 .graph = model->graph
1114 });
1115 if (to_op.graph)
1116 ccv_array_add_unique_int(backward_from, to_op.d);
1117 }
1118 }
1119 assert(backward_from->rnum > 0);
1120 compiled_data->backward.from_op_size = backward_from->rnum;
1121 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1122 for (i = 0; i < backward_from->rnum; i++)
1123 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1124 .d = *(int*)ccv_array_get(backward_from, i),
1125 .graph = compiled_data->graph,
1126 };
1127 ccv_array_free(backward_from);
1128 ccv_nnc_graph_static_schedule(compiled_data->graph, compiled_data->stream_type);
1129 ccv_nnc_graph_autotune(compiled_data->graph, compiled_data->workspace_size, 0, TRAVERSE_FULL);
1130}
1131
1132void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1133{
1134 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1135 assert(compiled_data);
1136 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1137 assert(output_size == model->output_size * parallel_count);
1138 assert(input_size == model->input_size * parallel_count);
1139 assert(model->graph);
1140 // If we enable outgrad, we should also enable requires grad.
1141 if (params.enable_outgrad)
1142 { assert(params.requires_grad); }
1143 if (!compiled_data->graph ||
1144 (params.requires_grad && compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ||
1145 // If a stream context is provided, we need to recompile because we cannot run them efficiently in FIT_MODE.
1146 (stream_context && !params.requires_grad && compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD))
1147 {
1148 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1149 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1150 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1151 if (params.requires_grad)
1152 _ccv_cnnp_model_multistage_jit_0(model, params.enable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1153 else
1154 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1155 } else {
1156 assert((input_size % parallel_count) == 0);
1157 assert((output_size % parallel_count) == 0);
1158 const int input_size_per_p = input_size / parallel_count;
1159 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1160 const int output_size_per_p = output_size / parallel_count;
1161 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1162 }
1163 if (compiled_data->is_test != params.is_test)
1164 {
1165 compiled_data->is_test = params.is_test;
1166 ccv_nnc_graph_exec_update_t update = {
1167 .parallel_count = parallel_count,
1168 .graph = model->graph,
1169 .graph_exec_arena = compiled_data->graph_exec_arena,
1170 };
1171 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1172 }
1173 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1174 ccv_nnc_graph_run(compiled_data->graph, 0, stream_context, 0, TRAVERSE_FULL);
1175 else
1176 ccv_nnc_graph_run(compiled_data->graph, 0, stream_context, 0, 0, 0,
1177 compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1178}
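// --- Editor's sketch (not part of ccv_cnnp_model.c) ---
// A minimal, hedged example of the inference-only path above: with
// requires_grad = 0 the first call goes through _ccv_cnnp_model_multistage_no_grad_jit,
// and later calls only rebind tensors into the arena. All names are
// hypothetical and the tensors are assumed to match the compiled model
// (parallel_count == 1).
#include "ccv_nnc.h"

extern ccv_cnnp_model_t* model;   // hypothetical, compiled elsewhere
extern ccv_nnc_tensor_t* input;   // hypothetical input batch
extern ccv_nnc_tensor_t* output;  // hypothetical output tensor

static void infer_one_batch(void)
{
	ccv_nnc_tensor_t* inputs[] = { input };
	ccv_nnc_tensor_t* outputs[] = { output };
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 0, // no gradient graph is compiled
		.enable_outgrad = 0,
		.is_test = 1,       // run in test (inference) mode
	};
	ccv_cnnp_model_evaluate(model, params, inputs, 1, outputs, 1, 0);
}
// --- End editor's sketch ---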
1179
1180// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1181// Particularly, this method compiles the accumulator graph.
1182static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1183{
1184 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1185 assert(compiled_data);
1186 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1187 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1188 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1189 const int trainable_size = compiled_data->trainables->rnum;
1190 int i, j;
1191 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * trainable_size * parallel_count * 3);
1192 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + trainable_size * parallel_count;
1193 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + trainable_size * parallel_count;
1194 for (i = 0; i < trainable_size; i++)
1195 for (j = 0; j < parallel_count; j++)
1196 {
1197 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * trainable_size]->info;
1198 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
1199 compiled_data->tensors.accum_gradients[i + j * trainable_size] = compiled_data->tensors.gradients[i + j * trainable_size];
1200 compiled_data->tensors.gradients[i + j * trainable_size] = ccv_nnc_tensor_new(0, info, 0);
1201 ccv_nnc_tensor_symbol_t inputs[2];
1202 inputs[0] = compiled_data->backward.accum_gradients[i + j * trainable_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1203 inputs[1] = compiled_data->backward.gradients[i + j * trainable_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1204 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * trainable_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1205 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD(), inputs, 2, &output, 1, 0);
1206 }
1207 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1208 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1209 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, trainable_size * parallel_count, 1, tensor_binds);
1210 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, trainable_size * parallel_count, 1, tensor_binds);
1211 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, trainable_size * parallel_count, 1, tensor_binds);
1212 ccv_nnc_symbolic_graph_compile(accum, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum), SYMBOLIC_GRAPH_DESTINATIONS(accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1213 ccv_nnc_symbolic_graph_free(accum);
1214 ccv_nnc_graph_static_schedule(compiled_data->backward.accum, compiled_data->stream_type);
1215 ccv_array_free(tensor_binds);
1216}
1217
1218void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_stream_context_t* const stream_context)
1219{
1220 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1221 assert(compiled_data);
1222 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1223 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1224 assert(ingrad_size == model->output_size * parallel_count);
1225 if (outgrads)
1226 { assert(outgrad_size == model->input_size * parallel_count); }
1227 assert(model->graph);
1228 assert(compiled_data->graph);
1229 const int trainable_size = compiled_data->trainables->rnum;
1230 // If we need to accumulate the gradients now, do jit on accumulator.
1231 if (compiled_data->backward.count > 0)
1232 {
1233 if (!compiled_data->backward.accum)
1234 _ccv_cnnp_model_multistage_jit_1(model);
1235 else {
1236 // Otherwise, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1237 int i;
1238 for (i = 0; i < trainable_size * parallel_count; i++)
1239 {
1240 ccv_nnc_tensor_t* tensor;
1241 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor);
1242 }
1243 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1244 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, trainable_size * parallel_count, 1);
1245 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, trainable_size * parallel_count, 1);
1246 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, trainable_size * parallel_count, 1);
1247 }
1248 }
1249 const int ingrad_size_per_p = ingrad_size / parallel_count;
1250 const int outgrad_size_per_p = outgrad_size / parallel_count;
1251 int i, j;
1252 for (i = 0; i < ingrad_size_per_p; i++)
1253 {
1254 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, model->outputs[i]);
1255 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
1256 for (j = 1; j < parallel_count; j++)
1257 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
1258 }
1259 if (outgrads)
1260 {
1261 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS);
1262 for (i = 0; i < outgrad_size_per_p; i++)
1263 {
1264 const ccv_nnc_tensor_symbol_t outgrad = ccv_nnc_tensor_symbol_for_backward(model->graph, model->inputs[i]);
1265 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
1266 for (j = 1; j < parallel_count; j++)
1267 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
1268 }
1269 } else {
1270 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES);
1271 }
1272 // Bind to the gradients. Once we accumulate for the second time (i.e. the accum_gradients and gradients bindings no longer change), there is no need to rebind.
1273 if (compiled_data->backward.count <= 1)
1274 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, trainable_size, parallel_count);
1275 // Run the backward pass.
1276 ccv_nnc_graph_run(compiled_data->graph, 0, stream_context, 0, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
1277 // If we need to run accumulation round, do that now.
1278 if (compiled_data->backward.count > 0)
1279 ccv_nnc_graph_run(compiled_data->backward.accum, 0, stream_context, 0, TRAVERSE_FULL);
1280 // Update the count, this determines whether we need to accumulate or not.
1281 ++compiled_data->backward.count;
1282}
1283
1284// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
1285// Particularly, this method compiles the trainable update graph.
1286static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
1287{
1288 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1289 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1290 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1291 const int trainable_size = compiled_data->trainables->rnum;
1292 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1293 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->trainables, 0), compiled_data->tensors.trainables, trainable_size, parallel_count, tensor_binds);
1294 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_trainables, compiled_data->tensors.trainables, trainable_size, parallel_count, tensor_binds);
1295 // Bind accumulated gradients.
1296 if (compiled_data->backward.count > 1)
1297 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, trainable_size, parallel_count, tensor_binds);
1298 else
1299 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, trainable_size, parallel_count, tensor_binds);
1300 // TODO: Need to find the start point for this.
1301 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
1302 int i, j;
1303 for (i = 0; i < compiled_data->backward.to_size; i++)
1304 {
1305 const int* tos;
1306 int to_size;
1307 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
1308 for (j = 0; j < to_size; j++)
1309 {
1310 // Check if this already shows up in the backward graph; if that is the case, it won't be in the apply
1311 // gradients graph.
1312 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1313 .d = tos[j],
1314 .graph = model->graph,
1315 });
1316 if (!exec.graph)
1317 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
1318 }
1319 }
1320 const int from_size = apply_gradients_from->rnum;
1321 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
1322 for (i = 0; i < from_size; i++)
1323 froms[i] = (ccv_nnc_graph_exec_symbol_t){
1324 .d = *(int*)ccv_array_get(apply_gradients_from, i),
1325 .graph = model->graph
1326 };
1327 ccv_array_free(apply_gradients_from);
1328 ccv_nnc_symbolic_graph_compile(model->graph, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, froms, from_size, SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
1329 ccv_array_free(tensor_binds);
1330 ccfree(froms);
1331 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1332 for (i = 0; i < saved_aux_size * trainable_size; i++)
1333 {
1334 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
1335 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1336 for (j = 1; j < parallel_count; j++)
1337 {
1338 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1339 if (copy)
1340 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1341 }
1342 }
1343 ccv_nnc_graph_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type);
1344}
1345
1346void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
1347{
1348 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1349 assert(compiled_data);
1350 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1351 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1352 assert(model->graph);
1353 assert(compiled_data->graph);
1354 assert(compiled_data->backward.count > 0);
1355 if (!compiled_data->apply_gradients.graph)
1356 _ccv_cnnp_model_multistage_jit_2(model);
1357 else {
1358 const int trainable_size = compiled_data->trainables->rnum;
1359 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
1360 if (compiled_data->backward.count > 1)
1361 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, trainable_size, parallel_count);
1362 else
1363 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, trainable_size, parallel_count);
1364 }
1365 ccv_nnc_graph_run(compiled_data->apply_gradients.graph, 0, stream_context, 0, TRAVERSE_FULL);
1366 // Reset backward count to 0.
1367 compiled_data->backward.count = 0;
1368}
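// --- Editor's sketch (not part of ccv_cnnp_model.c) ---
// A minimal, hedged example of the three-stage sequence the jit_0 / jit_1 /
// jit_2 functions above compile for: evaluate with requires_grad, run backward
// one or more times (extra backward calls before apply_gradients accumulate
// through the accum graph), then apply the gradients once. All names are
// hypothetical and assumed compatible with the compiled model
// (parallel_count == 1, no outgrads requested).
#include "ccv_nnc.h"

extern ccv_cnnp_model_t* model;   // hypothetical, compiled elsewhere
extern ccv_nnc_tensor_t* input;   // hypothetical input batch
extern ccv_nnc_tensor_t* output;  // hypothetical output tensor
extern ccv_nnc_tensor_t* ingrad;  // hypothetical gradient w.r.t. the output

static void train_one_step_multistage(void)
{
	ccv_nnc_tensor_t* inputs[] = { input };
	ccv_nnc_tensor_t* outputs[] = { output };
	ccv_nnc_tensor_t* ingrads[] = { ingrad };
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 1, // first call compiles MULTISTAGE_MODE via jit_0
		.enable_outgrad = 0,
		.is_test = 0,
	};
	ccv_cnnp_model_evaluate(model, params, inputs, 1, outputs, 1, 0);
	// No outgrads are passed, so gradient_mode stays GRADIENT_TRAINABLES.
	ccv_cnnp_model_backward(model, ingrads, 1, 0, 0, 0);
	// Updates the trainables and resets backward.count to 0.
	ccv_cnnp_model_apply_gradients(model, 0);
}
// --- End editor's sketch ---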
1369
1370void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const ccv_cnnp_model_minimizer_set_f minimizer_setter, const void* const context)
1371{
1372 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1373 assert(compiled_data);
1. Assuming 'compiled_data' is non-null
2. Taking true branch
1374 compiled_data->minimize.minimizer = minimizer;
1375 compiled_data->minimize.setter = minimizer_setter;
1376 compiled_data->minimize.context = context;
1377 if (!compiled_data->update_nodes)
3. Assuming the condition is false
4. Taking false branch
1378 return;
1379 int i;
1380 const int trainable_size = compiled_data->trainables->rnum;
1381 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
1382 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
1383 assert(symbolic_graph);
5. Assuming 'symbolic_graph' is non-null
6. Taking true branch
1384 for (i = 0; i < trainable_size; i++)
7. Assuming 'i' is >= 'trainable_size'
8. Loop condition is false. Execution continues on line 1387
1385 _ccv_cnnp_model_graph_exec_symbol_set(model, update_nodes[i], minimizer);
1386 // Use the minimizer to update.
1387 if (!minimizer_setter)
9. Assuming 'minimizer_setter' is non-null
10. Taking false branch
1388 return;
1389 _ccv_cnnp_model_set_minimizer_setter(model, minimizer_setter, context);
11. Calling '_ccv_cnnp_model_set_minimizer_setter'
1390}
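// --- Editor's sketch (not part of ccv_cnnp_model.c) ---
// A minimal, hedged example of calling ccv_cnnp_model_set_minimizer. The
// minimizer command is hypothetical and assumed to be configured elsewhere;
// passing a null setter and context skips the per-trainable setter path
// exercised by the analyzer trace above.
#include "ccv_nnc.h"

extern ccv_cnnp_model_t* model;  // hypothetical, compiled elsewhere
extern ccv_nnc_cmd_t minimizer;  // hypothetical minimizer command (e.g. an SGD command)

static void switch_minimizer(void)
{
	// With a null setter, only the existing update nodes' commands are replaced,
	// and only if the gradient graph has already been generated.
	ccv_cnnp_model_set_minimizer(model, minimizer, 0, 0);
}
// --- End editor's sketch ---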
1391
1392void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
1393{
1394 if (model->graph && out_size > 0)
1395 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
1396 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
1397 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
1398 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
1399 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
1400 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
1401 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
1402}
1403
1404static const ccv_cnnp_model_vtab_t ccv_cnnp_input_isa = {};
1405
1406ccv_cnnp_model_io_t ccv_cnnp_input(void)
1407{
1408 ccv_cnnp_model_t* const input = (ccv_cnnp_model_t*)cccalloc(1, sizeof(ccv_cnnp_model_t) + sizeof(ccv_nnc_tensor_symbol_t));
1409 input->isa = &ccv_cnnp_input_isa;
1410 input->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
1411 ccv_cnnp_model_io_t input_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t));
1412 input_io->visit = 0;
1413 input_io->incomings = 0;
1414 input_io->outgoings = 0;
1415 input_io->model = input;
1416 input_io->outputs = (ccv_nnc_tensor_symbol_t*)(input_io + 1);
1417 ccv_array_push(input->io, &input_io);
1418 input->outputs = (ccv_nnc_tensor_symbol_t*)(input + 1);
1419 input->output_size = 1;
1420 return input_io;
1421}
1422
1423static void _ccv_cnnp_compiled_data_free(ccv_cnnp_compiled_data_t* const compiled_data)
1424{
1425 int i;
1426 const int trainable_size = compiled_data->trainables->rnum;
1427 ccv_array_free(compiled_data->trainables);
1428 const int retainable_size = compiled_data->retainables->rnum;
1429 ccv_array_free(compiled_data->retainables);
1430 const int parallel_count = ccv_max(compiled_data->parallel_count, 1);
1431 if (compiled_data->tensors.trainables)
1432 {
1433 for (i = 0; i < trainable_size * parallel_count; i++)
1434 ccv_nnc_tensor_free(compiled_data->tensors.trainables[i]);
1435 for (i = 0; i < retainable_size * parallel_count; i++)
1436 if (compiled_data->tensors.retainables[i])
1437 ccv_nnc_tensor_free(compiled_data->tensors.retainables[i]);
1438 ccfree(compiled_data->tensors.trainables);
1439 }
1440 if (compiled_data->tensors.gradients)
1441 {
1442 for (i = 0; i < trainable_size * parallel_count; i++)
1443 {
1444 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
1445 if (compiled_data->tensors.accum_gradients[i])
1446 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
1447 }
1448 ccfree(compiled_data->tensors.gradients);
1449 }
1450 if (compiled_data->rewindables)
1451 ccv_array_free(compiled_data->rewindables);
1452 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1453 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1454 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1455 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1456 ccfree(compiled_data);
1457}
1458
1459void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
1460{
1461 if (model->isa->deinit)
1462 model->isa->deinit(model);
1463 if (model->io)
1464 {
1465 int i;
1466 for (i = 0; i < model->io->rnum; i++)
1467 {
1468 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
1469 if (model_io->outgoings)
1470 ccv_array_free(model_io->outgoings);
1471 if (model_io->incomings)
1472 ccv_array_free(model_io->incomings);
1473 ccfree(model_io);
1474 }
1475 ccv_array_free(model->io);
1476 }
1477 if (model->inputs)
1478 ccfree(model->inputs);
1479 if (model->graph)
1480 ccv_nnc_symbolic_graph_free(model->graph);
1481 if (model->compiled_data)
1482 _ccv_cnnp_compiled_data_free(model->compiled_data);
1483 ccfree(model);
1484}