Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2488, column 1: Assigned value is garbage or undefined
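
The flagged line (ccv_cnnp_model.c:2488) falls outside the excerpt below. For context, this checker reports an assignment whose right-hand side reads a variable that is uninitialized on at least one execution path. The stand-alone sketch below, with purely hypothetical names, only illustrates the general warning pattern; it is not the code at line 2488:

static void pick_max(const int* const values, const int count, int* const out)
{
	int best; /* deliberately left uninitialized */
	int i;
	for (i = 0; i < count; i++) /* the loop body never runs when count <= 0 */
		if (i == 0 || values[i] > best)
			best = values[i];
	*out = best; /* analyzer: "Assigned value is garbage or undefined" on the count <= 0 path */
}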

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2025-04-14-192742-479958-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7
8// MARK - Level-5 API
9
10ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
11{
12 if (!model->io)
13 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
14 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
15 model_io->param_ref = 0;
16 model_io->param_sel = 0;
17 model_io->visit = 0;
18 model_io->model = model;
19 model_io->dependencies = 0;
20 model_io->dependents = 0;
21 model_io->outgoings = 0;
22 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
23 ccv_array_push(model->io, &model_io);
24 if (input_size > 0)
25 {
26 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
27 ccv_array_resize(model_io->incomings, input_size);
28 int i;
29 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
30 for (i = 0; i < input_size; i++)
31 {
32 if (!inputs[i]->outgoings)
33 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
34 ccv_array_push(inputs[i]->outgoings, &model_io);
35 }
36 } else {
37 model_io->incomings = 0;
38 }
39 return model_io;
40}
41
42void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
43{
44 assert(dependency_size > 0);
45 if (!model_io->dependencies)
46 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
47 int i, j;
48 for (i = 0; i < dependency_size; i++)
49 {
50 int flag = 0;
51 // Check if it already exists or not.
52 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
53 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
54 flag = 1;
55 if (flag)
56 continue;
57 ccv_array_push(model_io->dependencies, dependencies + i);
58 ++dependencies[i]->dependents;
59 }
60}
61
62int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
63{
64 return model->output_size;
65}
66
67int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
68{
69 // If the model is compiled, it defaults to 1 unless explicitly set otherwise.
70 if (model->compiled_data)
71 return model->is_trainable >= 0 ? model->is_trainable : 1;
72 return model->is_trainable;
73}
74
75ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
76{
77 if (!model->io)
78 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
79 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
80 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
81 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
82 model_io->visit = 0;
83 model_io->model = model;
84 model_io->outputs = 0;
85 model_io->dependencies = 0;
86 model_io->dependents = 0;
87 model_io->incomings = 0;
88 model_io->outgoings = 0;
89 ccv_array_push(model->io, &model_io);
90 return model_io;
91}
92
93void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
94{
95 model->notify_hook.func = func;
96 model->notify_hook.context = context;
97}
98
99void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
100{
101 if (model->notify_hook.func)
102 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
103 if (model->isa->notify)
104 model->isa->notify(model, tag, payload);
105}
106
107static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
108{
109 int i, j;
110 for (i = 0; i < graph_exec_symbol_size; i++)
111 {
112 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
113 // Check whether this graph exec symbol has any duplicate.
114 for (j = i + 1; j < graph_exec_symbol_size;)
115 {
116 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
117 // If there is an identical graph exec symbol, remove it.
118 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
119 {
120 if (j + 1 < graph_exec_symbol_size)
121 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
122 --graph_exec_symbol_size;
123 continue;
124 }
125 ++j;
126 }
127 }
128 return graph_exec_symbol_size;
129}
130
131void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
132{
133 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
134 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
135 int i;
136 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
137 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
138 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
139 {
140 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
141 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
142 {
143 // Only add to parameter_indices if it is trainable.
144 if (add_to_array_context->add_parameter_indices)
145 ccv_array_add_unique_int(model->parameter_indices, i);
146 // Found it, return, don't add it.
147 return;
148 }
149 }
150 // Only add to parameter_indices if it is trainable.
151 if (add_to_array_context->add_parameter_indices)
152 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
153 // This is a new one, no need to add_unique_int, it is unique.
154 ccv_array_push(add_to_array_context->symbols, &symbol);
155 if (add_to_array_context->trainables)
156 ccv_array_push(add_to_array_context->trainables, &is_trainable);
157 char id[2048];
158 id[0] = add_to_array_context->prefix;
159 id[1] = '-';
160 int total_len = 2;
161 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
162 {
163 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
164 int len;
165 if (name->name && name->name[0] != '\0')
166 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
167 else
168 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
169 total_len += len;
170 if (total_len >= 2047)
171 break;
172 }
173 if (total_len < 2047)
174 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
175 assert(total_len < 2048);
176 char *heap_id = (char*)ccmalloc(total_len + 1);
177 memcpy(heap_id, id, total_len + 1);
178 ccv_array_push(add_to_array_context->ids, &heap_id);
179 ++add_to_array_context->sequence->it;
180}
181
182static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
183{
184 compiled_data->f = compiled_data->fits + output_size;
185 compiled_data->xpu_alloc.mp_hdr = -1;
186 compiled_data->xpu_alloc.freed = kh_init(dy_str);
187 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
188 compiled_data->gradient_checkpoints = gradient_checkpoints;
189}
190
191typedef struct {
192 void* old_graph_exec_symbol_new_hook_context;
193 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
194 ccv_nnc_symbolic_graph_t* graph;
195 ccv_cnnp_model_build_data_t* build_data;
196} ccv_cnnp_model_set_exec_flags_context_t;
197
198static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
199{
200 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
201 if (flags_context->build_data->exec_flags)
202 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
203 if (flags_context->old_graph_exec_symbol_new_hook)
204 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
205}
206
207static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
208{
209 assert(model->graph);
210 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
211 int i;
212 for (i = 0; i < input_size; i++)
213 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
214 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
215 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
216 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
217 ccv_cnnp_model_sequence_t model_sequence = {
218 .bank = kh_init(ccv_cnnp_model_name_bank)
219 };
220 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
221 .add_parameter_indices = 1,
222 .prefix = 't',
223 .sequence = &model_sequence,
224 .symbols = parameters,
225 .ids = parameter_ids,
226 .trainables = parameter_trainables,
227 };
228 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
229 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
230 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
231 .add_parameter_indices = 0,
232 .prefix = 'r',
233 .sequence = &model_sequence,
234 .symbols = internals,
235 .ids = internal_ids,
236 .trainables = 0,
237 };
238 ccv_cnnp_model_build_data_t build_data = {
239 .exec_flags = 0,
240 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
241 .model_sequence = &model_sequence,
242 .add_to_array = ccv_cnnp_model_add_to_array,
243 .parameters = parameters,
244 .context = {
245 .add_to_parameter = &add_to_parameter_context,
246 .add_to_output = &add_to_output_context,
247 },
248 .gradient_checkpoints = 0,
249 };
250 model->data = &build_data;
251 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
252 .graph = model->graph,
253 .build_data = &build_data,
254 .old_graph_exec_symbol_new_hook = 0,
255 .old_graph_exec_symbol_new_hook_context = 0
256 };
257 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
258 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
259 // Reset back to previous hook.
260 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
261 for (i = 0; i < model->output_size; i++)
262 {
263 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
264 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
265 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
266 continue;
267 // If the output is an alias, insert a data transform regardless, for result correctness (we cannot bind an alias). See the ccv_nnc_tensor_bind_symbol method:
268 // we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind an alias tensor itself (this is expected, because we cannot
269 // handle the case where the alias covers only part of the original tensor yet is bound differently).
270 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
271 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
272 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
273 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
274 }
275 model->data = 0;
276 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
277 if (model_sequence.sequences)
278 ccv_array_free(model_sequence.sequences);
279 // Check if there are parameters that are not trainable. If there are, we will allocate a uint64 bitmap to record that.
280 int not_trainables = 0;
281 // Assert no parameter is alias.
282 for (i = 0; i < parameters->rnum; i++)
283 {
284 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
285 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
286 assert(alias_to.graph == 0); // Cannot find the one alias to.
287 if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
288 not_trainables = 1;
289 }
290 assert(parameters->rnum == parameter_trainables->rnum);
291 uint64_t* parameter_flags = 0;
292 if (not_trainables)
293 {
294 parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
295 for (i = 0; i < parameter_trainables->rnum; i++)
296 if (*(int*)ccv_array_get(parameter_trainables, i))
297 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
298 }
299 ccv_array_free(parameter_trainables);
300 // Assert no internal is alias.
301 for (i = 0; i < internals->rnum; i++)
302 {
303 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
304 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
305 assert(alias_to.graph == 0); // Cannot find the one alias to.
306 }
307 const int output_size = model->output_size;
308 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
309 const int parameters_rnum = parameters->rnum;
310 if (input_size > 0)
311 {
312 ccv_array_resize(parameters, parameters_rnum + input_size);
313 memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
314 }
315 ccv_nnc_symbolic_graph_simplify(model->graph,
316 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
317 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
318 CCV_NNC_SIMPLIFY_OPS_FUSION,
319 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
320 ccv_array_get(parameters, 0), parameters_rnum + input_size,
321 model->outputs, output_size,
322 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
323 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
324 // Size it down.
325 parameters->rnum = parameters_rnum;
326 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
327 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
328 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
329 assert(evaluate_to_size > 0);
330 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
331 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
332 compiled_data->loss = loss;
333 if (loss.cmd == CCV_NNC_NOOP)
334 {
335 // If no loss function is provided, there are no fits.
336 for (i = 0; i < output_size; i++)
337 {
338 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
339 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
340 if (alias_to.d < 0)
341 compiled_data->f[i] = model->outputs[i];
342 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
343 int ofs[CCV_NNC_MAX_DIM_ALLOC];
344 int inc[CCV_NNC_MAX_DIM_ALLOC];
345 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
346 int j;
347 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
348 { assert(ofs[j] == 0); } // There is no ofs.
349 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
350 }
351 }
352 } else {
353 for (i = 0; i < output_size; i++)
354 {
355 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
356 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
357 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
358 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
359 }
360 }
361 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
362 ccv_nnc_symbolic_graph_simplify(model->graph,
363 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
364 0, 0, // No need to provide binds at this point.
365 compiled_data->f, model->output_size,
366 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
367 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
368 // If inputs are from GPU, stream type is GPU.
369 compiled_data->parameters = parameters;
370 compiled_data->parameter_flags = parameter_flags;
371 compiled_data->internals = internals;
372 compiled_data->ids.parameters = parameter_ids;
373 compiled_data->ids.internals = internal_ids;
374 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
375}
376
377static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
378{
379 ccv_array_t* const stack = (ccv_array_t*)context;
380 ccv_array_push(stack, &symbol.d);
381}
382
383static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
384{
385 const ccv_nnc_tensor_symbol_t src_symbol = {
386 .d = src_index,
387 .graph = src_graph
388 };
389 const ccv_nnc_tensor_symbol_t dest_symbol = {
390 .d = dest_index,
391 .graph = dest_graph
392 };
393 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
394 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
395 int ofs[CCV_NNC_MAX_DIM_ALLOC];
396 int inc[CCV_NNC_MAX_DIM_ALLOC];
397 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
398 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
399}
400
401static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
402{
403 const ccv_nnc_tensor_symbol_t src_symbol = {
404 .d = src_index,
405 .graph = src_graph
406 };
407 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
408 const ccv_nnc_tensor_symbol_t dest_symbol = {
409 .d = dest_index,
410 .graph = dest_graph
411 };
412 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
413 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
414}
415
416static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
417static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
418
419typedef struct {
420 int parallel_count;
421 ccv_nnc_symbolic_graph_t* graph;
422 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
423} ccv_nnc_graph_exec_update_t;
424
425static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
426{
427 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
428 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
429 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
430 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
431 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
432 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
433 const int parallel_count = graph_exec_update->parallel_count;
434 int i;
435 for (i = 1; i < parallel_count; i++)
436 {
437 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
438 if (!CCV_NO_GRAPH_EXEC(copy))
439 {
440 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
441 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
442 }
443 }
444}
445
446void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
447{
448 assert(model->graph);
449 assert(model->compiled_data);
450 assert(!init->graph);
451 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
452 init->graph = ccv_nnc_symbolic_graph_new();
453 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
454 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
455 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
456 init->parallel_count = model->parallel_count;
457 init->memory_compression = model->memory_compression;
458 init->memory_reduction = model->memory_reduction;
459 init->gradient_checkpointing = model->gradient_checkpointing;
460 init->compiled_data->stream_type = model->compiled_data->stream_type;
461 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
462 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
463 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
464 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
465 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
466 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
467 int i, j;
468 // Verify that parameters, internals and saved_aux in both graphs have the same dimensionality.
469 for (i = 0; i < compiled_data->parameters->rnum; i++)
470 {
471 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
472 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
473 }
474 for (i = 0; i < compiled_data->internals->rnum; i++)
475 {
476 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
477 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
478 }
479 // Update inputs.
480 assert(model->input_size == init->input_size);
481 for (i = 0; i < model->input_size; i++)
482 if (model->inputs[i].d >= 0)
483 {
484 assert(init->inputs[i].d >= 0);
485 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
486 }
487 // Update outputs.
488 assert(model->output_size == init->output_size);
489 for (i = 0; i < model->output_size; i++)
490 {
491 if (model->outputs[i].d >= 0)
492 {
493 assert(init->outputs[i].d >= 0);
494 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
495 }
496 if (model->outputs[i].d != model->compiled_data->f[i].d)
497 {
498 assert(init->outputs[i].d != init->compiled_data->f[i].d);
499 if (model->compiled_data->f[i].d >= 0)
500 {
501 assert(init->compiled_data->f[i].d >= 0);
502 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
503 }
504 }
505 }
506 // Go through the graph to set tensor on matching symbols
507 for (i = 0; i < stack->rnum; i++)
508 {
509 const int d = *(int*)ccv_array_get(stack, i);
510 // If exceed range, skip.
511 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
512 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
513 continue;
514 const ccv_nnc_graph_exec_symbol_t src_symbol = {
515 .d = d,
516 .graph = init->graph
517 };
518 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
519 .d = d,
520 .graph = model->graph
521 };
522 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
523 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
524 // If the command doesn't match, skip.
525 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
526 continue;
527 // Now get all the inputs and outputs, if matches, set them.
528 const int* src_inputs;
529 int src_input_size;
530 const int* src_outputs;
531 int src_output_size;
532 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
533 const int* dest_inputs;
534 int dest_input_size;
535 const int* dest_outputs;
536 int dest_output_size;
537 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
538 // We may have unmatched input / output size because this is the minimizer and it has
539 // different saved_aux (for example, when we shrunk with CMD_NOOP).
540 if (src_input_size != dest_input_size)
541 continue;
542 if (src_output_size != dest_output_size)
543 continue;
544 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
545 // There may be mismatches between the source tensor symbols and the destination tensor symbols. The reason is that
546 // the minimizer may be passed in later, so tensors for the minimizer may be allocated later in the original
547 // graph, whereas in the newly created graph everything is streamlined (the minimizer exists from the beginning). That
548 // makes the order of tensor symbol creation different, and therefore which tensor is which can come out wrong as
549 // well. However, setting a new minimizer won't change the exec symbol ordering, because we never create new exec
550 // symbols after the gradient init step. Setting a new minimizer just updates that exec symbol's settings; it does not
551 // create a new exec symbol.
552 for (j = 0; j < src_input_size; j++)
553 if (src_inputs[j] >= 0)
554 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
555 for (j = 0; j < src_output_size; j++)
556 if (src_outputs[j] >= 0)
557 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
558 }
559 ccv_array_free(stack);
560 // After this, we get all tensors in the model graph resolved through tensor_auto.
561 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
562 // Verify symbols we get matches.
563 const int parameter_size = compiled_data->parameters->rnum;
564 for (i = 0; i < parameter_size; i++)
565 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
566 const int internal_size = compiled_data->internals->rnum;
567 for (i = 0; i < internal_size; i++)
568 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
569 // Go through compiled data.
570 if (compiled_data->tensor_arena)
571 {
572 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
573 if (flag == 0 && compiled_data->graph_exec_arena)
574 {
575 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
576 // Since we will reinit, if we previously set is_test, we need to set it again.
577 if (compiled_data->is_test)
578 {
579 const int parallel_count = ccv_max(model->parallel_count, 1);
580 ccv_nnc_graph_exec_update_t update = {
581 .parallel_count = parallel_count,
582 .graph = model->graph,
583 .graph_exec_arena = compiled_data->graph_exec_arena,
584 };
585 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
586 }
587 } else
588 // Free-up tensor arena & graph exec arena.
589 _ccv_cnnp_compiled_data_graph_free(compiled_data);
590 }
591 // There are other compiled graphs, for accum and apply gradients.
592 // However, the main conclusion is that these absorb operations shouldn't impact parameters.
593 // Thus, they won't impact the shape of gradients (only outgrad). Since we don't allocate
594 // outgrad ourselves, it is not a concern. For normal gradients, the shape cannot be changed,
595 // otherwise the parameters' shape would be meaningless. The same goes for internals.
596 // That is why we don't update these compiled graphs at all at this point.
597 // Free the model, we've already "absorbed" it.
598 ccv_cnnp_model_free(init);
599}
600
601void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
602{
603 assert(input_size == model->input_size || model->input_size == 0);
604 if (model->input_size == 0)
605 model->input_size = input_size;
606 if (!model->graph) // The graph is not compiled yet.
607 {
608 model->graph = ccv_nnc_symbolic_graph_new();
609 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
610 assert(model->compiled_data);
611 int i, flag = 0;
612 for (i = 0; !flag && i < input_size; i++)
613 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
614 // If inputs are from GPU, stream type is GPU.
615 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
616 model->compiled_data->minimize.minimizer = minimizer;
617 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
618 } else {
619 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model
620 // and then absorb the "new model" into the old one.
621 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
622 ccv_cnnp_model_absorb(model, init, inputs, input_size);
623 // Reset minimizer.
624 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
625 }
626}
627
628ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
629{
630 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
631 new_model->is_trainable = is_trainable;
632 return new_model;
633}
634
635void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
636{
637 assert(model->graph);
638 assert(output_size == model->output_size);
639 ccv_nnc_symbolic_graph_t* const graph = model->graph;
640 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
641 int i;
642 for (i = 0; i < output_size; i++)
643 {
644 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
645 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
646 }
647}
648
649void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
650{
651 if (workspace_size == model->workspace_size)
652 return;
653 model->workspace_size = workspace_size;
654 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
655 if (compiled_data && compiled_data->graph)
656 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
657}
658
659size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
660{
661 return model->workspace_size;
662}
663
664void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
665{
666 if (parallel == 0)
667 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
668 else
669 model->parallel_count = parallel;
670 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
671 if (compiled_data)
672 { assert(!compiled_data->graph); }
673}
674
675void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
676{
677 model->max_stream_count = max_stream_count;
678 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
679 if (compiled_data)
680 { assert(!compiled_data->graph); }
681}
682
683void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
684{
685 model->memory_compression = memory_compression;
686 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
687 if (compiled_data)
688 { assert(!compiled_data->graph); }
689}
690
691void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
692{
693 model->memory_reduction = memory_reduction;
694 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
695 if (compiled_data)
696 { assert(!compiled_data->graph); }
697}
698
699void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
700{
701 model->gradient_checkpointing = gradient_checkpointing;
702}
703
704int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
705{
706 return model->gradient_checkpointing;
707}
708
709typedef struct {
710 int parallel_count;
711 ccv_nnc_symbolic_graph_t* graph;
712 ccv_cnnp_compiled_data_t* compiled_data;
713 ccv_nnc_tensor_arena_t* tensor_arena;
714} ccv_nnc_tensor_init_states_t;
715
716static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
717{
718 int i;
719 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
720 for (i = 0; i < compiled_data->parameters->rnum; i++)
721 {
722 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
723 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
724 return 1;
725 }
726 for (i = 0; i < compiled_data->internals->rnum; i++)
727 {
728 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
729 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
730 return 1;
731 }
732 return 0;
733}
734
735static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
736{
737 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
738 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
739 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
740 if (!output_tensor)
741 return;
742 const int d = output_symbol.d;
743 assert(d < tensor_init_states->compiled_data->tensors_init.size);
744 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
745 if (init_v[d >> 5] & (1u << (d & 0x1f)))
746 return;
747 init_v[d >> 5] |= (1u << (d & 0x1f));
748 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
749 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
750 const int parallel_count = tensor_init_states->parallel_count;
751 int i;
752 for (i = 1; i < parallel_count; i++)
753 {
754 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
755 if (copy)
756 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
757 }
758}
759
760 // This method can only handle cases where we added new tensors and execs, never deleted any. This invariant holds because
761 // we set up everything (including calling the simplify method) in ccv_cnnp_model_compile, before this rewind setup.
762static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
763{
764 assert(model->graph);
765 assert(model->compiled_data);
766 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
767 assert(compiled_data->rewindables);
768 int i;
769 for (i = 0; i < compiled_data->rewindables->rnum; i++)
770 {
771 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
772 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
773 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
774 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
775 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
776 }
777 ccv_array_clear(compiled_data->rewindables);
778 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
779}
780
781static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
782{
783 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
784 .type = CCV_CNNP_REWIND_TENSOR,
785 .tensor = symbol
786 };
787 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
788 ccv_array_push(rewind_symbols, &rewind_symbol);
789}
790
791 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
792{
793 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
794 .type = CCV_CNNP_REWIND_TENSOR,
795 .tensor = symbol
796 };
797 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
798 ccv_array_push(rewind_symbols, &rewind_symbol);
799}
800
801static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
802{
803 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
804 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
805 .graph_exec = symbol
806 };
807 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
808 ccv_array_push(rewind_symbols, &rewind_symbol);
809}
810
811static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
812{
813 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
814 if (!CCV_NO_GRAPH_EXEC(update_exec))
815 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
816 int i;
817 for (i = 1; i < parallel_count; i++)
818 {
819 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
820 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
821 if (!CCV_NO_GRAPH_EXEC(copy))
822 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
823 }
824}
825
826static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
827{
828 assert(compiled_data);
829 assert(symbolic_graph);
830 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
831 int i;
832 for (i = 1; i < parallel_count; i++)
833 {
834 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
835 if (copy_symbol.graph)
836 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
837 }
838 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
839 if (graph_exec_arena)
840 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
841 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
842 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
843 if (gradient_graph_exec_arena)
844 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
845}
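// Illustrative sketch (not part of ccv_cnnp_model.c): swapping the command on one update node.
// _ccv_cnnp_model_graph_exec_symbol_set mirrors the new cmd onto every data-parallel copy of
// the symbol and onto any already-instantiated graph exec arenas. Assuming a hypothetical
// compiled model with parallel_count == 2, disabling updates for parameter 0 would look like:
//
//   const ccv_nnc_cmd_t noop = ccv_nnc_cmd(CCV_NNC_NOOP, 0, ccv_nnc_cmd_auto, 0);
//   _ccv_cnnp_model_graph_exec_symbol_set(model->graph, model->compiled_data, 2,
//       model->compiled_data->update_nodes[0], noop);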
846
847static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
848{
849 int this_parameter_flag = 0;
850 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
851 return this_parameter_flag;
852 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
853 int j, k;
854 // For no-op, we can preserve previous saved_aux_size.
855 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
856 {
857 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
858 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
859 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
860 // make sure some model parameters don't update if we don't want them to.
861 int old_saved_aux_size;
862 if (old_minimizer.cmd == CCV_NNC_NOOP)
863 {
864 int input_size;
865 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
866 if (input_size < 2) // This is not legit.
867 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
868 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
869 old_saved_aux_size = input_size - 2;
870 } else
871 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
872 if (old_saved_aux_size != saved_aux_size)
873 {
874 this_parameter_flag = 1;
875 if (saved_aux_size > old_saved_aux_size)
876 {
877 // Allocate new tensor symbols.
878 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
879 for (j = old_saved_aux_size; j < saved_aux_size; j++)
880 {
881 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
882 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
883				const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
884 for (k = 1; k < parallel_count; k++)
885 {
886					ccv_nnc_tensor_param_t dev_info = info;
887					if (k != device_id)
888						CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
889					else
890						CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
891 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
892 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
893 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
894 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
895 }
896 }
897 } else {
898 for (j = saved_aux_size; j < old_saved_aux_size; j++)
899 {
900 for (k = 1; k < parallel_count; k++)
901 {
902					const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
903					if (src_copy.d >= 0)
904					{
905						ccv_nnc_tensor_symbol_free(graph, src_copy);
906						ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
907					}
908					const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
909					if (dest_copy.d >= 0)
910					{
911						ccv_nnc_tensor_symbol_free(graph, dest_copy);
912						ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
913					}
914				}
915				ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
916				ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
917				saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
918 }
919 }
920 }
921 }
922 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
923 if (this_parameter_flag)
924 {
925 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
926 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
927 const int* inputs = 0;
928 int input_size = 0;
929 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
930		assert(input_size >= 1);
931 update_inputs[0].d = inputs[0];
932 update_inputs[0].graph = graph;
933 update_inputs[1].d = inputs[1];
934 update_inputs[1].graph = graph;
935 update_outputs[0] = updated_parameters[parameter_indice];
936 for (j = 0; j < saved_aux_size; j++)
937 {
938 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
939 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
940 }
941 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
942 for (k = 1; k < parallel_count; k++)
943 {
944 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
945			assert(copy.d >= 0);
946			ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
947			assert(input_size >= 1);
948 update_inputs[0].d = inputs[0];
949 update_inputs[0].graph = graph;
950 update_inputs[1].d = inputs[1];
951 update_inputs[1].graph = graph;
952 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
953 for (j = 0; j < saved_aux_size; j++)
954 {
955 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
956 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
957 }
958 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
959 }
960 }
961 return this_parameter_flag;
962}
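// Illustrative sketch (not part of ccv_cnnp_model.c): saved_aux is a flat
// [parameter_size x max_saved_aux_size] array of source/destination symbol pairs, indexed as
// saved_aux[parameter_indice * max_saved_aux_size + j]. For example, with a hypothetical
// max_saved_aux_size of 2:
//
//   const int max_saved_aux_size = 2;   /* hypothetical */
//   const int parameter_indice = 3;
//   const int j = 1;
//   const int slot = parameter_indice * max_saved_aux_size + j;   /* == 7 */
//
// Switching parameter 3 to a minimizer that needs fewer (or more) aux slots is what triggers
// the free / allocate paths above.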
963
964typedef struct {
965 int parameter_size;
966 ccv_nnc_cmd_t minimizer;
967 ccv_cnnp_model_io_t parameters[1];
968} ccv_cnnp_set_minimizer_for_parameter_t;
969
970static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
971{
972 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
973	assert(compiled_data);
974 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
975 // We update all parameters, at this point, we have one minimizer.
976 const int parameter_size = compiled_data->parameters->rnum;
977 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
978 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
979	assert(symbolic_graph);
980	const int parallel_count = ccv_max(model->parallel_count, 1);
981 ccv_array_t* const parameters = compiled_data->minimize.parameters;
982 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
983 int i, j, flag = 0;
984 for (i = 0; i < parameters->rnum; i++)
985 {
986		ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
987 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
988 {
989 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
990			assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
991 const int old_rnum = parameter_indices->rnum;
992 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
993 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
994			assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
995 if (param_ref >= 0)
996 {
997				assert(param_ref + old_rnum < parameter_indices->rnum);
998				*(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
999 parameter_indices->rnum = old_rnum + 1;
1000 }
1001 }
1002 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1003 // We may have duplicated indices, but that is OK, we will set it twice.
1004 for (j = 0; j < parameter_indices->rnum; j++)
1005 {
1006			const int d = *(int*)ccv_array_get(parameter_indices, j);
1007			assert(d <= parameter_size);
1008 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1009 flag = 1;
1010 }
1011 ccv_array_clear(parameter_indices);
1012 }
1013 ccv_array_free(parameter_indices);
1014 return flag;
1015}
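// Illustrative sketch (not part of ccv_cnnp_model.c): param_sel and param_ref are stored
// off-by-one so that 0 is never a legal stored value (hence the asserts above). A stored value
// v > 0 means index v - 1; a negative value means "all parameters". The decode used above is:
//
//   const int stored = io->param_sel;                        /* io: hypothetical ccv_cnnp_model_io_t */
//   const int param_sel = stored > 0 ? stored - 1 : stored;  /* 4 becomes 3, -1 stays -1 */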
1016
1017static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1018{
1019 if (new_saved_aux_size == old_saved_aux_size)
1020 return;
1021	assert(new_saved_aux_size > old_saved_aux_size);
1022 int i, j;
1023 for (i = parameter_size - 1; i >= 0; i--)
1024 {
1025 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1026			saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
1027 for (j = old_saved_aux_size - 1; j >= 0; j--)
1028 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1029 }
1030}
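// Illustrative sketch (not part of ccv_cnnp_model.c): the scatter widens the per-parameter
// stride of saved_aux in place, so it walks from the last parameter backwards; walking forward
// would overwrite entries that have not been moved yet. E.g. going from stride 1 to stride 2
// with 3 parameters (pNa0 = aux 0 of parameter N, NONE = NO_TENSOR_SYMBOL):
//
//   before: [p0a0, p1a0, p2a0, ?,    ?,    ?   ]
//   after:  [p0a0, NONE, p1a0, NONE, p2a0, NONE]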
1031
1032static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1033{
1034 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1035	assert(compiled_data);
1036 if (!compiled_data->rewindables)
1037 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1038 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1039 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1040 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1041}
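// Illustrative sketch (not part of ccv_cnnp_model.c): once the hooks above are installed, every
// tensor symbol, alias symbol or graph exec symbol subsequently created on model->graph is
// appended to compiled_data->rewindables, e.g.:
//
//   _ccv_cnnp_model_set_rewindables(model);
//   ccv_nnc_tensor_symbol_new(model->graph, info, 0);   /* recorded as CCV_CNNP_REWIND_TENSOR */
//
// so a later rewind (for example before re-running gradient init in a different mode) can free
// exactly the symbols added since this call.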
1042
1043static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1044{
1045 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1046	assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1047	assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1048	const int evaluate_to_size = compiled_data->evaluate.to_size;
1049	assert(evaluate_to_size > 0);
1050	const int parallel_count = ccv_max(model->parallel_count, 1);
1051	compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1052 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1053 int i, j;
1054 const int output_size = model->output_size;
1055	assert(!fits || fit_size == output_size * parallel_count);
1056 if (fits)
1057 for (i = 0; i < output_size; i++)
1058 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1059 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1060 const int parameter_size = compiled_data->parameters->rnum;
1061	compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1062 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1063 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1064 int parameter_size_maybe_more = parameter_size;
1065 compiled_data->disable_outgrad = disable_outgrad;
1066 int outgrad_size;
1067 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1068 outgrad_size = 0;
1069 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1070 outgrad_size = model->input_size;
1071 else {
1072		assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1073 outgrad_size = 0;
1074 for (i = 0; i < model->input_size; i++)
1075 if (!(disable_outgrad & ((uint64_t)1 << i)))
1076 ++outgrad_size;
1077 }
1078 compiled_data->outgrad_size = outgrad_size;
1079 parameter_size_maybe_more += outgrad_size;
1080	compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1081 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1082 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1083 compiled_data->backward.to_size = parameter_size_maybe_more;
1084	ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1085 if (compiled_data->parameter_flags)
1086 {
1087		parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1088		for (i = 0; i < parameter_size; i++)
1089			if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1090				parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1091			else
1092				parameters[i] = NO_TENSOR_SYMBOL;
1093 }
1094 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1095		ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1096	else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1097		ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1098	else { // Compute minimize with gradients including selected inputs.
1099		assert(model->input_size > 0);
1100		assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1101		assert(outgrad_size > 0);
1102		ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1103		j = 0;
1104		for (i = 0; i < model->input_size; i++)
1105			if (!(disable_outgrad & ((uint64_t)1 << i)))
1106				outgrads[j++] = model->inputs[i];
1107		ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1108 }
1109 if (compiled_data->parameter_flags)
1110		ccfree(parameters);
1111 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1112 if (compiled_data->minimize.parameters)
1113 _ccv_cnnp_apply_parameters_with_minimizer(model);
1114 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1115 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1116 for (i = 0; i < output_size; i++)
1117 {
1118 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1119 // Init this to 1 so we can backprop.
1120 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1121 }
1122 compiled_data->backward.to_size = 0;
1123 for (i = 0; i < parameter_size_maybe_more; i++)
1124 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1125 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1126 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1127 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1128 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1129 {
1130 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1131 continue;
1132 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1133 const int* tos;
1134 int to_size;
1135 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1136 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1137 {
1138 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1139 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1140 int flag = 0;
1141			const int outgrad_destination_start = ccv_max(0, destination_count - i);
1142 for (j = i - 1; !flag && j >= 0; j--)
1143 if (j + outgrad_destination_start < destination_count)
1144 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1145 if (!flag) // Only if we cannot find it, we add it.
1146 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1147 }
1148 }
1149 if (parallel_count > 1)
1150 {
1151 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1152 0, 0,
1153 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1154 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1155 0, 0, 0,
1156 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1157			SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1158 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1159 for (i = 0; i < evaluate_to_size; i++)
1160 for (j = 1; j < parallel_count; j++)
1161 {
1162 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1163 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1164 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1165 }
1166 const int backward_to_size = compiled_data->backward.to_size;
1167 for (i = 0; i < backward_to_size; i++)
1168 for (j = 1; j < parallel_count; j++)
1169 {
1170 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1171 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1172 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1173 }
1174 }
1175 // Only use memory compression if we are in gradient parameter mode.
1176 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1177 {
1178 if (model->memory_compression)
1179			ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1180		if (model->memory_reduction)
1181			ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1182 }
1183 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1184 compiled_data->gradient_mode = gradient_mode;
1185}
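// Illustrative sketch (not part of ccv_cnnp_model.c): disable_outgrad is a per-input bitmask;
// bit i set means input i does not receive an output gradient. Assuming a hypothetical model
// with 3 inputs and only input 1 disabled:
//
//   const uint64_t disable_outgrad = (uint64_t)1 << 1;
//   int outgrad_size = 0;
//   for (int i = 0; i < 3; i++)
//     if (!(disable_outgrad & ((uint64_t)1 << i)))
//       ++outgrad_size;   /* counts inputs 0 and 2, so outgrad_size == 2 */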
1186
1187void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1188{
1189	assert(!compiled_data->tensors.parameters);
1190	const int parameter_size = compiled_data->parameters->rnum;
1191	const int parallel_count = ccv_max(model->parallel_count, 1);
1192	const int internal_size = compiled_data->internals->rnum;
1193	compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1194	compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1195	compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1196 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1197}
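// Illustrative note (not part of ccv_cnnp_model.c): tensors.parameters and tensors.internals
// share the single calloc'd block of (parameter_size + internal_size) * parallel_count pointers
// above; tensors.internals simply points at the tail of it. Device copy j of parameter i lives at
//
//   compiled_data->tensors.parameters[i + j * parameter_size]
//
// and likewise for internals, which is the indexing used throughout the functions below.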
1198
1199int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1200{
1201 int i, j;
1202 const int parameter_size = compiled_data->parameters->rnum;
1203	const int parallel_count = ccv_max(model->parallel_count, 1);
1204 const int internal_size = compiled_data->internals->rnum;
1205 for (i = 0; i < parameter_size; i++)
1206 {
1207 // parameters has to be allocated all together.
1208 if (compiled_data->tensors.parameters[i])
1209 {
1210 for (j = 1; j < parallel_count; j++)
1211			{ assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1212 continue;
1213 }
1214 return 1;
1215 }
1216 for (i = 0; i < internal_size; i++)
1217 {
1218 if (!compiled_data->tensors.internals[i])
1219 return 1;
1220 for (j = 1; j < parallel_count; j++)
1221 if (!compiled_data->tensors.internals[i + j * internal_size])
1222 return 1;
1223 }
1224 return 0;
1225}
1226
1227void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1228{
1229 int i, j;
1230 const int parameter_size = compiled_data->parameters->rnum;
1231	const int parallel_count = ccv_max(model->parallel_count, 1);
1232 const int internal_size = compiled_data->internals->rnum;
1233 for (i = 0; i < parameter_size; i++)
1234 {
1235 // parameters has to be allocated all together.
1236 if (compiled_data->tensors.parameters[i])
1237 {
1238 for (j = 1; j < parallel_count; j++)
1239			{ assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1240 continue;
1241 }
1242		const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1243		ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1244		if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1245			CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1246		const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1247 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1248 for (j = 1; j < parallel_count; j++)
1249 {
1250			if (j != device_id)
1251				CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1252			else
1253				CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1254 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1255 }
1256 }
1257	const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1258	for (i = 0; i < internal_size; i++)
1259	{
1260		const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1261 const int d = retained.d;
1262 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1263 continue;
1264 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1265		if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1266			CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1267		const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1268 if (!compiled_data->tensors.internals[i])
1269 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1270 for (j = 1; j < parallel_count; j++)
1271 {
1272 if (j != device_id)
1273				CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1274			else
1275				CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1276 if (!compiled_data->tensors.internals[i + j * internal_size])
1277 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1278 }
1279 }
1280	compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1281}
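// Illustrative sketch (not part of ccv_cnnp_model.c): tensors_init.v is a bitfield with one bit
// per tensor symbol; the low pointer bit doubles as a "not fully allocated" tag that
// CCV_NNC_INIT_V strips (the line above clears it once init_1 has run). Testing whether a
// symbol with index d has been initialized uses the same pattern as the loop above:
//
//   const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
//   const int inited = !!(init_v[d >> 5] & (1u << (d & 0x1f)));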
1282
1283static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1284{
1285 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1286 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1287}
1288
1289static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1290{
1291	assert(parallel_count > 0);
1292 int i, j;
1293 for (i = 0; i < tensor_size; i++)
1294 {
1295 if (!tensors[i])
1296 continue;
1297 const int d = tensor_symbols[i].d;
1298 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1299 continue;
1300 for (j = 1; j < parallel_count; j++)
1301 if (tensors[i + j * tensor_size])
1302 {
1303				ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1304				ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1305				ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1306 }
1307 }
1308}
1309
1310static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1311{
1312	assert(parallel_count > 0);
1313 int i, j;
1314 for (i = 0; i < tensor_size; i++)
1315 {
1316 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1317 for (j = 1; j < parallel_count; j++)
1318 {
1319 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1320 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1321 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1322 { // We shouldn't allocate this, free it up.
1323 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1324 tensors[i + j * tensor_size] = 0;
1325 }
1326 }
1327 }
1328}
1329
1330static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1331{
1332	assert(parallel_count > 0);
1333 int i, j;
1334 for (i = 0; i < tensor_size; i++)
1335 {
1336 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1337 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1338 continue;
1339 if (graph)
1340 {
1341 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1342 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1343 tensor_symbol = alias_to;
1344 }
1345		ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1346 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1347 {
1348 const ccv_nnc_tensor_bind_t retained_bind = {
1349 .symbol = tensor_symbol,
1350 .tensor = tensor
1351 };
1352 ccv_array_push(tensor_binds, &retained_bind);
1353 }
1354 for (j = 1; j < parallel_count; j++)
1355 {
1356 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1357 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1358 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1359 {
1360 const ccv_nnc_tensor_bind_t bind = {
1361 .symbol = copy,
1362 .tensor = tensors[i + j * tensor_size]
1363 };
1364 ccv_array_push(tensor_binds, &bind);
1365 }
1366 }
1367 }
1368}
1369
1370static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1371{
1372 if (compiled_data->graph)
1373 ccv_nnc_graph_free(compiled_data->graph);
1374 compiled_data->graph = 0;
1375 compiled_data->is_test = 0;
1376 if (compiled_data->tensor_arena)
1377 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1378 compiled_data->tensor_arena = 0;
1379 if (compiled_data->graph_exec_arena)
1380 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1381 compiled_data->graph_exec_arena = 0;
1382 if (compiled_data->backward.from_ops)
1383		ccfree(compiled_data->backward.from_ops);
1384 compiled_data->backward.from_ops = 0;
1385 if (compiled_data->evaluate.schedule)
1386 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1387 compiled_data->evaluate.schedule = 0;
1388 if (compiled_data->backward.schedule)
1389 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1390 compiled_data->backward.schedule = 0;
1391}
1392
1393static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1394{
1395 if (compiled_data->gradients)
1396		ccfree(compiled_data->gradients);
1397 compiled_data->gradients = 0;
1398 if (compiled_data->updated_parameters)
1399		ccfree(compiled_data->updated_parameters);
1400 compiled_data->updated_parameters = 0;
1401 compiled_data->update_nodes = 0;
1402 compiled_data->saved_aux = 0;
1403}
1404
1405static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1406{
1407 if (compiled_data->backward.gradients)
1408		ccfree(compiled_data->backward.gradients);
1409 compiled_data->backward.gradients = 0;
1410 if (compiled_data->backward.accum)
1411 ccv_nnc_graph_free(compiled_data->backward.accum);
1412 compiled_data->backward.accum = 0;
1413 if (compiled_data->backward.tensor_arena)
1414 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1415 compiled_data->backward.tensor_arena = 0;
1416 if (compiled_data->backward.graph_exec_arena)
1417 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1418 compiled_data->backward.graph_exec_arena = 0;
1419}
1420
1421static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1422{
1423 if (compiled_data->apply_gradients.graph)
1424 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1425 compiled_data->apply_gradients.graph = 0;
1426 if (compiled_data->apply_gradients.tensor_arena)
1427 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1428 compiled_data->apply_gradients.tensor_arena = 0;
1429 if (compiled_data->apply_gradients.graph_exec_arena)
1430 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1431 compiled_data->apply_gradients.graph_exec_arena = 0;
1432}
1433
1434// Compile the graph to run ccv_cnnp_model_fit
1435static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1436{
1437 int i, j;
1438 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1439	assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1440	compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1441	const int parallel_count = ccv_max(model->parallel_count, 1);
1442	assert(output_size == model->output_size * parallel_count);
1443	assert(!fits || output_size == fit_size);
1444	assert(output_size > 0);
1445 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1446 {
1447 _ccv_cnnp_model_set_rewindables(model);
1448 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1449 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1450 _ccv_cnnp_model_rewind_graph(model);
1451 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1452 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1453 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1454 }
1455 const int tensors_init = !!compiled_data->tensors_init.v;
1456 if (!tensors_init)
1457 _ccv_cnnp_model_tensors_init(model, compiled_data);
1458 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1459 // Check if it is not fully allocated, if it is not, init_1.
1460 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1461 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1462	assert((input_size % parallel_count) == 0);
1463	assert((output_size % parallel_count) == 0);
1464	assert((fit_size % parallel_count) == 0);
1465 const int input_size_per_p = input_size / parallel_count;
1466 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1467 const int output_size_per_p = output_size / parallel_count;
1468 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1469 const int fit_size_per_p = fit_size / parallel_count;
1470 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1471 const int parameter_size = compiled_data->parameters->rnum;
1472	_ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1473	_ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1474	const int internal_size = compiled_data->internals->rnum;
1475	_ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1476	_ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1477	ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1478 ccv_array_free(tensor_binds);
1479	const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1480	if (tensors_init && parallel_count > 1)
1481		_ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1482 // If tensor is not init'ed, we need to init states first.
1483 if (_ccv_cnnp_any_to_init(compiled_data))
1484 {
1485 ccv_nnc_tensor_init_states_t tensor_init_states = {
1486 .parallel_count = parallel_count,
1487 .graph = model->graph,
1488 .compiled_data = compiled_data,
1489 .tensor_arena = compiled_data->tensor_arena
1490 };
1491 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1492 }
1493 compiled_data->is_test = 0;
1494 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1495 // No need to set because it is default to training mode.
1496 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1497 for (i = 0; i < saved_aux_size * parameter_size; i++)
1498 {
1499 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1500 continue;
1501 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1502		ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1503		for (j = 1; j < parallel_count; j++)
1504		{
1505			ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1506			if (copy)
1507				ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1508 }
1509 }
1510 const int evaluate_to_size = compiled_data->evaluate.to_size;
1511 compiled_data->evaluate.to_op_size = 0;
1512 for (i = 0; i < evaluate_to_size; i++)
1513 {
1514 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1515 if (to.graph)
1516 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1517 }
1518 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1519	ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1520}
1521
1522ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1523{
1524 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1525 if (!compiled_data || !compiled_data->graph)
1526 return 0;
1527 return ccv_nnc_graph_default_stream(compiled_data->graph);
1528}
1529
1530uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1531{
1532 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1533 if (!compiled_data || !compiled_data->tensor_arena)
1534 return 0;
1535 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1536}
1537
1538static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1539{
1540 int i, j;
1541 for (i = 0; i < tensor_size; i++)
1542 {
1543 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1544 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1545 continue;
1546 if (graph)
1547 {
1548 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1549 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1550 tensor_symbol = alias_to;
1551 }
1552 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1553 for (j = 1; j < parallel_count; j++)
1554 {
1555 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1556 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1557 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1558 }
1559 }
1560}
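// Illustrative sketch (not part of ccv_cnnp_model.c): once the fit graph has been jitted,
// subsequent ccv_cnnp_model_fit calls reuse it and only rebind the user-provided tensors into
// the existing tensor arena, e.g. for a hypothetical single-device model:
//
//   _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph,
//       model->inputs, inputs, input_size, 1 /* parallel_count */);
//
// which is exactly what the else-branch of ccv_cnnp_model_fit below does for each
// input / output / fit set.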
1561
1562void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1563{
1564 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1565	assert(compiled_data);
1566	const int parallel_count = ccv_max(model->parallel_count, 1);
1567	assert(output_size == model->output_size * parallel_count);
1568	assert(input_size == model->input_size * parallel_count);
1569	assert(!fits || fit_size == output_size);
1570	assert(model->graph);
1571 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1572 {
1573 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1574 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1575 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1576 // Compile the symbolic graph down only when needed.
1577 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1578 } else {
1579		assert((input_size % parallel_count) == 0);
1580		assert((output_size % parallel_count) == 0);
1581		assert((fit_size % parallel_count) == 0);
1582 const int input_size_per_p = input_size / parallel_count;
1583 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1584 const int output_size_per_p = output_size / parallel_count;
1585 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1586 const int fit_size_per_p = fit_size / parallel_count;
1587 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1588 }
1589 if (compiled_data->is_test)
1590 {
1591 compiled_data->is_test = 0;
1592 ccv_nnc_graph_exec_update_t update = {
1593 .parallel_count = parallel_count,
1594 .graph = model->graph,
1595 .graph_exec_arena = compiled_data->graph_exec_arena,
1596 };
1597 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1598 }
1599 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1600}
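// Illustrative sketch (not taken from this file): a minimal ccv_cnnp_model_fit() call, assuming
// the model was already compiled via ccv_cnnp_model_compile() with a loss and matching input/fit
// parameters, and using the TENSOR_LIST()/CPU_TENSOR_NHWC() conveniences from ccv_nnc_easy.h:
//
//   ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);   // input
//   ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);   // fit (target)
//   ccv_nnc_tensor_t* const out = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); // output
//   // TENSOR_LIST() expands to a tensor array plus its count, covering the *_size arguments;
//   // the trailing 0, 0 pass no tensor tape and no stream context (synchronous run).
//   ccv_cnnp_model_fit(model, TENSOR_LIST(x), TENSOR_LIST(y), TENSOR_LIST(out), 0, 0);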
1601
1602// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = false (MULTISTAGE_MODE_NO_GRAD).
1603static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1604{
1605 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1606 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1607 const int parallel_count = ccv_max(model->parallel_count, 1);
1608 assert(output_size == model->output_size * parallel_count);
1609 assert(output_size > 0);
1610 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here; rather,
1611 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1612 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1613 {
1614 const int evaluate_to_size = compiled_data->evaluate.to_size;
1615 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1616 _ccv_cnnp_model_set_rewindables(model);
1617 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1618 0, 0,
1619 0, 0, 0,
1620 0, 0, 0,
1621 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1622 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1623 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1624 int i, j;
1625 for (i = 0; i < evaluate_to_size; i++)
1626 for (j = 1; j < parallel_count; j++)
1627 {
1628 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1629 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1630 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1631 }
1632 }
1633 const int tensors_init = !!compiled_data->tensors_init.v;
1634 if (!tensors_init)
1635 _ccv_cnnp_model_tensors_init(model, compiled_data);
1636 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1637 // Check if it is not fully allocated, if it is not, init_1.
1638 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1639 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1640 assert((input_size % parallel_count) == 0);
1641 assert((output_size % parallel_count) == 0);
1642 const int input_size_per_p = input_size / parallel_count;
1643 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1644 const int output_size_per_p = output_size / parallel_count;
1645 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1646 const int parameter_size = compiled_data->parameters->rnum;
1647 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1648 const int internal_size = compiled_data->internals->rnum;
1649 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1650 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1651 // If we generated gradients for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1652 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1653 ccv_array_free(tensor_binds);
1654 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1655 // If tensor is not init'ed, we need to init states first.
1656 if (tensors_init && parallel_count > 1)
1657 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1658 if (_ccv_cnnp_any_to_init(compiled_data))
1659 {
1660 ccv_nnc_tensor_init_states_t tensor_init_states = {
1661 .parallel_count = parallel_count,
1662 .graph = model->graph,
1663 .compiled_data = compiled_data,
1664 .tensor_arena = compiled_data->tensor_arena
1665 };
1666 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1667 }
1668 compiled_data->is_test = 1;
1669 ccv_nnc_graph_exec_update_t update = {
1670 .parallel_count = parallel_count,
1671 .graph = model->graph,
1672 .graph_exec_arena = compiled_data->graph_exec_arena,
1673 };
1674 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1675 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1676 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1677}
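// Note: the graph compiled above is forward-only. It binds inputs, outputs, parameters and
// internals directly, schedules the graph once, and autotunes it; later no-grad evaluations reuse
// it and only rebind the input/output tensors (see the else branch of ccv_cnnp_model_dry_run below).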
1678
1679static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1680{
1681 assert(!compiled_data->tensors.gradients);
1682 const int parameter_size = compiled_data->parameters->rnum;
1683 const int parallel_count = ccv_max(model->parallel_count, 1);
1684 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1685 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1686 int i, j;
1687 for (i = 0; i < parameter_size; i++)
1688 {
1689 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1690 {
1691 compiled_data->tensors.gradients[i] = 0;
1692 compiled_data->tensors.accum_gradients[i] = 0;
1693 for (j = 1; j < parallel_count; j++)
1694 {
1695 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1696 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1697 }
1698 continue;
1699 }
1700 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1701 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1702 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1703 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1704 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1705 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1706 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1707 for (j = 1; j < parallel_count; j++)
1708 {
1709 if (j != device_id)
1710 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1711 else
1712 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1713 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1714 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1715 }
1716 }
1717}
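// Note on layout: tensors.gradients above is a single allocation of 2 * parameter_size *
// parallel_count pointers. The first parameter_size * parallel_count entries hold the per-device
// gradient tensors (the copy for device j of parameter i lives at i + j * parameter_size); the
// second half backs accum_gradients, which stays 0 until gradient accumulation is actually needed.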
1718
1719static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1720{
1721 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1722 return 1;
1723 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1724 return 0;
1725 int i;
1726 for (i = 0; i < input_size; i++)
1727 if (!(disable_outgrad & ((uint64_t)1 << i)))
1728 return 0;
1729 return 1;
1730}
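// For example, with input_size == 2: disable_outgrad == 0x3 (bits 0 and 1 set) returns 1 because
// every input's outgoing gradient is disabled, while disable_outgrad == 0x1 returns 0 because
// input 1 still needs its gradient with respect to the input computed.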
1731
1732// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1733// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1734static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1735{
1736 int i, j;
1737 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1738 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1739 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode);
1740 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1741 const int parallel_count = ccv_max(model->parallel_count, 1);
1742 assert(output_size == model->output_size * parallel_count);
1743 assert(output_size > 0);
1744 // There shouldn't be a loss function if we evaluate with multistage jit.
1745 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1746 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1747 {
1748 _ccv_cnnp_model_set_rewindables(model);
1749 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1750 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1751 _ccv_cnnp_model_rewind_graph(model);
1752 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1753 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1754 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1755 }
1756 const int tensors_init = !!compiled_data->tensors_init.v;
1757 if (!tensors_init)
1758 _ccv_cnnp_model_tensors_init(model, compiled_data);
1759 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1760 // Check if it is not fully allocated, if it is not, init_1.
1761 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1762 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1763 assert((input_size % parallel_count) == 0);
1764 assert((output_size % parallel_count) == 0);
1765 const int input_size_per_p = input_size / parallel_count;
1766 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1767 const int output_size_per_p = output_size / parallel_count;
1768 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1769 const int parameter_size = compiled_data->parameters->rnum;
1770 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1771 const int internal_size = compiled_data->internals->rnum;
1772 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1773 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1774 if (!compiled_data->tensors.gradients)
1775 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1776 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1777 if (compiled_data->backward.to_size > 0)
1778 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1779 else
1780 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1781 ccv_array_free(tensor_binds);
1782 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1783 if (tensors_init && parallel_count > 1)
1784 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1785 // If tensor is not init'ed, we need to init states first.
1786 if (_ccv_cnnp_any_to_init(compiled_data))
1787 {
1788 ccv_nnc_tensor_init_states_t tensor_init_states = {
1789 .parallel_count = parallel_count,
1790 .graph = model->graph,
1791 .compiled_data = compiled_data,
1792 .tensor_arena = compiled_data->tensor_arena
1793 };
1794 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1795 }
1796 compiled_data->is_test = is_test;
1797 ccv_nnc_graph_exec_update_t update = {
1798 .parallel_count = parallel_count,
1799 .graph = model->graph,
1800 .graph_exec_arena = compiled_data->graph_exec_arena,
1801 };
1802 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1803 const int evaluate_to_size = compiled_data->evaluate.to_size;
1804 compiled_data->evaluate.to_op_size = 0;
1805 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
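 // Map each evaluate.tos symbol to its concrete graph exec (recorded into evaluate.to_ops) and
 // collect the execs that immediately follow it; those successors seed backward.from_ops, i.e.
 // where the backward stage of the multistage run resumes.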
1806 for (i = 0; i < evaluate_to_size; i++)
1807 {
1808 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1809 if (to_op.graph)
1810 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1811 const int* tos;
1812 int to_size;
1813 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1814 for (j = 0; j < to_size; j++)
1815 {
1816 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1817 .d = tos[j],
1818 .graph = model->graph
1819 });
1820 if (to_op.graph)
1821 ccv_array_add_unique_int(backward_from, to_op.d);
1822 }
1823 }
1824 assert(backward_from->rnum > 0);
1825 compiled_data->backward.from_op_size = backward_from->rnum;
1826 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1827 for (i = 0; i < backward_from->rnum; i++)
1828 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1829 .d = *(int*)ccv_array_get(backward_from, i),
1830 .graph = compiled_data->graph,
1831 };
1832 // If there are any set nodes (to set some tensors to 0) inserted through the backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find these nodes and explicitly add them to backward.from_ops.
1833 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0);
1834 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1835 uint32_t* const visited = cccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1836 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0);
1837 const int source_size = compiled_data->graph->sources->rnum;
1838 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0);
1839 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1840 visited[(idx >> 5)] |= (1u << (idx & 31));
1841 } ccv_nnc_graph_visit_endfor
1842 ccv_nnc_graph_visit_free(visit);
1843 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0);
1844 const int destination_size = compiled_data->graph->destinations->rnum;
1845 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0);
1846 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1847 visited[(idx >> 5)] |= (1u << (idx & 31));
1848 } ccv_nnc_graph_visit_endfor
1849 ccv_nnc_graph_visit_free(visit);
1850 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0);
1851 // Find any missing nodes to be added as sources. Right now, these are only set nodes.
1852 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1853 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1854 {
1855 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD);
1856 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-case the set function that zeroes out a tensor, not the one that sets the gradient to 1.
1857 ccv_array_add_unique_int(backward_from, idx);
1858 }
1859 } ccv_nnc_graph_visit_endfor
1860 ccv_nnc_graph_visit_free(visit);
1861 ccfree(visited);
1862 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1863 {
1864 compiled_data->backward.from_op_size = backward_from->rnum;
1865 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1866 for (i = 0; i < backward_from->rnum; i++)
1867 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1868 .d = *(int*)ccv_array_get(backward_from, i),
1869 .graph = compiled_data->graph,
1870 };
1871 }
1872 ccv_array_free(backward_from);
1873 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1874 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1875}
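// Note: the graph compiled above contains both the forward (evaluate) and backward portions.
// evaluate.to_ops marks where the forward stage stops, and backward.from_ops (including any
// zero-setting nodes discovered above) marks where ccv_cnnp_model_backward later resumes.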
1876
1877void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1878{
1879 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1880 assert(compiled_data);
1881 const int parallel_count = ccv_max(model->parallel_count, 1);
1882 assert(output_size == model->output_size * parallel_count);
1883 assert(input_size == model->input_size * parallel_count);
1884 assert(model->graph);
1885 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1886 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1887 if (!compiled_data->graph || mode_mismatch)
1888 {
1889 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1890 if (mode_mismatch) // If the mode mismatches, we need to redo the backward pass as well (no need to redo apply_gradients; it doesn't depend on target_gradient_mode or disable_outgrad).
1891 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1892 if (params.requires_grad)
1893 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1894 else
1895 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1896 } else {
1897 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1898 assert((input_size % parallel_count) == 0);
1899 const int input_size_per_p = input_size / parallel_count;
1900 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1901 assert((output_size % parallel_count) == 0);
1902 const int output_size_per_p = output_size / parallel_count;
1903 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1904 }
1905 if (compiled_data->is_test != params.is_test)
1906 {
1907 compiled_data->is_test = params.is_test;
1908 ccv_nnc_graph_exec_update_t update = {
1909 .parallel_count = parallel_count,
1910 .graph = model->graph,
1911 .graph_exec_arena = compiled_data->graph_exec_arena,
1912 };
1913 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1914 }
1915}
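A dry run performs the same graph JIT and tensor binding as ccv_cnnp_model_evaluate below, but it never launches the graph; ccv_cnnp_model_evaluate calls it and then runs the scheduled graph. A minimal call mirrors the evaluate signature without the tape and stream arguments (hypothetical `model`, `params`, `x`, `y`; TENSOR_LIST comes from ccv_nnc_easy.h):

ccv_cnnp_model_dry_run(model, params, TENSOR_LIST(x), TENSOR_LIST(y));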
1916
1917void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1918{
1919 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1920 assert(compiled_data);
1921 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1922 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1923 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1924 else {
1925 if (!compiled_data->evaluate.schedule)
1926 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1927 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1928 }
1929}
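For orientation, a forward-only call of the evaluate API above can look like the sketch below. It assumes the same headers this file already includes (TENSOR_LIST is from ccv_nnc_easy.h), a model compiled elsewhere with one input and one output, and a parallel_count of 1; the wrapper name and the tensors `x` and `y` are hypothetical.

// Forward-only inference on an already-compiled single-input/single-output model (sketch).
static void infer_once(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const x, ccv_nnc_tensor_t* const y)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 0, // no gradient bookkeeping; takes the no-grad multistage path
		.is_test = 1, // inference behavior for layers that distinguish train / test
	};
	ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
}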
1930
1931// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1932// Particularly, this method compiles the accumulator graph.
1933static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1934{
1935 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1936 assert(compiled_data);
1937 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1938 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1939 const int parallel_count = ccv_max(model->parallel_count, 1);
1940 const int parameter_size = compiled_data->parameters->rnum;
1941 int i, j;
1942 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1943 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1944 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1945 for (i = 0; i < parameter_size; i++)
1946 for (j = 0; j < parallel_count; j++)
1947 if (compiled_data->tensors.gradients[i + j * parameter_size])
1948 {
1949 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1950 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
1951 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1952 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1953 ccv_nnc_tensor_symbol_t inputs[2];
1954 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1955 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1956 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1957 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD(), inputs, 2, &output, 1, 0);
1958 } else {
1959 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
1960 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
1961 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
1962 }
1963 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1964 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1965 {
1966 ccv_nnc_symbolic_graph_free(accum);
1967 // Create empty graph.
1968 compiled_data->backward.accum = ccv_nnc_graph_new();
1969 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1970 return;
1971 }
1972 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1973 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1974 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1975 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1976 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum), SYMBOLIC_GRAPH_DESTINATIONS(accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1977 ccv_nnc_symbolic_graph_free(accum);
1978 ccv_array_free(tensor_binds);
1979 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1980}
1981
1982void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1983{
1984 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1985 assert(compiled_data);
1986 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1987 const int parallel_count = ccv_max(model->parallel_count, 1);
1988 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count);
1989 if (outgrad_size > 0)
1990 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count); }
1991 assert(model->graph);
1992 assert(compiled_data->graph);
1993 const int parameter_size = compiled_data->parameters->rnum;
1994 // If we need to accumulate the gradients now, do jit on accumulator.
1995 if (compiled_data->backward.count > 0)
1996 {
1997 if (!compiled_data->backward.accum)
1998 _ccv_cnnp_model_multistage_jit_1(model);
1999 else if (compiled_data->backward.count == 1) {
2000 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2001 int i;
2002 for (i = 0; i < parameter_size * parallel_count; i++)
2003 {
2004 ccv_nnc_tensor_t* tensor;
2005 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor);
2006 }
2007 if (compiled_data->backward.tensor_arena)
2008 {
2009 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2010 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
2011 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2012 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2013 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2014 }
2015 }
2016 }
2017 const int ingrad_size_per_p = model->output_size;
2018 const int outgrad_size_per_p = compiled_data->outgrad_size;
2019 int i, j;
2020 for (i = 0; i < ingrad_size_per_p; i++)
2021 {
2022 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
2023 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2024 {
2025 // Set it to 1 if it is not specified.
2026 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2027 if (ingrad_tensor)
2028 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2029 for (j = 1; j < parallel_count; j++)
2030 {
2031 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2032 if (ingrad_tensor)
2033 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2034 }
2035 } else {
2036 // Make sure the length matches, in case it is an alias.
2037 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)));
2038 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2039 for (j = 1; j < parallel_count; j++)
2040 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2041 }
2042 }
2043 if (outgrad_size > 0)
2044 {
2045 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad");
2046 for (i = 0; i < outgrad_size_per_p; i++)
2047 if (outgrads[i])
2048 {
2049 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2050 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2051 for (j = 1; j < parallel_count; j++)
2052 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2053 }
2054 } else {
2055 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||
2056 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS);
2057 }
2058 // We need to rebind here because ccv_cnnp_model_evaluate clears bindings, which resets all bindings for the gradients.
2059 // For parameters and internals this is fine: clearing bindings restores the original bindings, which are exactly these
2060 // parameters and internals. The same cannot be said for gradients because of the accum_gradients switching.
2061 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2062 if (!compiled_data->backward.schedule)
2063 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2064 // Run the backward pass.
2065 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2066 // If we need to run accumulation round, do that now.
2067 if (compiled_data->backward.count > 0)
2068 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2069 // Update the count, this determines whether we need to accumulate or not.
2070 ++compiled_data->backward.count;
2071}
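Combined with ccv_cnnp_model_apply_gradients further down, one training step built on this backward API can look like the sketch below. It assumes a compiled single-input model whose only output is the loss; `model`, `x` and `loss` are hypothetical. Passing no ingrads relies on the behavior above where the missing gradient of the loss is set to 1.

// One training step: forward with gradients, backward from the loss, then update (sketch).
static void train_step(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const x, ccv_nnc_tensor_t* const loss)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 1, // builds / reuses the multistage gradient graph
		.is_test = 0,
	};
	ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x), TENSOR_LIST(loss), 0, 0);
	// No ingrads: d(loss)/d(loss) defaults to 1; no outgrads are requested either.
	ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0);
	ccv_cnnp_model_apply_gradients(model, 0);
}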
2072
2073// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2074// Particularly, this method compiles the parameter update graph.
2075static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2076{
2077 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2078 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2079 const int parallel_count = ccv_max(model->parallel_count, 1);
2080 const int parameter_size = compiled_data->parameters->rnum;
2081 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2082 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2083 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2084 // Bind accumulated gradients.
2085 if (compiled_data->backward.count > 1)
2086 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2087 else
2088 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2089 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2090 int i, j;
2091 for (i = 0; i < compiled_data->backward.to_size; i++)
2092 {
2093 const int* tos;
2094 int to_size;
2095 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2096 for (j = 0; j < to_size; j++)
2097 {
2098 // Check whether this exec already shows up in the backward graph; if it does, it won't be in the apply
2099 // gradients graph.
2100 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2101 .d = tos[j],
2102 .graph = model->graph,
2103 });
2104 if (!exec.graph)
2105 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2106 }
2107 }
2108 const int from_size = apply_gradients_from->rnum;
2109 if (from_size == 0)
2110 {
2111 ccv_array_free(apply_gradients_from);
2112 ccv_array_free(tensor_binds);
2113 return;
2114 }
2115 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2116 for (i = 0; i < from_size; i++)
2117 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2118 .d = *(int*)ccv_array_get(apply_gradients_from, i),
2119 .graph = model->graph
2120 };
2121 ccv_array_free(apply_gradients_from);
2122 // It can only end with updates on the parameters.
2123 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2124 for (i = 0; i < parameter_size; i++)
2125 {
2126 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2127 continue;
2128 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2129 for (j = 1; j < parallel_count; j++)
2130 {
2131 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2132 ccv_array_push(tos, &copy);
2133 }
2134 }
2135 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2136 ccv_array_free(tos);
2137 ccv_array_free(tensor_binds);
2138 ccfree(froms);
2139 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2140 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2141 {
2142 // Skip on no tensor.
2143 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2144 continue;
2145 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2146 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2147 for (j = 1; j < parallel_count; j++)
2148 {
2149 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2150 if (copy)
2151 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2152 }
2153 }
2154 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2155}
2156
2157void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2158{
2159 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2160 assert(compiled_data);
2161 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2162 const int parallel_count = ccv_max(model->parallel_count, 1);
2163 assert(model->graph);
2164 assert(compiled_data->graph);
2165 // Skip if there is no backward pass.
2166 if (compiled_data->backward.count <= 0)
2167 return;
2168 // Skip if there are no parameters.
2169 if (compiled_data->parameters->rnum == 0)
2170 {
2171 compiled_data->backward.count = 0;
2172 return;
2173 }
2174 if (!compiled_data->apply_gradients.graph)
2175 _ccv_cnnp_model_multistage_jit_2(model);
2176 else {
2177 const int parameter_size = compiled_data->parameters->rnum;
2178 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2179 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2180 if (compiled_data->backward.count > 1)
2181 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2182 else
2183 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2184 }
2185 if (compiled_data->apply_gradients.graph)
2186 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2187 // Reset backward count to 0.
2188 compiled_data->backward.count = 0;
2189}
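Because backward.count is only reset here, running ccv_cnnp_model_backward several times before a single ccv_cnnp_model_apply_gradients accumulates gradients through the accum graph compiled by _ccv_cnnp_model_multistage_jit_1. A gradient-accumulation sketch with the same hypothetical names as before (`x` is now an array of micro-batch inputs):

// Accumulate gradients over several micro-batches, then apply one update (sketch).
static void train_step_accumulated(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const x, const int micro_batches, ccv_nnc_tensor_t* const loss)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 1,
	};
	int k;
	for (k = 0; k < micro_batches; k++)
	{
		ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x[k]), TENSOR_LIST(loss), 0, 0);
		ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); // accumulates after the first call
	}
	ccv_cnnp_model_apply_gradients(model, 0); // applies the accumulated gradients, resets the count
}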
2190
2191void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2192{
2193 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2194 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2195 assert(parameter->param_sel != 0);
2196 const int tensors_init = !!compiled_data->tensors_init.v;
2197 if (!tensors_init)
2198 _ccv_cnnp_model_tensors_init(model, compiled_data);
2199 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2200 // Check whether it is fully allocated; if it is not, run init_1.
2201 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2202 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2203 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2204 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2205 if (param_ref < 0)
2206 { assert(parameter_indices->rnum == 1); }
2207 else
2208 { assert(param_ref < parameter_indices->rnum); }
2209 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2210 ccv_array_free(parameter_indices);
2211 const int parameter_size = compiled_data->parameters->rnum;
2212 assert(d >= 0);
2213 assert(d < parameter_size);
2214 const int parallel_count = ccv_max(model->parallel_count, 1);
2215 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2216 assert(dest);
2217 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor), TENSOR_LIST(dest), 0);
2218 int i;
2219 for (i = 1; i < parallel_count; i++)
2220 {
2221 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size]);
2222 if (copy_tensor)
2223 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dest), TENSOR_LIST(copy_tensor), 0);
2224 }
2225 // Mark this symbol as init'ed.
2226 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d))->d;
2227 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2228 init_v[s >> 5] |= (1u << (s & 0x1f));
2229}
2230
2231void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2232{
2233 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2234 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2235 assert(parameter->param_sel != 0);
2236 assert(compiled_data->tensors.parameters);
2237 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2238 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2239 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2240 if (param_ref < 0)
2241 { assert(parameter_indices->rnum == 1); }
2242 else
2243 { assert(param_ref < parameter_indices->rnum); }
2244 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2245 ccv_array_free(parameter_indices);
2246 const int parameter_size = compiled_data->parameters->rnum;
2247 assert(d >= 0);
2248 assert(d < parameter_size);
2249 // We don't need to consider parallel_count; every parameter on each device is identical.
2250 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2251 assert(src);
2252 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(tensor), 0);
2253}
2254
2255ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2256{
2257 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2258 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2259 assert(parameter->param_sel != 0);
2260 assert(compiled_data->tensors.parameters);
2261 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2262 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2263 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2264 if (param_ref < 0)
2265 { assert(parameter_indices->rnum == 1); }
2266 else
2267 { assert(param_ref < parameter_indices->rnum); }
2268 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2269 ccv_array_free(parameter_indices);
2270 const int parameter_size = compiled_data->parameters->rnum;
2271 assert(d >= 0);
2272 assert(d < parameter_size);
2273 // We don't need to consider parallel_count; every parameter on each device is identical.
2274 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2275 assert(tensor);
2276 return tensor->info;
2277}
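The three parameter accessors above compose: select a parameter handle, query its tensor parameters, and move data in or out. The sketch below assumes the model's parameter tensors have already been materialized (for example after a first evaluate), so the asserts above hold; `model` is hypothetical and the data-filling step is elided.

// Overwrite parameter 0 with host-prepared data, then read it back out (sketch).
static void roundtrip_first_parameter(ccv_cnnp_model_t* const model)
{
	const ccv_cnnp_model_io_t w = ccv_cnnp_model_parameters(model, -1, 0); // parameter 0
	const ccv_nnc_tensor_param_t info = ccv_cnnp_model_parameter_tensor_params(model, w);
	ccv_nnc_tensor_t* const staging = ccv_nnc_tensor_new(0, info, 0);
	// ... fill staging->data here ...
	ccv_cnnp_model_set_parameter(model, w, staging); // copied to every device copy of the parameter
	ccv_cnnp_model_parameter_copy(model, w, staging); // copies the parameter back out
	ccv_nnc_tensor_free(staging);
}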
2278
2279const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2280{
2281 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2282 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2283 assert(parameter->param_sel != 0);
2284 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2285 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2286 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2287 if (param_ref < 0)
2288 { assert(parameter_indices->rnum == 1); }
2289 else
2290 { assert(param_ref < parameter_indices->rnum); }
2291 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2292 ccv_array_free(parameter_indices);
2293 const int parameter_size = compiled_data->parameters->rnum;
2294 assert(d >= 0);
2295 assert(d < parameter_size);
2296 return *(char**)ccv_array_get(compiled_data->ids.parameters, d);
2297}
2298
2299int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2300{
2301 assert(model->compiled_data);
2302 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2303 return compiled_data->parameters->rnum;
2304}
2305
2306uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2307{
2308 assert(model->compiled_data);
2309 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2310 const int parameter_size = compiled_data->parameters->rnum;
2311 int i;
2312 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2313 uint64_t size = 0;
2314 for (i = 0; i < parameter_size; i++)
2315 {
2316 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
2317 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2318 .graph = graph,
2319 .d = d
2320 });
2321 size += ccv_nnc_tensor_data_size(params);
2322 }
2323 return size;
2324}
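These two counters give a quick summary of a compiled model; a sketch (printf needs <stdio.h>, `model` is hypothetical):

// Print the number of parameter tensors and their total size in bytes (sketch).
static void print_parameter_summary(ccv_cnnp_model_t* const model)
{
	const int count = ccv_cnnp_model_parameter_count(model);
	const uint64_t bytes = ccv_cnnp_model_parameters_size(model);
	printf("%d parameter tensors, %llu bytes\n", count, (unsigned long long)bytes);
}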
2325
2326ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2327{
2328 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2329 assert(compiled_data);
2330 const int parameter_size = compiled_data->parameters->rnum;
2331 int i;
2332 for (i = 0; i < parameter_size; i++)
2333 {
2334 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2335 if (first(model, name, context))
2336 return ccv_cnnp_model_parameters(model, -1, i);
2337 }
2338 return 0;
2339}
2340
2341ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2342{
2343 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2344 assert(compiled_data);
2345 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2346 const int parameter_size = compiled_data->parameters->rnum;
2347 int i;
2348 for (i = 0; i < parameter_size; i++)
2349 {
2350 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2351 if (filter(model, name, context))
2352 {
2353 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2354 ccv_array_push(parameters, &parameter);
2355 }
2356 }
2357 return parameters;
2358
2359}
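Both ccv_cnnp_model_parameter_first and ccv_cnnp_model_parameters_filter take a callback that receives the model, the parameter's internal id string (the same string ccv_cnnp_model_parameter_name returns) and the caller's context, and returns non-zero to select. The sketch below collects every parameter whose id contains a substring; the callback name and the substring are hypothetical, and the exact parameter qualifiers should follow the ccv_cnnp_model_parameters_filter_f typedef in the headers (strstr needs <string.h>).

// Select parameters whose id contains the substring passed as context (sketch).
static int _select_by_substring(const ccv_cnnp_model_t* const model, const char* const name, void* const context)
{
	return strstr(name, (const char*)context) != 0;
}

// Usage: collect all "bias"-named parameters, then free the array of handles when done.
ccv_array_t* const selected = ccv_cnnp_model_parameters_filter(model, _select_by_substring, (void*)"bias");
ccv_array_free(selected);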
2360
2361CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2362{
2363 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2364 assert(compiled_data);
2365 const int tensors_init = !!compiled_data->tensors_init.v;
2366 if (!tensors_init) // If nothing initialized, we return parameter 0.
2367 return ccv_cnnp_model_parameters(model, -1, 0);
2368 const int parameter_size = compiled_data->parameters->rnum;
2369 int i;
2370 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2371 for (i = 0; i < parameter_size; i++)
2372 {
2373 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
2374 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2375 return ccv_cnnp_model_parameters(model, -1, i);
2376 }
2377 return 0;
2378}
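A typical use of the helper above is a sanity check before training or inference: report the first parameter that has not been initialized yet (hypothetical wrapper, printf needs <stdio.h>).

// Warn about the first uninitialized parameter, if any (sketch).
static void warn_uninitialized(ccv_cnnp_model_t* const model)
{
	const ccv_cnnp_model_io_t uninit = ccv_cnnp_model_parameter_first_uninit(model);
	if (uninit)
		printf("parameter %s is not initialized\n", ccv_cnnp_model_parameter_name(model, uninit));
}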
2379
2380static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2381{
2382 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2383 assert(parameters->param_sel != 0);
2384 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2385 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2386 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2387 return to_parameter_indices;
2388}
2389
2390static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2391{
2392 // If the model is not compiled yet, compile it now.
2393 if (!model->graph)
2394 {
2395 model->graph = ccv_nnc_symbolic_graph_new();
2396 assert(from_model->compiled_data);
2397 const int input_size = from_model->input_size;
2398 ccv_nnc_tensor_param_t input_params[input_size];
2399 int i;
2400 for (i = 0; i < input_size; i++)
2401 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2402 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2403 model->parallel_count = from_model->parallel_count;
2404 model->memory_compression = from_model->memory_compression;
2405 model->memory_reduction = from_model->memory_reduction;
2406 model->gradient_checkpointing = from_model->gradient_checkpointing;
2407 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2408 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2409 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2410 }
2411 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2412 assert(to_compiled_data);
2413 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2414 if (!to_tensors_init)
2415 {
2416 if (only_init_0)
2417 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2418 else
2419 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2420 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2421 // Check whether it is fully allocated; if it is not, run init_1.
2422 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2423 assert(to_compiled_data->tensors.parameters);
2424 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2425 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2426 if (*from_param_ref < 0 && *param_ref >= 0)
2427 { assert((*from_parameter_indices)->rnum == 1); }
2428 else if (*from_param_ref >= 0)
2429 { assert(*from_param_ref < (*from_parameter_indices)->rnum); }
2430 if (*param_ref < 0 && *from_param_ref >= 0)
2431 { assert((*parameter_indices)->rnum == 1); }
2432 else if (*param_ref >= 0)
2433 { assert(*param_ref < (*parameter_indices)->rnum); }
2434}
2435
2436void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2437{
2438 ccv_array_t* to_parameter_indices;
2439 int to_param_ref;
2440 ccv_array_t* from_parameter_indices;
2441 int from_param_ref;
2442 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2443 // Should be exactly the same tensor.
2444 if (to_param_ref < 0 && from_param_ref < 0)
2445 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum); }
2446 // To models.
2447 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2448 assert(to_compiled_data);
2449 // From models.
2450 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2451 const int parallel_count = ccv_max(model->parallel_count, 1);
2452 const int to_parameter_size = to_compiled_data->parameters->rnum;
2453 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2454 int i, j;
2455 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v);
2456 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v);
2457 for (i = 0; i < rnum; i++)
2458 {
2459 const int src_d = *(int*)ccv_array_get(from_parameter_indices, from_param_ref >= 0 ? from_param_ref : i);
2460 assert(src_d >= 0);
2461 assert(src_d < from_compiled_data->parameters->rnum);
2462 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d))->d;
2463 // If the original is not init'ed, we cannot copy from it.
2464 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2465 continue;
2466 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2467 assert(dest_d >= 0);
2468 assert(dest_d < to_compiled_data->parameters->rnum);
2469 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d]);
2470 assert(src);
2471 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d]);
2472 assert(dest);
2473 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(dest), 0);
2474 for (j = 1; j < parallel_count; j++)
2475 {
2476 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2477 if (copy_tensor)
2478 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2479 }
2480 // Mark this symbol as init'ed.
2481 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2482 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2483 }
2484 ccv_array_free(to_parameter_indices);
2485 ccv_array_free(from_parameter_indices);
2486}
2487
2488KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27. Taking true branch
28. Taking false branch
29. Calling 'kh_resize_ccv_cnnp_parameter_id'
30. Taking true branch
31. Assuming the condition is false
32. Taking false branch
33. '?' condition is true
34. Assuming 'new_flags' is non-null
35. Taking false branch
36. '?' condition is true
37. Taking true branch
38. Storing uninitialized value
39. Assuming 'new_keys' is non-null
40. Taking false branch
41. Taking true branch
42. Assuming 'new_vals' is non-null
43. Taking false branch
44. Taking true branch
45. Loop condition is false. Execution continues on line 2488
46. Taking false branch
47. Returning from 'kh_resize_ccv_cnnp_parameter_id'
48. Taking false branch
49. Assuming the condition is true
50. Taking true branch
51. Taking true branch
57. Assuming field 'n_occupied' is >= field 'upper_bound'
58. Taking true branch
59. Taking true branch
60. Calling 'kh_resize_ccv_cnnp_parameter_id'
61. Taking false branch
62. Assuming the condition is false
63. Taking false branch
64. '?' condition is true
65. Assuming 'new_flags' is non-null
66. Taking false branch
67. '?' condition is true
68. Taking false branch
69. Taking true branch
70. Loop condition is true. Entering loop body
71. Assuming the condition is false
72. Taking false branch
73. The value 1 is assigned to 'j'
74. Loop condition is true. Entering loop body
75. Assuming the condition is true
76. Taking true branch
77. Assigned value is garbage or undefined
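The path above ends inside kh_resize's rehash loop, where 'val = h->vals[j]' reads a slot of the freshly realloc'ed vals array; the analyzer assumes the flag bits mark bucket j as occupied even though j lies past the region written before the grow. A minimal, hypothetical sketch of that shape (not the khash code itself; all names are invented for illustration):

#include <stdlib.h>

/* `vals` is grown with realloc (slots old_n..new_n-1 are indeterminate), then read
 * back under a separate `used` bitmap. The caller's invariant is that `used` only
 * marks slots written before the grow, but the analyzer assumes a marked slot past
 * old_n and reports the read as garbage or undefined. */
static int sum_marked(int* vals, const unsigned char* const used, const size_t old_n, const size_t new_n)
{
	(void)old_n; /* only used to document the invariant */
	int* const grown = (int*)realloc(vals, new_n * sizeof(int));
	if (!grown)
		return -1;
	size_t i;
	int sum = 0;
	for (i = 0; i < new_n; i++)
		if (used[i]) /* invariant: used[i] == 0 for i >= old_n */
			sum += grown[i]; /* flagged when the analyzer assumes used[i] != 0 for i >= old_n */
	free(grown);
	return sum;
}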
2489
2490void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2491{
2492 ccv_array_t* to_parameter_indices;
2493 int to_param_ref;
2494 ccv_array_t* from_parameter_indices;
2495 int from_param_ref;
2496 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2497 // Should be exactly the same tensor.
2498 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1. Assuming 'renamer' is not equal to null
2499 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2499, __extension__ __PRETTY_FUNCTION__
); }))
; }
2500 // To models.
2501 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2502 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2502, __extension__ __PRETTY_FUNCTION__
); }))
;
2. Assuming 'to_compiled_data' is non-null
3. Taking true branch
2503 // From models.
2504 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2505 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4. Assuming '_a' is <= '_b'
5. '?' condition is false
2506 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2506, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count to share parameters.
6. Assuming '_a' is <= '_b'
7. '?' condition is false
8. Taking true branch
2507 const int from_parameter_size = from_compiled_data->parameters->rnum;
2508 const int to_parameter_size = to_compiled_data->parameters->rnum;
2509 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9. Assuming 'to_param_ref' is >= 0
2510 int i, j;
2511 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2512 char* updated_name = 0;
2513 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2514 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2515 for (i = 0; i < rnum; i++)
2516 {
2517 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10. Assuming 'from_param_ref' is < 0
11. '?' condition is false
12. Assuming the condition is false
13. '?' condition is false
2518 // Need to figure out how to use the renamer here.
2519 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14. '?' condition is true
2520 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2520, __extension__ __PRETTY_FUNCTION__); }))
;
15. Assuming 'dest_d' is >= 0
16. Taking true branch
2521 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2521, __extension__
__PRETTY_FUNCTION__); }))
;
17. Assuming 'dest_d' is < 'to_parameter_size'
18. Taking true branch
2522 if (renamer
18.1. 'renamer' is non-null
)
2523 {
2524 const char* const src_name = (src_d
18.2. 'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2525 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2526 if (!updated_name
18.3. 'updated_name' is null
)
19. Taking true branch
2527 updated_name = (char*)ccmallocmalloc(1024);
2528 const size_t src_name_len = src_name
19.1. 'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20. '?' condition is true
2529 if (src_name_len
20.1. 'src_name_len' is <= 0
> 0)
21. Taking false branch
2530 memcpy(updated_name, src_name, src_name_len);
2531 updated_name[src_name_len] = 0;
2532 if (renamer(context, dest_name, updated_name, 1024) != 0)
22. Assuming the condition is false
2533 continue; // Skip this.
2534 if (src_name
22.1. 'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2535 {
2536 // Nothing changed.
2537 } else {
2538 if (!id_map
22.2. 'id_map' is null
)
23. Taking true branch
2539 {
2540 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2541 for (j = 0; j < from_parameter_size; j++)
24. Assuming 'j' is < 'from_parameter_size'
25. Loop condition is true. Entering loop body
54. Assuming 'j' is < 'from_parameter_size'
55. Loop condition is true. Entering loop body
2542 {
2543 int ret;
2544 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26. Calling 'kh_put_ccv_cnnp_parameter_id'
52. Returning from 'kh_put_ccv_cnnp_parameter_id'
56. Calling 'kh_put_ccv_cnnp_parameter_id'
2545 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2545
, __extension__ __PRETTY_FUNCTION__); }))
;
53. Taking true branch
2546 kh_val(id_map, k)((id_map)->vals[k]) = j;
2547 }
2548 }
2549 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2550 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2551 continue;
2552 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2553 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2553, __extension__ __PRETTY_FUNCTION__); }))
;
2554 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2554, __extension__
__PRETTY_FUNCTION__); }))
;
2555 }
2556 }
2557 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2557, __extension__ __PRETTY_FUNCTION__); }))
;
2558 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2558, __extension__
__PRETTY_FUNCTION__); }))
;
2559 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2560 // If the original is not init'ed, we cannot share from it.
2561 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2562 continue;
2563 for (j = 0; j < parallel_count; j++)
2564 {
2565 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2566 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2566, __extension__
__PRETTY_FUNCTION__); }))
;
2567 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2568 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2569 ccv_nnc_tensor_free(dest);
2570 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2571 }
2572 // Mark this symbol as init'ed.
2573 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2574 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2575 }
2576 ccv_array_free(to_parameter_indices);
2577 ccv_array_free(from_parameter_indices);
2578 if (id_map)
2579 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2580 if (updated_name)
2581 ccfreefree(updated_name);
2582 // Mark it as incomplete so we will call init_1.
2583 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2584 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2585 else // Remove the flag.
2586 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2587}
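At 2524-2554 the renamer, when provided, is called as renamer(context, dest_name, updated_name, 1024): updated_name arrives pre-filled with the positional source parameter id (or empty), and after the call it is looked up in an id map built from the source model's parameter ids; a non-zero return skips that parameter. A hedged usage sketch follows; the callback signature is assumed to match this call site, and the model/io names are placeholders:

#include <stdio.h>

// Hypothetical renamer: the source ("teacher") model is assumed to name every
// parameter like the destination ("student") model, prefixed with "teacher-".
static int map_dest_to_source_name(void* const context, const char* const dest_name, char* const updated_name, const size_t provided_size)
{
	(void)context;
	const int len = snprintf(updated_name, provided_size, "teacher-%s", dest_name);
	return (len < 0 || (size_t)len >= provided_size) ? -1 : 0; // non-zero skips this parameter
}

// Usage sketch: student_params_io / teacher_params_io are ccv_cnnp_model_io_t
// handles selecting the parameters on each side (e.g. from ccv_cnnp_model_parameters()).
// ccv_cnnp_model_share_parameters(student, student_params_io, teacher, teacher_params_io, map_dest_to_source_name, 0);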
2588
2589ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2590{
2591 if (!compiled_data->stream_map)
2592 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2593 int ret = 0;
2594 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2595 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2595, __extension__ __PRETTY_FUNCTION__); }))
;
2596 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2597 // If ret == 0, the key already exists and we can return directly; otherwise, create and return.
2598 if (ret != 0)
2599 {
2600 stream = ccv_nnc_stream_context_new(type);
2601 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2602 }
2603 return stream;
2604}
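The function above is a get-or-create cache keyed by stream type: kh_put's ret reports whether the key was newly inserted (create and store) or already present (return the cached context). A generic sketch of the same idiom, assuming khash.h is on the include path and using hypothetical names:

#include <assert.h>
#include "khash.h"

KHASH_MAP_INIT_INT(int_cache, void*)

// Return the cached entry for `key`, creating it with `make(key)` on first use.
static void* cache_get_or_create(khash_t(int_cache)* const cache, const int key, void* (*const make)(int))
{
	int ret = 0;
	const khiter_t k = kh_put(int_cache, cache, key, &ret);
	assert(ret >= 0); // ret < 0 would signal an allocation failure inside khash
	if (ret != 0) // the key was not present before: fill in the new slot
		kh_val(cache, k) = make(key);
	return kh_val(cache, k);
}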
2605
2606void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2607{
2608 ccv_array_t* to_parameter_indices;
2609 int to_param_ref;
2610 ccv_array_t* from_parameter_indices;
2611 int from_param_ref;
2612 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2613 // Should be exactly the same tensor.
2614 if (to_param_ref < 0 && from_param_ref < 0)
2615 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2615, __extension__ __PRETTY_FUNCTION__
); }))
; }
2616 // To models.
2617 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2618 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2618, __extension__ __PRETTY_FUNCTION__
); }))
;
2619 // From models.
2620 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2621 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2622 const int to_parameter_size = to_compiled_data->parameters->rnum;
2623 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2624 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2624, __extension__ __PRETTY_FUNCTION__
); }))
;
2625 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2625, __extension__ __PRETTY_FUNCTION__
); }))
;
2626 int i, j;
2627 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2628 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2629 for (i = 0; i < aux_in_size; i++)
2630 inputs[i + 2] = aux_ins[i];
2631 for (i = 0; i < aux_out_size; i++)
2632 outputs[i + 1] = aux_outs[i];
2633 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2634 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2635 for (i = 0; i < rnum; i++)
2636 {
2637 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2638 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2638, __extension__ __PRETTY_FUNCTION__); }))
;
2639 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2639, __extension__ __PRETTY_FUNCTION__
); }))
;
2640 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2641 // If the original is not init'ed, we cannot copy from it.
2642 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2643 continue;
2644 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2645 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2645, __extension__ __PRETTY_FUNCTION__); }))
;
2646 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2646, __extension__ __PRETTY_FUNCTION__
); }))
;
2647 if (parallel_count > 1)
2648 {
2649 ccv_nnc_stream_context_t* streams[parallel_count];
2650 ccv_nnc_stream_signal_t* signal;
2651 if (stream_context)
2652 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2653 for (j = 0; j < parallel_count; j++)
2654 {
2655 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2656 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2657 if (!dest || !src)
2658 {
2659 streams[j] = 0;
2660 continue;
2661 }
2662 // At the moment, we can only handle them on the same device.
2663 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2663, __extension__ __PRETTY_FUNCTION__
); }))
;
2664 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2664, __extension__ __PRETTY_FUNCTION__
); }))
;
2665 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2666 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2667 int type = stream_type;
2668 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2669 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2670 // Wait for the signal to finish.
2671 if (stream_context)
2672 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2673 inputs[0] = outputs[0] = dest;
2674 inputs[1] = src;
2675 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2676 if (stream_context)
2677 {
2678 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2679 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2680 }
2681 streams[j] = stream_0;
2682 }
2683 // If this should be blocking, block it.
2684 if (!stream_context)
2685 for (j = 0; j < parallel_count; j++)
2686 if (streams[j])
2687 ccv_nnc_stream_context_wait(streams[j]);
2688 } else {
2689 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2690 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2690, __extension__
__PRETTY_FUNCTION__); }))
;
2691 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2692 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2692, __extension__
__PRETTY_FUNCTION__); }))
;
2693 inputs[0] = outputs[0] = dest;
2694 inputs[1] = src;
2695 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2696 }
2697 // Mark this symbol as init'ed.
2698 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2699 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2700 }
2701 ccv_array_free(to_parameter_indices);
2702 ccv_array_free(from_parameter_indices);
2703}
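Because the command receives inputs[0] = dest, inputs[1] = src and writes outputs[0] = dest, a blended in-place update can be expressed directly. A hedged sketch of an exponential-moving-average update, assuming CMD_ADD_FORWARD(a, b) blends its two inputs as a * input0 + b * input1 and that the two io handles select matching parameter lists:

// Hypothetical helper: one EMA step, ema = decay * ema + (1 - decay) * live.
static void ema_update(ccv_cnnp_model_t* const ema_model, const ccv_cnnp_model_io_t ema_params_io,
	const ccv_cnnp_model_t* const live_model, const ccv_cnnp_model_io_t live_params_io, const float decay)
{
	ccv_cnnp_model_parameters_zip_map(ema_model, ema_params_io,
		CMD_ADD_FORWARD(decay, 1 - decay), ccv_nnc_no_hint, 0,
		0, 0, 0, 0, // no auxiliary inputs or outputs
		0, // no stream context: run blocking
		live_model, live_params_io);
}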
2704
2705void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2706{
2707 int to_param_ref;
2708 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2709 // To models.
2710 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2711 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2711, __extension__ __PRETTY_FUNCTION__
); }))
;
2712 // Tensors have to be init'ed already.
2713 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2713, __extension__ __PRETTY_FUNCTION__
); }))
;
2714 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2714, __extension__ __PRETTY_FUNCTION__
); }))
;
2715 // From models.
2716 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2717 const int to_parameter_size = to_compiled_data->parameters->rnum;
2718 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2719 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2719, __extension__ __PRETTY_FUNCTION__
); }))
;
2720 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2720, __extension__ __PRETTY_FUNCTION__
); }))
;
2721 int i, j;
2722 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2723 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2724 for (i = 0; i < aux_in_size; i++)
2725 inputs[i + 1] = aux_ins[i];
2726 for (i = 0; i < aux_out_size; i++)
2727 outputs[i + 1] = aux_outs[i];
2728 for (i = 0; i < rnum; i++)
2729 {
2730 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2731 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2731, __extension__ __PRETTY_FUNCTION__); }))
;
2732 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2732, __extension__ __PRETTY_FUNCTION__
); }))
;
2733 if (parallel_count > 1)
2734 {
2735 ccv_nnc_stream_context_t* streams[parallel_count];
2736 ccv_nnc_stream_signal_t* signal;
2737 if (stream_context)
2738 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2739 for (j = 0; j < parallel_count; j++)
2740 {
2741 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2742 if (!dest)
2743 {
2744 streams[j] = 0;
2745 continue;
2746 }
2747 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2748 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2749 int type = stream_type;
2750 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2751 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2753 // Wait for the signal to finish.
2753 if (stream_context)
2754 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2755 inputs[0] = outputs[0] = dest;
2756 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2757 if (stream_context)
2758 {
2759 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2760 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2761 }
2762 streams[j] = stream_0;
2763 }
2765 // If this should be blocking, block it.
2765 if (!stream_context)
2766 for (j = 0; j < parallel_count; j++)
2767 if (streams[j])
2768 ccv_nnc_stream_context_wait(streams[j]);
2769 } else {
2770 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2771 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2771, __extension__
__PRETTY_FUNCTION__); }))
;
2772 inputs[0] = outputs[0] = dest;
2773 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2774 }
2775 // No need to mark this symbol as init'ed; it already is.
2776 }
2777 ccv_array_free(to_parameter_indices);
2778}
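Here the selected parameter is passed as both inputs[0] and outputs[0], so in-place fills work as well. A hedged sketch, assuming CMD_SET_FORWARD(v) writes the constant v into its output; the io handle is a placeholder. The gradients variant below (ccv_cnnp_model_parameter_gradients_map) follows the same calling convention for gradient tensors.

// Hypothetical helper: zero every parameter selected by params_io.
static void zero_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t params_io)
{
	ccv_cnnp_model_parameters_map(model, params_io, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0,
		0, 0, 0, 0, // no auxiliary inputs or outputs
		0); // no stream context: run blocking
}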
2779
2780void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2781{
2782 int to_param_ref;
2783 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2784 // To models.
2785 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2786 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2786, __extension__ __PRETTY_FUNCTION__
); }))
;
2787 // Tensors have to be init'ed already.
2788 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2788, __extension__ __PRETTY_FUNCTION__
); }))
;
2789 ccv_nnc_tensor_t** tensor_gradients;
2790 if (to_compiled_data->backward.count > 1)
2791 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2792 else
2793 tensor_gradients = to_compiled_data->tensors.gradients;
2794 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 2794, __extension__ __PRETTY_FUNCTION__
); }))
;
2795 // From models.
2796 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2797 const int to_parameter_size = to_compiled_data->parameters->rnum;
2798 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2799 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2799, __extension__ __PRETTY_FUNCTION__
); }))
;
2800 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2800, __extension__ __PRETTY_FUNCTION__
); }))
;
2801 int i, j;
2802 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2803 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2804 for (i = 0; i < aux_in_size; i++)
2805 inputs[i + 1] = aux_ins[i];
2806 for (i = 0; i < aux_out_size; i++)
2807 outputs[i + 1] = aux_outs[i];
2808 for (i = 0; i < rnum; i++)
2809 {
2810 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2811 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2811, __extension__ __PRETTY_FUNCTION__); }))
;
2812 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2812, __extension__ __PRETTY_FUNCTION__
); }))
;
2813 if (parallel_count > 1)
2814 {
2815 ccv_nnc_stream_context_t* streams[parallel_count];
2816 ccv_nnc_stream_signal_t* signal;
2817 if (stream_context)
2818 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2819 for (j = 0; j < parallel_count; j++)
2820 {
2821 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2822 if (!dest)
2823 {
2824 streams[j] = 0;
2825 continue;
2826 }
2827 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2828 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2829 int type = stream_type;
2830 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2831 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2833 // Wait for the signal to finish.
2833 if (stream_context)
2834 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2835 inputs[0] = outputs[0] = dest;
2836 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2837 if (stream_context)
2838 {
2839 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2840 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2841 }
2842 streams[j] = stream_0;
2843 }
2845 // If this should be blocking, block it.
2845 if (!stream_context)
2846 for (j = 0; j < parallel_count; j++)
2847 if (streams[j])
2848 ccv_nnc_stream_context_wait(streams[j]);
2849 } else {
2850 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2851 if (!dest)
2852 continue;
2853 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2853, __extension__
__PRETTY_FUNCTION__); }))
;
2854 inputs[0] = outputs[0] = dest;
2855 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2856 }
2857 // No need to mark this symbol as init'ed; it already is.
2858 }
2859 ccv_array_free(to_parameter_indices);
2860}
2861
2862ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2863{
2864 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2865 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2865, __extension__ __PRETTY_FUNCTION__); }))
;
2866 return compiled_data->minimize.minimizer;
2867}
2868
2869void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2870{
2871 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2872 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2872, __extension__ __PRETTY_FUNCTION__); }))
;
2873 const int parameter_size = compiled_data->parameters->rnum;
2874 if (parameter_size == 0)
2875 return;
2876 if (reset)
2877 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 2877, __extension__ __PRETTY_FUNCTION__
); }))
; }
2878 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2879 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2880 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2881 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2882 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2883 // We update all parameters; at this point, we have one minimizer.
2884 if (set_parameters == 0 || set_parameter_size == 0)
2885 compiled_data->minimize.minimizer = minimizer;
2886 int i;
2887 if (set_parameters && set_parameter_size)
2888 {
2889 // We need to save which minimizer goes along with these parameters.
2890 if (!compiled_data->minimize.parameters)
2891 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2892 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2893 set_minimizer_for_parameter->minimizer = minimizer;
2894 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2895 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2896 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2897 }
2898 // If reset is true, clear the parameters array.
2899 if (reset && compiled_data->minimize.parameters)
2900 {
2901 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2902 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2903 ccv_array_clear(compiled_data->minimize.parameters);
2904 }
2905 if (!compiled_data->update_nodes)
2906 return;
2907 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2908 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 2908, __extension__ __PRETTY_FUNCTION__); }))
;
2909 if (saved_aux_size > old_max_saved_aux_size)
2910 {
2911 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 2911, __extension__ __PRETTY_FUNCTION__
); }))
;
2912 // Reallocate first, move them around later.
2913 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2914 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2915 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2916 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, so the regions could overlap.
2917 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2918 }
2919 int flag = 0;
2920 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2921 if (set_parameters && set_parameter_size)
2922 {
2923 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2924 for (i = 0; i < set_parameter_size; i++)
2925 {
2926 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2927 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 2927, __extension__ __PRETTY_FUNCTION__
); }))
;
2928 const int old_rnum = parameter_indices->rnum;
2929 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2930 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2931 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 2931, __extension__ __PRETTY_FUNCTION__
); }))
;
2932 if (param_ref >= 0)
2933 {
2934 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2934, __extension__ __PRETTY_FUNCTION__
); }))
;
2935 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
2936 parameter_indices->rnum = old_rnum + 1;
2937 }
2938 }
2939 // We may have duplicated indices, but that is OK; we will just set them twice.
2940 for (i = 0; i < parameter_indices->rnum; i++)
2941 {
2942 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
2943 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2944 flag = 1;
2945 }
2946 ccv_array_free(parameter_indices);
2947 } else {
2948 for (i = 0; i < parameter_size; i++)
2949 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2950 flag = 1;
2951 if (compiled_data->minimize.parameters)
2952 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2953 flag = 1;
2954 }
2955 if (flag)
2956 {
2957 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up the apply-gradients graph.
2958 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2959 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2960 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2961 }
2962}
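A hedged sketch of the two calling modes visible above: a model-wide minimizer (set_parameters == 0) and a per-parameter override recorded in minimize.parameters. The commands `base` and `frozen` are assumed to be valid ccv_nnc_cmd_t minimizers (e.g. built with CMD_SGD_FORWARD(...)); `backbone_io` is a placeholder io handle selecting a subset of parameters.

// Hypothetical configuration helper.
static void configure_minimizers(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t base, const ccv_nnc_cmd_t frozen, const ccv_cnnp_model_io_t backbone_io)
{
	ccv_cnnp_model_set_minimizer(model, base, 0, 0, 0); // applies to every parameter
	ccv_cnnp_model_set_minimizer(model, frozen, 0, &backbone_io, 1); // recorded override for the subset
	// Passing reset = 1 requires set_parameters == 0 (see the assert at 2877) and
	// clears previously recorded per-parameter overrides:
	// ccv_cnnp_model_set_minimizer(model, base, 1, 0, 0);
}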
2963
2964void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2965{
2966 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2967 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2967, __extension__ __PRETTY_FUNCTION__); }))
;
2968 compiled_data->compile_params = compile_params;
2969}
2970
2971void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2972{
2973 if (model->graph && out_size > 0)
2974 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2975 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2976 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2977 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2978 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2979 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2980 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2981}
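A hedged usage sketch for the dot dump above: it writes whichever graphs exist to Graphviz files, in the order the function checks them. CCV_NNC_LONG_DOT_GRAPH is assumed to be the verbose flag; file names are arbitrary.

#include <stdio.h>

static void dump_graphs(const ccv_cnnp_model_t* const model)
{
	const char* const names[4] = { "symbolic.dot", "graph.dot", "backward_accum.dot", "apply_gradients.dot" };
	FILE* outs[4];
	int i, ok = 1;
	for (i = 0; i < 4; i++)
		ok &= !!(outs[i] = fopen(names[i], "w+"));
	if (ok) // only dump when every file opened successfully
		ccv_cnnp_model_dot(model, CCV_NNC_LONG_DOT_GRAPH, outs, 4);
	for (i = 0; i < 4; i++)
		if (outs[i])
			fclose(outs[i]);
}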
2982
2983void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2984{
2985 if (model->graph)
2986 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2987}
2988
2989static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2990{
2991 int i;
2992 const int parameter_size = compiled_data->parameters->rnum;
2993 ccv_array_free(compiled_data->parameters);
2994 if (compiled_data->parameter_flags)
2995 ccfreefree(compiled_data->parameter_flags);
2996 const int internal_size = compiled_data->internals->rnum;
2997 ccv_array_free(compiled_data->internals);
2998 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 2998, __extension__ __PRETTY_FUNCTION__
); }))
;
2999 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 2999, __extension__ __PRETTY_FUNCTION__
); }))
;
3000 for (i = 0; i < parameter_size; i++)
3001 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
3002 ccv_array_free(compiled_data->ids.parameters);
3003 for (i = 0; i < internal_size; i++)
3004 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
3005 ccv_array_free(compiled_data->ids.internals);
3006 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3007 if (compiled_data->tensors.parameters)
3008 {
3009 for (i = 0; i < parameter_size * parallel_count; i++)
3010 // If it is not marked as not belonging, we can free it.
3011 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
3012 if (compiled_data->tensors.parameters[i])
3013 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
3014 for (i = 0; i < internal_size * parallel_count; i++)
3015 if (compiled_data->tensors.internals[i])
3016 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
3017 ccfreefree(compiled_data->tensors.parameters);
3018 }
3019 if (compiled_data->tensors.gradients)
3020 {
3021 for (i = 0; i < parameter_size * parallel_count; i++)
3022 {
3023 if (compiled_data->tensors.gradients[i])
3024 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3025 if (compiled_data->tensors.accum_gradients[i])
3026 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3027 }
3028 ccfreefree(compiled_data->tensors.gradients);
3029 }
3030 if (compiled_data->minimize.parameters)
3031 {
3032 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3033 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3034 ccv_array_free(compiled_data->minimize.parameters);
3035 }
3036 if (compiled_data->rewindables)
3037 ccv_array_free(compiled_data->rewindables);
3038 if (compiled_data->tensors_init.v)
3039 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
3040 if (compiled_data->evaluate.tos)
3041 ccfreefree(compiled_data->evaluate.tos);
3042 compiled_data->evaluate.tos = 0;
3043 if (compiled_data->stream_map)
3044 {
3045 khiter_t k;
3046 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
3047 {
3048 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
3049 continue;
3050 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
3051 ccv_nnc_stream_context_free(stream);
3052 }
3053 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
3054 }
3055 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3056 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3057 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3058 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3059 if (compiled_data->gradient_checkpoints)
3060 {
3061 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3062 {
3063 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
3064 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 3064, __extension__ __PRETTY_FUNCTION__
); }))
;
3065 ccfreefree(checkpoint->inputs);
3066 ccv_array_free(checkpoint->tensor_symbols);
3067 }
3068 ccv_array_free(compiled_data->gradient_checkpoints);
3069 }
3070 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3071 ccfreefree(compiled_data);
3072}
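The ownership checks in the free routine above rely on the same low-bit pointer tag used by ccv_cnnp_model_share_parameters (2570) and cleared by the CCV_NNC_TENSOR macro: a tensors.parameters slot whose low bit is set is borrowed from another model and must not be freed here. A hypothetical helper trio illustrating the convention (names are invented; the untag step mirrors CCV_NNC_TENSOR):

#include <stdint.h>

// Bit 0 set = borrowed (do not free); bit 0 clear = owned by this compiled data.
static inline ccv_nnc_tensor_t* borrow_tensor(ccv_nnc_tensor_t* const tensor)
{
	return (ccv_nnc_tensor_t*)((uintptr_t)tensor | (uintptr_t)1);
}

static inline int is_borrowed_tensor(const ccv_nnc_tensor_t* const tensor)
{
	return ((uintptr_t)tensor & (uintptr_t)1) != 0;
}

static inline ccv_nnc_tensor_t* untag_tensor(ccv_nnc_tensor_t* const tensor)
{
	return (ccv_nnc_tensor_t*)((uintptr_t)tensor & ~(uintptr_t)1); // same as CCV_NNC_TENSOR()
}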
3073
3074void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3075{
3076 ccv_cnnp_model_deinit(model);
3077 if (model->isa->dealloc)
3078 model->isa->dealloc(model);
3079 if (model->io)
3080 {
3081 int i;
3082 for (i = 0; i < model->io->rnum; i++)
3083 {
3084 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
3085 if (model_io->outgoings)
3086 ccv_array_free(model_io->outgoings);
3087 if (model_io->incomings)
3088 ccv_array_free(model_io->incomings);
3089 if (model_io->dependencies)
3090 ccv_array_free(model_io->dependencies);
3091 ccfreefree(model_io);
3092 }
3093 ccv_array_free(model->io);
3094 }
3095 if (model->parameter_indices)
3096 ccv_array_free(model->parameter_indices);
3097 if (model->inputs)
3098 ccfreefree(model->inputs);
3099 if (model->graph)
3100 ccv_nnc_symbolic_graph_free(model->graph);
3101 if (model->compiled_data)
3102 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3103 if (model->name)
3104 ccfreefree(model->name);
3105 ccfreefree(model);
3106}
3107
3108void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3109{
3110 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3111 if (!compiled_data)
3112 return;
3113 if (compiled_data->graph)
3114 ccv_nnc_graph_cancel(compiled_data->graph);
3115 if (compiled_data->apply_gradients.graph)
3116 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3117}
3118
3119void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3120{
3121 model->exec_flags = flags;
3122}
3123
3124int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3125{
3126 return model->exec_flags;
3127}