Bug Summary

File:nnc/ccv_cnnp_model.c
Warning:line 2813, column 11
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2026-05-21-174237-2883545-1 -x c 
ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7#include "_ccv_nnc_symbolic_graph.h"
8#ifdef HAVE_CUDA1
9#include "gpu/ccv_nnc_compat.h"
10#endif
11
12// MARK - Level-5 API
13
14ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
15{
16 if (!model->io)
17 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
18 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
19 model_io->param_ref = 0;
20 model_io->param_sel = 0;
21 model_io->visit = 0;
22 model_io->model = model;
23 model_io->dependencies = 0;
24 model_io->dependents = 0;
25 model_io->outgoings = 0;
26 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
27 ccv_array_push(model->io, &model_io);
28 if (input_size > 0)
29 {
30 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
31 ccv_array_resize(model_io->incomings, input_size);
32 int i;
33 memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t
)(model_io->incomings)->rsize * (size_t)(0)))
, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
34 for (i = 0; i < input_size; i++)
35 {
36 if (!inputs[i]->outgoings)
37 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
38 ccv_array_push(inputs[i]->outgoings, &model_io);
39 }
40 } else {
41 model_io->incomings = 0;
42 }
43 return model_io;
44}
45
46void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
47{
48 assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__
({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0"
, "ccv_cnnp_model.c", 48, __extension__ __PRETTY_FUNCTION__);
}))
;
49 if (!model_io->dependencies)
50 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
51 int i, j;
52 for (i = 0; i < dependency_size; i++)
53 {
54 int flag = 0;
55 // Check if it is already exist or not.
56 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
57 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t
)(model_io->dependencies)->rsize * (size_t)(j)))
== dependencies[i])
58 flag = 1;
59 if (flag)
60 continue;
61 ccv_array_push(model_io->dependencies, dependencies + i);
62 ++dependencies[i]->dependents;
63 }
64}
65
66int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
67{
68 return model->output_size;
69}
70
71int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
72{
73 // If the model is compiled, it is default to 1 unless it is not.
74 if (model->compiled_data)
75 return model->is_trainable >= 0 ? model->is_trainable : 1;
76 return model->is_trainable;
77}
78
79ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
80{
81 if (!model->io)
82 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
83 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s));
84 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
85 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
86 model_io->visit = 0;
87 model_io->model = model;
88 model_io->outputs = 0;
89 model_io->dependencies = 0;
90 model_io->dependents = 0;
91 model_io->incomings = 0;
92 model_io->outgoings = 0;
93 ccv_array_push(model->io, &model_io);
94 return model_io;
95}
96
97void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
98{
99 model->notify_hook.func = func;
100 model->notify_hook.context = context;
101}
102
103void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
104{
105 if (model->notify_hook.func)
106 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
107 if (model->isa->notify)
108 model->isa->notify(model, tag, payload);
109}
110
111static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
112{
113 int i, j;
114 for (i = 0; i < graph_exec_symbol_size; i++)
115 {
116 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
117 // Check whether this tensor symbol has any duplicate.
118 for (j = i + 1; j < graph_exec_symbol_size;)
119 {
120 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
121 // If there is a same tensor symbol, remove it.
122 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
123 {
124 if (j + 1 < graph_exec_symbol_size)
125 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
126 --graph_exec_symbol_size;
127 continue;
128 }
129 ++j;
130 }
131 }
132 return graph_exec_symbol_size;
133}
134
135void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
136{
137 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
138 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
139 int i;
140 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
141 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
142 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
143 {
144 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data
)) + (size_t)(add_to_array_context->symbols)->rsize * (
size_t)(i)))
;
145 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
146 {
147 // Only add to parameter_indices if it is trainable.
148 if (add_to_array_context->add_parameter_indices)
149 ccv_array_add_unique_int(model->parameter_indices, i);
150 // Found it, return, don't add it.
151 return;
152 }
153 }
154 // Only add to parameter_indices if it is trainable.
155 if (add_to_array_context->add_parameter_indices)
156 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
157 // This is a new one, no need to add_unique_int, it is unique.
158 ccv_array_push(add_to_array_context->symbols, &symbol);
159 if (add_to_array_context->trainables)
160 ccv_array_push(add_to_array_context->trainables, &is_trainable);
161 char id[2048];
162 id[0] = add_to_array_context->prefix;
163 id[1] = '-';
164 int total_len = 2;
165 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
166 {
167 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences
)->data)) + (size_t)(add_to_array_context->sequence->
sequences)->rsize * (size_t)(i)))
;
168 int len;
169 if (name->name && name->name[0] != '\0')
170 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
171 else
172 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
173 total_len += len;
174 if (total_len >= 2047)
175 break;
176 }
177 if (total_len < 2047)
178 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
179 assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__
({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048"
, "ccv_cnnp_model.c", 179, __extension__ __PRETTY_FUNCTION__)
; }))
;
180 char *heap_id = (char*)ccmallocmalloc(total_len + 1);
181 memcpy(heap_id, id, total_len + 1);
182 ccv_array_push(add_to_array_context->ids, &heap_id);
183 ++add_to_array_context->sequence->it;
184}
185
186static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
187{
188 compiled_data->f = compiled_data->fits + output_size;
189 compiled_data->xpu_alloc.mp_hdr = -1;
190 compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str();
191 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc();
192 compiled_data->gradient_checkpoints = gradient_checkpoints;
193}
194
195static int _ccv_cnnp_model_root_parallel_count(const ccv_cnnp_model_t* const model)
196{
197 return ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
198}
199
200static int _ccv_cnnp_model_effective_parallel_count(const ccv_cnnp_model_t* const model)
201{
202 int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
203 if (model->graph && model->graph->data_parallel.count > parallel_count)
204 parallel_count = model->graph->data_parallel.count;
205 return parallel_count;
206}
207
208static int _ccv_cnnp_compiled_data_parallel_count(const ccv_cnnp_model_t* const model, const ccv_cnnp_compiled_data_t* const compiled_data)
209{
210 return compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_effective_parallel_count(model);
211}
212
213ccv_nnc_tensor_symbol_t ccv_cnnp_model_get_symbol(ccv_cnnp_model_t* const self, const ccv_nnc_tensor_symbol_t symbol)
214{
215 assert(self->data)((void) sizeof ((self->data) ? 1 : 0), __extension__ ({ if
(self->data) ; else __assert_fail ("self->data", "ccv_cnnp_model.c"
, 215, __extension__ __PRETTY_FUNCTION__); }))
;
216 ccv_cnnp_model_build_data_t* const build_data = (ccv_cnnp_model_build_data_t*)self->data;
217 if (build_data->parallel_count <= 1 || build_data->parallel_rank == 0)
218 return symbol;
219 const int rank = build_data->parallel_rank;
220 assert(rank > 0)((void) sizeof ((rank > 0) ? 1 : 0), __extension__ ({ if (
rank > 0) ; else __assert_fail ("rank > 0", "ccv_cnnp_model.c"
, 220, __extension__ __PRETTY_FUNCTION__); }))
;
221 assert(rank < build_data->parallel_count)((void) sizeof ((rank < build_data->parallel_count) ? 1
: 0), __extension__ ({ if (rank < build_data->parallel_count
) ; else __assert_fail ("rank < build_data->parallel_count"
, "ccv_cnnp_model.c", 221, __extension__ __PRETTY_FUNCTION__)
; }))
;
222 ccv_nnc_symbolic_graph_t* const graph = (ccv_nnc_symbolic_graph_t*)symbol.graph;
223 ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, symbol, rank);
224 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
225 return copy;
226 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, symbol);
227 if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY)
228 CCV_TENSOR_SET_DEVICE_ID(params.type, rank)(params.type) = (((params.type) & ~0xfff00) | (((rank) &
0xfff) << 8))
;
229 copy = ccv_nnc_tensor_symbol_new(graph, params, 0);
230 ccv_nnc_tensor_symbol_set_copy(graph, symbol, rank, copy);
231 return copy;
232}
233
234typedef struct {
235 void* old_graph_exec_symbol_new_hook_context;
236 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
237 ccv_nnc_symbolic_graph_t* graph;
238 ccv_cnnp_model_build_data_t* build_data;
239} ccv_cnnp_model_set_exec_flags_context_t;
240
241static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
242{
243 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
244 if (flags_context->build_data->exec_flags)
245 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
246 if (flags_context->old_graph_exec_symbol_new_hook)
247 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
248}
249
250static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
251{
252 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 252, __extension__ __PRETTY_FUNCTION__); }))
;
253 model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
254 int i;
255 for (i = 0; i < input_size; i++)
256 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
257 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
258 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
259 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
260 ccv_cnnp_model_sequence_t model_sequence = {
261 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
262 };
263 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
264 .add_parameter_indices = 1,
265 .prefix = 't',
266 .sequence = &model_sequence,
267 .symbols = parameters,
268 .ids = parameter_ids,
269 .trainables = parameter_trainables,
270 };
271 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
272 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
273 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
274 .add_parameter_indices = 0,
275 .prefix = 'r',
276 .sequence = &model_sequence,
277 .symbols = internals,
278 .ids = internal_ids,
279 .trainables = 0,
280 };
281 ccv_cnnp_model_build_data_t build_data = {
282 .exec_flags = 0,
283 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
284 .parallel_count = 1,
285 .parallel_rank = 0,
286 .model_sequence = &model_sequence,
287 .add_to_array = ccv_cnnp_model_add_to_array,
288 .parameters = parameters,
289 .context = {
290 .add_to_parameter = &add_to_parameter_context,
291 .add_to_output = &add_to_output_context,
292 },
293 .gradient_checkpoints = 0,
294 };
295 model->data = &build_data;
296 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
297 .graph = model->graph,
298 .build_data = &build_data,
299 .old_graph_exec_symbol_new_hook = 0,
300 .old_graph_exec_symbol_new_hook_context = 0
301 };
302 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
303 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
304 // Reset back to previous hook.
305 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
306 for (i = 0; i < model->output_size; i++)
307 {
308 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
309 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
310 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
311 continue;
312 // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method
313 // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be
314 // honest, because we cannot handle cases of alias is part of the original tensor but bind differently).
315 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
316 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
317 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, &output, 1, model->outputs + i, 1, "contiguous");
318 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
319 }
320 model->data = 0;
321 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
322 if (model_sequence.sequences)
323 ccv_array_free(model_sequence.sequences);
324 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
325 int not_trainables = 0;
326 // Assert no parameter is alias.
327 for (i = 0; i < parameters->rnum; i++)
328 {
329 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
330 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
331 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 331, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
332 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
== 0)
333 not_trainables = 1;
334 }
335 assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables->
rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables
->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum"
, "ccv_cnnp_model.c", 335, __extension__ __PRETTY_FUNCTION__)
; }))
;
336 uint64_t* parameter_flags = 0;
337 if (not_trainables)
338 {
339 parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
340 for (i = 0; i < parameter_trainables->rnum; i++)
341 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
)
342 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
343 }
344 ccv_array_free(parameter_trainables);
345 // Assert no internal is alias.
346 for (i = 0; i < internals->rnum; i++)
347 {
348 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals
)->rsize * (size_t)(i)))
;
349 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
350 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 350, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
351 }
352 const int output_size = model->output_size;
353 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
354 const int parameters_rnum = parameters->rnum;
355 if (input_size > 0)
356 {
357 ccv_array_resize(parameters, parameters_rnum + input_size);
358 memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(parameters_rnum)))
, model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
359 }
360 ccv_nnc_symbolic_graph_simplify(model->graph,
361 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
362 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
363 CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
364 CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
,
365 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters_rnum + input_size,
366 model->outputs, output_size,
367 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
368 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
369 // Size it down.
370 parameters->rnum = parameters_rnum;
371 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
372 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
373 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
374 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 374, __extension__ __PRETTY_FUNCTION__)
; }))
;
375 compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
376 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
377 compiled_data->loss = loss;
378 if (loss.cmd == CCV_NNC_NOOP)
379 {
380 // If no loss function provided, there is no fits.
381 for (i = 0; i < output_size; i++)
382 {
383 compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
384 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
385 if (alias_to.d < 0)
386 compiled_data->f[i] = model->outputs[i];
387 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
388 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
389 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
390 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
391 int j;
392 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
393 { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if (
ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c"
, 393, __extension__ __PRETTY_FUNCTION__); }))
; } // There is no ofs.
394 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
395 }
396 }
397 } else {
398 for (i = 0; i < output_size; i++)
399 {
400 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
401 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
402 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
403 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit}
, (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 -1)
, TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, (
1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 -1)
, 0);
404 }
405 }
406 if (loss.cmd != CCV_NNC_NOOP)
407 {
408 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
409 ccv_nnc_symbolic_graph_simplify(model->graph,
410 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, // Only do Ops fusion, in this way, we can fuse the loss function.
411 0, 0, // No need to provide binds at this point.
412 compiled_data->f, model->output_size,
413 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
414 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
415 }
416 // If inputs are from GPU, stream type is GPU.
417 compiled_data->parameters = parameters;
418 compiled_data->parameter_flags = parameter_flags;
419 compiled_data->internals = internals;
420 compiled_data->ids.parameters = parameter_ids;
421 compiled_data->ids.internals = internal_ids;
422 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
423}
424
425static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
426{
427 ccv_array_t* const stack = (ccv_array_t*)context;
428 ccv_array_push(stack, &symbol.d);
429}
430
431static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
432{
433 const ccv_nnc_tensor_symbol_t src_symbol = {
434 .d = src_index,
435 .graph = src_graph
436 };
437 const ccv_nnc_tensor_symbol_t dest_symbol = {
438 .d = dest_index,
439 .graph = dest_graph
440 };
441 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
442 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
443 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
444 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
445 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
446 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
447}
448
449static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
450{
451 const ccv_nnc_tensor_symbol_t src_symbol = {
452 .d = src_index,
453 .graph = src_graph
454 };
455 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
456 const ccv_nnc_tensor_symbol_t dest_symbol = {
457 .d = dest_index,
458 .graph = dest_graph
459 };
460 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
461 if (src_params.dim[0] == 0 || dest_params.dim[0] == 0)
462 return 1;
463 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
464}
465
466static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
467static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
468
469typedef struct {
470 int parallel_count;
471 ccv_nnc_symbolic_graph_t* graph;
472 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
473} ccv_nnc_graph_exec_update_t;
474
475static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
476{
477 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
478 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
479 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
480 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
481 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
482 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
483 const int parallel_count = graph_exec_update->parallel_count;
484 int i;
485 for (i = 1; i < parallel_count; i++)
486 {
487 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
488 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
489 {
490 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
491 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
492 }
493 }
494}
495
496void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
497{
498 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 498, __extension__ __PRETTY_FUNCTION__); }))
;
499 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 499, __extension__ __PRETTY_FUNCTION__)
; }))
;
500 assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if
(!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c"
, 500, __extension__ __PRETTY_FUNCTION__); }))
;
501 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
502 init->graph = ccv_nnc_symbolic_graph_new();
503 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
504 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
505 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
506 init->parallel_count = model->parallel_count;
507 init->memory_compression = model->memory_compression;
508 init->memory_reduction = model->memory_reduction;
509 init->gradient_checkpointing = model->gradient_checkpointing;
510 init->compiled_data->stream_type = model->compiled_data->stream_type;
511 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
512 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
513 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
514 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
515 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
516 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0);
517 int i, j;
518 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
519 for (i = 0; i < compiled_data->parameters->rnum; i++)
520 {
521 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
522 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 522, __extension__ __PRETTY_FUNCTION__)
; }))
;
523 }
524 for (i = 0; i < compiled_data->internals->rnum; i++)
525 {
526 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
527 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 527, __extension__ __PRETTY_FUNCTION__)
; }))
;
528 }
529 // Update inputs.
530 assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size)
? 1 : 0), __extension__ ({ if (model->input_size == init->
input_size) ; else __assert_fail ("model->input_size == init->input_size"
, "ccv_cnnp_model.c", 530, __extension__ __PRETTY_FUNCTION__)
; }))
;
531 for (i = 0; i < model->input_size; i++)
532 if (model->inputs[i].d >= 0)
533 {
534 assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0"
, "ccv_cnnp_model.c", 534, __extension__ __PRETTY_FUNCTION__)
; }))
;
535 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
536 }
537 // Update outputs.
538 assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size
) ? 1 : 0), __extension__ ({ if (model->output_size == init
->output_size) ; else __assert_fail ("model->output_size == init->output_size"
, "ccv_cnnp_model.c", 538, __extension__ __PRETTY_FUNCTION__)
; }))
;
539 for (i = 0; i < model->output_size; i++)
540 {
541 if (model->outputs[i].d >= 0)
542 {
543 assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->outputs[i].d >= 0) ; else __assert_fail (
"init->outputs[i].d >= 0", "ccv_cnnp_model.c", 543, __extension__
__PRETTY_FUNCTION__); }))
;
544 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
545 }
546 if (model->outputs[i].d != model->compiled_data->f[i].d)
547 {
548 assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data
->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[
i].d != init->compiled_data->f[i].d) ; else __assert_fail
("init->outputs[i].d != init->compiled_data->f[i].d"
, "ccv_cnnp_model.c", 548, __extension__ __PRETTY_FUNCTION__)
; }))
;
549 if (model->compiled_data->f[i].d >= 0)
550 {
551 assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ?
1 : 0), __extension__ ({ if (init->compiled_data->f[i]
.d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0"
, "ccv_cnnp_model.c", 551, __extension__ __PRETTY_FUNCTION__)
; }))
;
552 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
553 }
554 }
555 }
556 // Go through the graph to set tensor on matching symbols
557 for (i = 0; i < stack->rnum; i++)
558 {
559 const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize
* (size_t)(i)))
;
560 // If exceed range, skip.
561 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
562 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
563 continue;
564 const ccv_nnc_graph_exec_symbol_t src_symbol = {
565 .d = d,
566 .graph = init->graph
567 };
568 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
569 .d = d,
570 .graph = model->graph
571 };
572 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
573 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
574 // If the name doesn't match, skip.
575 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
576 continue;
577 // Now get all the inputs and outputs, if matches, set them.
578 const int* src_inputs;
579 int src_input_size;
580 const int* src_outputs;
581 int src_output_size;
582 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
583 const int* dest_inputs;
584 int dest_input_size;
585 const int* dest_outputs;
586 int dest_output_size;
587 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
588 // We may have unmatched input / output size because this is the minimizer and it has
589 // different saved_aux (for example, when we shrunk with CMD_NOOP).
590 if (src_input_size != dest_input_size)
591 continue;
592 if (src_output_size != dest_output_size)
593 continue;
594 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
595 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
596 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
597 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
598 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
599 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
600 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
601 // a new exec symbol.
602 for (j = 0; j < src_input_size; j++)
603 if (src_inputs[j] >= 0)
604 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
605 for (j = 0; j < src_output_size; j++)
606 if (src_outputs[j] >= 0)
607 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
608 }
609 ccv_array_free(stack);
610 // After this, we get all tensors in the model graph resolved through tensor_auto.
611 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0);
612 // Verify symbols we get matches.
613 const int parameter_size = compiled_data->parameters->rnum;
614 for (i = 0; i < parameter_size; i++)
615 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->parameters)->data)) + (size_t)(compiled_data
->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (
((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data->
parameters)->data)) + (size_t)(compiled_data->parameters
)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d"
, "ccv_cnnp_model.c", 615, __extension__ __PRETTY_FUNCTION__)
; }))
; }
616 const int internal_size = compiled_data->internals->rnum;
617 for (i = 0; i < internal_size; i++)
618 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->internals)->data)) + (size_t)(compiled_data
->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->internals)->
data)) + (size_t)(init->compiled_data->internals)->rsize
* (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((compiled_data->internals)->data)) +
(size_t)(compiled_data->internals)->rsize * (size_t)(i
))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init
->compiled_data->internals)->data)) + (size_t)(init->
compiled_data->internals)->rsize * (size_t)(i))))->d
) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d"
, "ccv_cnnp_model.c", 618, __extension__ __PRETTY_FUNCTION__)
; }))
; }
619 // Go through compiled data.
620 if (compiled_data->tensor_arena)
621 {
622 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
623 if (flag == 0 && compiled_data->graph_exec_arena)
624 {
625 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
626 // Since we will reinit, if we previously set is_test, we need to set it again.
627 if (compiled_data->is_test)
628 {
629 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
630 ccv_nnc_graph_exec_update_t update = {
631 .parallel_count = parallel_count,
632 .graph = model->graph,
633 .graph_exec_arena = compiled_data->graph_exec_arena,
634 };
635 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
636 }
637 } else
638 // Free-up tensor arena & graph exec arena.
639 _ccv_cnnp_compiled_data_graph_free(compiled_data);
640 }
641 // There are other compiled graphs, for accum and apply gradients.
642 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
643 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
644 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
645 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
646 // That is why we don't update these compiled graphs at all this point.
647 // Free the model, we've already "absorbed" it.
648 ccv_cnnp_model_free(init);
649}
650
651void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
652{
653 assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model->
input_size == 0) ? 1 : 0), __extension__ ({ if (input_size ==
model->input_size || model->input_size == 0) ; else __assert_fail
("input_size == model->input_size || model->input_size == 0"
, "ccv_cnnp_model.c", 653, __extension__ __PRETTY_FUNCTION__)
; }))
;
654 if (model->input_size == 0)
655 model->input_size = input_size;
656 if (!model->graph) // The graph is not compiled yet.
657 {
658 model->graph = ccv_nnc_symbolic_graph_new();
659 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
660 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 660, __extension__ __PRETTY_FUNCTION__)
; }))
;
661 int i, flag = 0;
662 for (i = 0; !flag && i < input_size; i++)
663 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY);
664 // If inputs are from GPU, stream type is GPU.
665 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
666 model->compiled_data->minimize.minimizer = minimizer;
667 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
668 } else {
669 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
670 // And then absorb the "new model" to the old one.
671 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
672 ccv_cnnp_model_absorb(model, init, inputs, input_size);
673 // Reset minimizer.
674 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
675 }
676}
677
678ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
679{
680 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
681 new_model->is_trainable = is_trainable;
682 return new_model;
683}
684
685void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
686{
687 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 687, __extension__ __PRETTY_FUNCTION__); }))
;
688 assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0
), __extension__ ({ if (output_size == model->output_size)
; else __assert_fail ("output_size == model->output_size"
, "ccv_cnnp_model.c", 688, __extension__ __PRETTY_FUNCTION__)
; }))
;
689 ccv_nnc_symbolic_graph_t* const graph = model->graph;
690 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0);
691 int i;
692 for (i = 0; i < output_size; i++)
693 {
694 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL"
, "ccv_cnnp_model.c", 694, __extension__ __PRETTY_FUNCTION__)
; }))
;
695 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
696 }
697}
698
699void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
700{
701 if (workspace_size == model->workspace_size)
702 return;
703 model->workspace_size = workspace_size;
704 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
705 if (compiled_data && compiled_data->graph)
706 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0);
707}
708
709size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
710{
711 return model->workspace_size;
712}
713
714void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
715{
716 if (parallel == 0)
717 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
718 else
719 model->parallel_count = parallel;
720 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
721 if (compiled_data)
722 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 722, __extension__ __PRETTY_FUNCTION__)
; }))
; }
723}
724
725void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
726{
727 model->max_stream_count = max_stream_count;
728 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
729 if (compiled_data)
730 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 730, __extension__ __PRETTY_FUNCTION__)
; }))
; }
731}
732
733void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
734{
735 model->memory_compression = memory_compression;
736 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
737 if (compiled_data)
738 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 738, __extension__ __PRETTY_FUNCTION__)
; }))
; }
739}
740
741void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
742{
743 model->memory_reduction = memory_reduction;
744 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
745 if (compiled_data)
746 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 746, __extension__ __PRETTY_FUNCTION__)
; }))
; }
747}
748
749void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
750{
751 model->gradient_checkpointing = gradient_checkpointing;
752}
753
754int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
755{
756 return model->gradient_checkpointing;
757}
758
759typedef struct {
760 int parallel_count;
761 ccv_nnc_symbolic_graph_t* graph;
762 ccv_cnnp_compiled_data_t* compiled_data;
763 ccv_nnc_tensor_arena_t* tensor_arena;
764} ccv_nnc_tensor_init_states_t;
765
766static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
767{
768 int i;
769 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
770 for (i = 0; i < compiled_data->parameters->rnum; i++)
771 {
772 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
773 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
774 return 1;
775 }
776 for (i = 0; i < compiled_data->internals->rnum; i++)
777 {
778 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
779 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
780 return 1;
781 }
782 return 0;
783}
784
785static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
786{
787 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
788 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
789 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
790 if (!output_tensor)
791 return;
792 const int d = output_symbol.d;
793 assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data->
tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states
->compiled_data->tensors_init.size) ; else __assert_fail
("d < tensor_init_states->compiled_data->tensors_init.size"
, "ccv_cnnp_model.c", 793, __extension__ __PRETTY_FUNCTION__)
; }))
;
794 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data
->tensors_init.v) & ~(uintptr_t)1))
;
795 if (init_v[d >> 5] & (1u << (d & 0x1f)))
796 return;
797 init_v[d >> 5] |= (1u << (d & 0x1f));
798 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
799 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
800 const int parallel_count = tensor_init_states->parallel_count;
801 int i;
802 for (i = 1; i < parallel_count; i++)
803 {
804 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
805 if (copy)
806 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
807 }
808}
809
810// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
811// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
812static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
813{
814 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 814, __extension__ __PRETTY_FUNCTION__); }))
;
815 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 815, __extension__ __PRETTY_FUNCTION__)
; }))
;
816 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
817 assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__
({ if (compiled_data->rewindables) ; else __assert_fail (
"compiled_data->rewindables", "ccv_cnnp_model.c", 817, __extension__
__PRETTY_FUNCTION__); }))
;
818 int i;
819 for (i = 0; i < compiled_data->rewindables->rnum; i++)
820 {
821 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) +
(size_t)(compiled_data->rewindables)->rsize * (size_t)
(i)))
;
822 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
823 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
824 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
825 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
826 }
827 ccv_array_clear(compiled_data->rewindables);
828 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
829}
830
831static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
832{
833 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
834 .type = CCV_CNNP_REWIND_TENSOR,
835 .tensor = symbol
836 };
837 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
838 ccv_array_push(rewind_symbols, &rewind_symbol);
839}
840
841static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
842{
843 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
844 .type = CCV_CNNP_REWIND_TENSOR,
845 .tensor = symbol
846 };
847 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
848 ccv_array_push(rewind_symbols, &rewind_symbol);
849}
850
851static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
852{
853 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
854 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
855 .graph_exec = symbol
856 };
857 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
858 ccv_array_push(rewind_symbols, &rewind_symbol);
859}
860
861static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
862{
863 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
864 if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0))
865 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
866 int i;
867 for (i = 1; i < parallel_count; i++)
868 {
869 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
870 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
871 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
872 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
873 }
874}
875
876static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
877{
878 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 878, __extension__ __PRETTY_FUNCTION__); }))
;
879 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 879, __extension__ __PRETTY_FUNCTION__); }))
;
880 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
881 int i;
882 for (i = 1; i < parallel_count; i++)
883 {
884 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
885 if (copy_symbol.graph)
886 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
887 }
888 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
889 if (graph_exec_arena)
890 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
891 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
892 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
893 if (gradient_graph_exec_arena)
894 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
895}
896
897static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
898{
899 int this_parameter_flag = 0;
900 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
901 return this_parameter_flag;
902 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
903 int j, k;
904 // For no-op, we can preserve previous saved_aux_size.
905 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
906 {
907 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
908 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
909 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
910 // make sure some model parameters don't update if we don't want them to.
911 int old_saved_aux_size;
912 if (old_minimizer.cmd == CCV_NNC_NOOP)
913 {
914 int input_size;
915 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
916 if (input_size < 2) // This is not legit.
917 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
918 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
919 old_saved_aux_size = input_size - 2;
920 } else
921 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
922 if (old_saved_aux_size != saved_aux_size)
923 {
924 this_parameter_flag = 1;
925 if (saved_aux_size > old_saved_aux_size)
926 {
927 // Allocate new tensor symbols.
928 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
929 for (j = old_saved_aux_size; j < saved_aux_size; j++)
930 {
931 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
932 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
933 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
934 for (k = 1; k < parallel_count; k++)
935 {
936 ccv_nnc_tensor_param_t dev_info = info;
937 if (k != device_id)
938 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) &
0xfff) << 8))
;
939 else
940 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) &
0xfff) << 8))
;
941 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
942 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
943 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
944 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
945 }
946 }
947 } else {
948 for (j = saved_aux_size; j < old_saved_aux_size; j++)
949 {
950 for (k = 1; k < parallel_count; k++)
951 {
952 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
953 if (src_copy.d >= 0)
954 {
955 ccv_nnc_tensor_symbol_free(graph, src_copy);
956 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
957 }
958 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
959 if (dest_copy.d >= 0)
960 {
961 ccv_nnc_tensor_symbol_free(graph, dest_copy);
962 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
963 }
964 }
965 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
966 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
967 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
968 }
969 }
970 }
971 }
972 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
973 if (this_parameter_flag)
974 {
975 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
976 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
977 const int* inputs = 0;
978 int input_size = 0;
979 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
980 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 980, __extension__ __PRETTY_FUNCTION__)
; }))
;
981 update_inputs[0].d = inputs[0];
982 update_inputs[0].graph = graph;
983 update_inputs[1].d = inputs[1];
984 update_inputs[1].graph = graph;
985 update_outputs[0] = updated_parameters[parameter_indice];
986 for (j = 0; j < saved_aux_size; j++)
987 {
988 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
989 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
990 }
991 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
992 for (k = 1; k < parallel_count; k++)
993 {
994 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
995 assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if
(copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c"
, 995, __extension__ __PRETTY_FUNCTION__); }))
;
996 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
997 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 997, __extension__ __PRETTY_FUNCTION__)
; }))
;
998 update_inputs[0].d = inputs[0];
999 update_inputs[0].graph = graph;
1000 update_inputs[1].d = inputs[1];
1001 update_inputs[1].graph = graph;
1002 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
1003 for (j = 0; j < saved_aux_size; j++)
1004 {
1005 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
1006 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
1007 }
1008 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
1009 }
1010 }
1011 return this_parameter_flag;
1012}
1013
// One minimizer override: a minimizer command together with the parameter
// selectors (model io handles) that the command should be applied to.
1014typedef struct {
1015 int parameter_size; // Number of entries of parameters[] that consumers iterate over.
1016 ccv_nnc_cmd_t minimizer; // Minimizer command to install on every selected parameter.
// Declared with a single element but indexed from 0 up to parameter_size - 1 by
// the code that walks this struct, i.e. it is used as a trailing variable-length array.
// NOTE(review): presumably the struct is over-allocated where it is created so
// that all parameter_size entries fit -- confirm at the allocation site.
1017 ccv_cnnp_model_io_t parameters[1];
1018} ccv_cnnp_set_minimizer_for_parameter_t;
1019
1020static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
1021{
1022 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1023 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1023, __extension__ __PRETTY_FUNCTION__); }))
;
1024 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1025 // We update all parameters, at this point, we have one minimizer.
1026 const int parameter_size = compiled_data->parameters->rnum;
1027 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
1028 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
1029 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 1029, __extension__ __PRETTY_FUNCTION__); }))
;
1030 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1031 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now")((void) sizeof ((_ccv_cnnp_model_effective_parallel_count(model
) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ? 1 : 0), __extension__ ({ if (_ccv_cnnp_model_effective_parallel_count
(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ; else __assert_fail ("_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && \"local replicated stateful models only support forward / no-grad evaluation for now\""
, "ccv_cnnp_model.c", 1031, __extension__ __PRETTY_FUNCTION__
); }))
;
1032 ccv_array_t* const parameters = compiled_data->minimize.parameters;
1033 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1034 int i, j, flag = 0;
1035 for (i = 0; i < parameters->rnum; i++)
1036 {
1037 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
1038 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
1039 {
1040 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
1041 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 1041, __extension__ __PRETTY_FUNCTION__
); }))
;
1042 const int old_rnum = parameter_indices->rnum;
1043 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
1044 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
1045 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 1045, __extension__ __PRETTY_FUNCTION__
); }))
;
1046 if (param_ref >= 0)
1047 {
1048 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 1048, __extension__ __PRETTY_FUNCTION__
); }))
;
1049 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
1050 parameter_indices->rnum = old_rnum + 1;
1051 }
1052 }
1053 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1054 // We may have duplicated indices, but that is OK, we will set it twice.
1055 for (j = 0; j < parameter_indices->rnum; j++)
1056 {
1057 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
1058 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 1058, __extension__ __PRETTY_FUNCTION__
); }))
;
1059 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1060 flag = 1;
1061 }
1062 ccv_array_clear(parameter_indices);
1063 }
1064 ccv_array_free(parameter_indices);
1065 return flag;
1066}
1067
1068static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1069{
1070 if (new_saved_aux_size == old_saved_aux_size)
1071 return;
1072 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 1072, __extension__ __PRETTY_FUNCTION__
); }))
;
1073 int i, j;
1074 for (i = parameter_size - 1; i >= 0; i--)
1075 {
1076 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1077 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1078 for (j = old_saved_aux_size - 1; j >= 0; j--)
1079 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1080 }
1081}
1082
1083static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1084{
1085 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1086 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1086, __extension__ __PRETTY_FUNCTION__); }))
;
1087 if (!compiled_data->rewindables)
1088 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1089 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1090 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1091 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1092}
1093
1094static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1095{
1096 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1097 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1097, __extension__ __PRETTY_FUNCTION__
); }))
;
1098 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1098, __extension__ __PRETTY_FUNCTION__
); }))
;
1099 const int evaluate_to_size = compiled_data->evaluate.to_size;
1100 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 1100, __extension__ __PRETTY_FUNCTION__
); }))
;
1101 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1102 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now")((void) sizeof ((_ccv_cnnp_model_effective_parallel_count(model
) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ? 1 : 0), __extension__ ({ if (_ccv_cnnp_model_effective_parallel_count
(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ; else __assert_fail ("_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && \"local replicated stateful models only support forward / no-grad evaluation for now\""
, "ccv_cnnp_model.c", 1102, __extension__ __PRETTY_FUNCTION__
); }))
;
1103 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1104 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1105 int i, j;
1106 const int output_size = model->output_size;
1107 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 1107, __extension__ __PRETTY_FUNCTION__
); }))
;
1108 if (fits)
1109 for (i = 0; i < output_size; i++)
1110 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1111 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1112 const int parameter_size = compiled_data->parameters->rnum;
1113 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1114 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1115 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1116 int parameter_size_maybe_more = parameter_size;
1117 compiled_data->disable_outgrad = disable_outgrad;
1118 int outgrad_size;
1119 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1120 outgrad_size = 0;
1121 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1122 outgrad_size = model->input_size;
1123 else {
1124 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1124, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1125 outgrad_size = 0;
1126 for (i = 0; i < model->input_size; i++)
1127 if (!(disable_outgrad & ((uint64_t)1 << i)))
1128 ++outgrad_size;
1129 }
1130 compiled_data->outgrad_size = outgrad_size;
1131 parameter_size_maybe_more += outgrad_size;
1132 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1133 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1134 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1135 compiled_data->backward.to_size = parameter_size_maybe_more;
1136 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
;
1137 if (compiled_data->parameter_flags)
1138 {
1139 parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1140 for (i = 0; i < parameter_size; i++)
1141 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1142 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1143 else
1144 parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1145 }
1146 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1147 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1148 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1149 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1150 else { // Compute minimize with gradients including selected inputs.
1151 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 1151, __extension__ __PRETTY_FUNCTION__
); }))
;
1152 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1152, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1153 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 1153, __extension__ __PRETTY_FUNCTION__
); }))
;
1154 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1155 j = 0;
1156 for (i = 0; i < model->input_size; i++)
1157 if (!(disable_outgrad & ((uint64_t)1 << i)))
1158 outgrads[j++] = model->inputs[i];
1159 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1160 }
1161 if (compiled_data->parameter_flags)
1162 ccfreefree(parameters);
1163 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1164 if (compiled_data->minimize.parameters)
1165 _ccv_cnnp_apply_parameters_with_minimizer(model);
1166 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1167 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1168 for (i = 0; i < output_size; i++)
1169 {
1170 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1171 // Init this to 1 so we can backprop.
1172 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1173 }
1174 compiled_data->backward.to_size = 0;
1175 for (i = 0; i < parameter_size_maybe_more; i++)
1176 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1177 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1178 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1179 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1180 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1181 {
1182 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1183 continue;
1184 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1185 const int* tos;
1186 int to_size;
1187 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1188 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1189 {
1190 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1191 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1192 int flag = 0;
1193 const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = (
destination_count - i); (_a > _b) ? _a : _b; })
;
1194 for (j = i - 1; !flag && j >= 0; j--)
1195 if (j + outgrad_destination_start < destination_count)
1196 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1197 if (!flag) // Only if we cannot find it, we add it.
1198 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1199 }
1200 }
1201 if (parallel_count > 1)
1202 {
1203 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1204 0, 0,
1205 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1206 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1207 0, 0, 0,
1208 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1209 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1210 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1211 for (i = 0; i < evaluate_to_size; i++)
1212 for (j = 1; j < parallel_count; j++)
1213 {
1214 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1215 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1216 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1217 }
1218 const int backward_to_size = compiled_data->backward.to_size;
1219 for (i = 0; i < backward_to_size; i++)
1220 for (j = 1; j < parallel_count; j++)
1221 {
1222 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1223 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1224 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1225 }
1226 }
1227 // Only use memory compression if we are in gradient parameter mode.
1228 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1229 {
1230 if (model->memory_compression)
1231 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1232 if (model->memory_reduction)
1233 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1234 }
1235 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1236 compiled_data->gradient_mode = gradient_mode;
1237}
1238
1239void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1240{
1241 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1241, __extension__ __PRETTY_FUNCTION__
); }))
;
1242 const int parameter_size = compiled_data->parameters->rnum;
1243 const int parallel_count = _ccv_cnnp_model_effective_parallel_count(model);
1244 compiled_data->parallel_count = parallel_count;
1245 const int internal_size = compiled_data->internals->rnum;
1246 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1247 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1248 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1249 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1250}
1251
1252int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1253{
1254 int i, j;
1255 const int parameter_size = compiled_data->parameters->rnum;
1256 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1257 const int internal_size = compiled_data->internals->rnum;
1258 for (i = 0; i < parameter_size; i++)
1259 {
1260 // parameters has to be allocated all together.
1261 if (compiled_data->tensors.parameters[i])
1262 {
1263 for (j = 1; j < parallel_count; j++)
1264 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1264, __extension__ __PRETTY_FUNCTION__
); }))
; }
1265 continue;
1266 }
1267 return 1;
1268 }
1269 for (i = 0; i < internal_size; i++)
1270 {
1271 if (!compiled_data->tensors.internals[i])
1272 return 1;
1273 for (j = 1; j < parallel_count; j++)
1274 if (!compiled_data->tensors.internals[i + j * internal_size])
1275 return 1;
1276 }
1277 return 0;
1278}
1279
1280void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1281{
1282 int i, j;
1283 const int parameter_size = compiled_data->parameters->rnum;
1284 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1285 compiled_data->parallel_count = parallel_count;
1286 const int internal_size = compiled_data->internals->rnum;
1287 for (i = 0; i < parameter_size; i++)
1288 {
1289 // parameters has to be allocated all together.
1290 if (compiled_data->tensors.parameters[i])
1291 {
1292 for (j = 1; j < parallel_count; j++)
1293 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1293, __extension__ __PRETTY_FUNCTION__
); }))
; }
1294 continue;
1295 }
1296 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1297 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1298 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1299 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1300 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1301 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1302 for (j = 1; j < parallel_count; j++)
1303 {
1304 if (j != device_id)
1305 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1306 else
1307 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1308 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1309 }
1310 }
1311 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1312 for (i = 0; i < internal_size; i++)
1313 {
1314 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1315 const int d = retained.d;
1316 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1317 continue;
1318 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1319 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1320 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1321 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1322 if (!compiled_data->tensors.internals[i])
1323 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1324 for (j = 1; j < parallel_count; j++)
1325 {
1326 if (j != device_id)
1327 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1328 else
1329 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1330 if (!compiled_data->tensors.internals[i + j * internal_size])
1331 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1332 }
1333 }
1334 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
; // Remove 1 if any.
1335}
1336
1337static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1338{
1339 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1340 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1341}
1342
1343static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1344{
1345 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1345, __extension__ __PRETTY_FUNCTION__
); }))
;
1346 int i, j;
1347 for (i = 0; i < tensor_size; i++)
1348 {
1349 if (!tensors[i])
1350 continue;
1351 const int d = tensor_symbols[i].d;
1352 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1353 continue;
1354 for (j = 1; j < parallel_count; j++)
1355 if (tensors[i + j * tensor_size])
1356 {
1357 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1358 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size]
) & ~(uintptr_t)1))
;
1359 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1360 }
1361 }
1362}
1363
1364static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1365{
1366 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1366, __extension__ __PRETTY_FUNCTION__
); }))
;
1367 int i, j;
1368 for (i = 0; i < tensor_size; i++)
1369 {
1370 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1371 for (j = 1; j < parallel_count; j++)
1372 {
1373 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1374 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1375 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1376 { // We shouldn't allocate this, free it up.
1377 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1378 tensors[i + j * tensor_size] = 0;
1379 }
1380 }
1381 }
1382}
1383
1384static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1385{
1386 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1386, __extension__ __PRETTY_FUNCTION__
); }))
;
1387 int i, j;
1388 for (i = 0; i < tensor_size; i++)
1389 {
1390 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1391 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1392 continue;
1393 if (graph)
1394 {
1395 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1396 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1397 tensor_symbol = alias_to;
1398 }
1399 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1400 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1401 {
1402 const ccv_nnc_tensor_bind_t retained_bind = {
1403 .symbol = tensor_symbol,
1404 .tensor = tensor
1405 };
1406 ccv_array_push(tensor_binds, &retained_bind);
1407 }
1408 for (j = 1; j < parallel_count; j++)
1409 {
1410 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1411 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1412 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1413 {
1414 const ccv_nnc_tensor_bind_t bind = {
1415 .symbol = copy,
1416 .tensor = tensors[i + j * tensor_size]
1417 };
1418 ccv_array_push(tensor_binds, &bind);
1419 }
1420 }
1421 }
1422}
1423
1424static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1425{
1426 if (compiled_data->graph)
1427 ccv_nnc_graph_free(compiled_data->graph);
1428 compiled_data->graph = 0;
1429 compiled_data->is_test = 0;
1430 if (compiled_data->tensor_arena)
1431 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1432 compiled_data->tensor_arena = 0;
1433 if (compiled_data->graph_exec_arena)
1434 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1435 compiled_data->graph_exec_arena = 0;
1436 if (compiled_data->backward.from_ops)
1437 ccfreefree(compiled_data->backward.from_ops);
1438 compiled_data->backward.from_ops = 0;
1439 if (compiled_data->evaluate.schedule)
1440 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1441 compiled_data->evaluate.schedule = 0;
1442 if (compiled_data->backward.schedule)
1443 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1444 compiled_data->backward.schedule = 0;
1445}
1446
1447static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1448{
1449 if (compiled_data->gradients)
1450 ccfreefree(compiled_data->gradients);
1451 compiled_data->gradients = 0;
1452 if (compiled_data->updated_parameters)
1453 ccfreefree(compiled_data->updated_parameters);
1454 compiled_data->updated_parameters = 0;
1455 compiled_data->update_nodes = 0;
1456 compiled_data->saved_aux = 0;
1457}
1458
1459static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1460{
1461 if (compiled_data->backward.gradients)
1462 ccfreefree(compiled_data->backward.gradients);
1463 compiled_data->backward.gradients = 0;
1464 if (compiled_data->backward.accum)
1465 ccv_nnc_graph_free(compiled_data->backward.accum);
1466 compiled_data->backward.accum = 0;
1467 if (compiled_data->backward.tensor_arena)
1468 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1469 compiled_data->backward.tensor_arena = 0;
1470 if (compiled_data->backward.graph_exec_arena)
1471 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1472 compiled_data->backward.graph_exec_arena = 0;
1473}
1474
1475static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1476{
1477 if (compiled_data->apply_gradients.graph)
1478 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1479 compiled_data->apply_gradients.graph = 0;
1480 if (compiled_data->apply_gradients.tensor_arena)
1481 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1482 compiled_data->apply_gradients.tensor_arena = 0;
1483 if (compiled_data->apply_gradients.graph_exec_arena)
1484 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1485 compiled_data->apply_gradients.graph_exec_arena = 0;
1486}
1487
1488// Compile the graph to run ccv_cnnp_model_fit
1489static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1490{
1491 int i, j;
1492 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1493 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1493, __extension__ __PRETTY_FUNCTION__
); }))
;
1494 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1495 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1496 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1496, __extension__ __PRETTY_FUNCTION__
); }))
;
1497 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1497
, __extension__ __PRETTY_FUNCTION__); }))
;
1498 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1498, __extension__ __PRETTY_FUNCTION__
); }))
;
1499 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1500 {
1501 _ccv_cnnp_model_set_rewindables(model);
1502 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1503 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1504 _ccv_cnnp_model_rewind_graph(model);
1505 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1506 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1507 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1508 }
1509 const int tensors_init = !!compiled_data->tensors_init.v;
1510 if (!tensors_init)
1511 _ccv_cnnp_model_tensors_init(model, compiled_data);
1512 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1513 // Check if it is not fully allocated, if it is not, init_1.
1514 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1515 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1516 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1516, __extension__ __PRETTY_FUNCTION__); }))
;
1517 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1517, __extension__ __PRETTY_FUNCTION__); }))
;
1518 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1518
, __extension__ __PRETTY_FUNCTION__); }))
;
1519 const int input_size_per_p = input_size / parallel_count;
1520 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1521 const int output_size_per_p = output_size / parallel_count;
1522 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1523 const int fit_size_per_p = fit_size / parallel_count;
1524 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1525 const int parameter_size = compiled_data->parameters->rnum;
1526 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1527 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1528 const int internal_size = compiled_data->internals->rnum;
1529 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1530 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1531 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1532 ccv_array_free(tensor_binds);
1533 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1534 if (tensors_init && parallel_count > 1)
1535 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1536 // If tensor is not init'ed, we need to init states first.
1537 if (_ccv_cnnp_any_to_init(compiled_data))
1538 {
1539 ccv_nnc_tensor_init_states_t tensor_init_states = {
1540 .parallel_count = parallel_count,
1541 .graph = model->graph,
1542 .compiled_data = compiled_data,
1543 .tensor_arena = compiled_data->tensor_arena
1544 };
1545 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1546 }
1547 compiled_data->is_test = 0;
1548 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1549 // No need to set because it is default to training mode.
1550 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1551 for (i = 0; i < saved_aux_size * parameter_size; i++)
1552 {
1553 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1554 continue;
1555 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1556 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1557 for (j = 1; j < parallel_count; j++)
1558 {
1559 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1560 if (copy)
1561 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1562 }
1563 }
1564 const int evaluate_to_size = compiled_data->evaluate.to_size;
1565 compiled_data->evaluate.to_op_size = 0;
1566 for (i = 0; i < evaluate_to_size; i++)
1567 {
1568 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1569 if (to.graph)
1570 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1571 }
1572 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1573 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1574}
1575
1576ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1577{
1578 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1579 if (!compiled_data || !compiled_data->graph)
1580 return 0;
1581 return ccv_nnc_graph_default_stream(compiled_data->graph);
1582}
1583
1584uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1585{
1586 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1587 if (!compiled_data || !compiled_data->tensor_arena)
1588 return 0;
1589 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1590}
1591
1592static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1593{
1594 int i, j;
1595 for (i = 0; i < tensor_size; i++)
1596 {
1597 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1598 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1599 continue;
1600 if (graph)
1601 {
1602 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1603 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1604 tensor_symbol = alias_to;
1605 }
1606 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1607 for (j = 1; j < parallel_count; j++)
1608 {
1609 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1610 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1611 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1612 }
1613 }
1614}
1615
1616void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1617{
1618 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1619 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1619, __extension__ __PRETTY_FUNCTION__); }))
;
1620 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1621 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1621, __extension__ __PRETTY_FUNCTION__
); }))
;
1622 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1622, __extension__ __PRETTY_FUNCTION__
); }))
;
1623 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1623
, __extension__ __PRETTY_FUNCTION__); }))
;
1624 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1624, __extension__ __PRETTY_FUNCTION__); }))
;
1625 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1626 {
1627 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1628 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1629 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1630 // Compile the symbolic graph down only when needed.
1631 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1632 } else {
1633 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1633, __extension__ __PRETTY_FUNCTION__); }))
;
1634 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1634, __extension__ __PRETTY_FUNCTION__); }))
;
1635 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1635
, __extension__ __PRETTY_FUNCTION__); }))
;
1636 const int input_size_per_p = input_size / parallel_count;
1637 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1638 const int output_size_per_p = output_size / parallel_count;
1639 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1640 const int fit_size_per_p = fit_size / parallel_count;
1641 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1642 }
1643 if (compiled_data->is_test)
1644 {
1645 compiled_data->is_test = 0;
1646 ccv_nnc_graph_exec_update_t update = {
1647 .parallel_count = parallel_count,
1648 .graph = model->graph,
1649 .graph_exec_arena = compiled_data->graph_exec_arena,
1650 };
1651 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1652 }
1653 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1654}
1655
1656// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1657static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1658{
1659 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1660 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1661 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1662 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1662, __extension__ __PRETTY_FUNCTION__
); }))
;
1663 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1663, __extension__ __PRETTY_FUNCTION__
); }))
;
1664 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1665 // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel.
1666 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1667 {
1668 const int evaluate_to_size = compiled_data->evaluate.to_size;
1669 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1670 _ccv_cnnp_model_set_rewindables(model);
1671 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1672 0, 0,
1673 0, 0, 0,
1674 0, 0, 0,
1675 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1676 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1677 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1678 int i, j;
1679 for (i = 0; i < evaluate_to_size; i++)
1680 for (j = 1; j < parallel_count; j++)
1681 {
1682 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1683 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1684 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1685 }
1686 }
1687 const int tensors_init = !!compiled_data->tensors_init.v;
1688 if (!tensors_init)
1689 _ccv_cnnp_model_tensors_init(model, compiled_data);
1690 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1691 // Check if it is not fully allocated, if it is not, init_1.
1692 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1693 const int tensor_parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1694 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1695 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1695, __extension__ __PRETTY_FUNCTION__); }))
;
1696 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1696, __extension__ __PRETTY_FUNCTION__); }))
;
1697 const int input_size_per_p = input_size / parallel_count;
1698 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1699 const int output_size_per_p = output_size / parallel_count;
1700 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1701 const int parameter_size = compiled_data->parameters->rnum;
1702 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, tensor_parallel_count, tensor_binds);
1703 const int internal_size = compiled_data->internals->rnum;
1704 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, tensor_parallel_count);
1705 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, tensor_parallel_count, tensor_binds);
1706 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1707 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1708 ccv_array_free(tensor_binds);
1709 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1710 // If tensor is not init'ed, we need to init states first.
1711 if (tensors_init && tensor_parallel_count > 1)
1712 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, tensor_parallel_count);
1713 if (_ccv_cnnp_any_to_init(compiled_data))
1714 {
1715 ccv_nnc_tensor_init_states_t tensor_init_states = {
1716 .parallel_count = tensor_parallel_count,
1717 .graph = model->graph,
1718 .compiled_data = compiled_data,
1719 .tensor_arena = compiled_data->tensor_arena
1720 };
1721 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1722 }
1723 compiled_data->is_test = 1;
1724 ccv_nnc_graph_exec_update_t update = {
1725 .parallel_count = parallel_count,
1726 .graph = model->graph,
1727 .graph_exec_arena = compiled_data->graph_exec_arena,
1728 };
1729 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1730 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1731 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1732}
1733
// Allocate the per-parameter gradient tensors for compiled_data.
//
// One pointer array of parameter_size * 2 * parallel_count entries is allocated. The first
// parameter_size * parallel_count entries become tensors.gradients; the second half is aliased
// as tensors.accum_gradients. Both are indexed as [i + j * parameter_size], where i is the
// parameter index and j the parallel copy.
//
// model: read for parallel_count only (values below 1 are treated as 1).
// compiled_data: must not have tensors.gradients set yet (asserted); parameters and, when
//   present, parameter_flags are read, and tensors.gradients / tensors.accum_gradients are written.
1734static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1735{
// Guard against double initialization: the gradient array must not exist yet.
1736 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1736, __extension__ __PRETTY_FUNCTION__
); }))
;
1737 const int parameter_size = compiled_data->parameters->rnum;
1738 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
// Single allocation shared by gradients (first half) and accum_gradients (second half).
// NOTE(review): the allocation result is used without a NULL check here.
1739 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1740 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1741 int i, j;
1742 for (i = 0; i < parameter_size; i++)
1743 {
// parameter_flags is an optional bitmap with one bit per parameter (64 per word). When it is
// present and bit i is clear, no tensors are created for parameter i: every slot is zeroed.
// NOTE(review): presumably a clear bit means the parameter needs no gradient — confirm where
// parameter_flags is populated.
1744 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1745 {
1746 compiled_data->tensors.gradients[i] = 0;
1747 compiled_data->tensors.accum_gradients[i] = 0;
1748 for (j = 1; j < parallel_count; j++)
1749 {
1750 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1751 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1752 }
1753 continue;
1754 }
// Look up the i-th parameter symbol and the tensor parameters registered for it.
1755 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1756 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
// A parameter with no specific device (CCV_COMPUTE_DEVICE_ANY) is pinned to device id 0.
1757 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1758 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
// Remember the device id of the primary copy; the loop below uses it to pick ids for the other copies.
1759 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1760 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1761 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
// Create the gradient tensor for each additional parallel copy. Copy j is placed on device j,
// except when j equals the primary copy's device id, in which case it is placed on device 0.
1762 for (j = 1; j < parallel_count; j++)
1763 {
1764 if (j != device_id)
1765 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1766 else
1767 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1768 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1769 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1770 }
1771 }
1772}
1773
// Report whether the disable_outgrad bitmask disables every one of the first input_size bits.
//
// disable_outgrad: 64-bit mask; presumably bit i refers to the i-th model input — TODO confirm.
// input_size: number of low bits to examine when the mask is neither of the two sentinels.
// Returns 1 when the mask equals CCV_CNNP_DISABLE_OUTGRAD_ALL or when bits 0..input_size-1 are
// all set; returns 0 when the mask equals CCV_CNNP_DISABLE_OUTGRAD_NONE or any of those bits is clear.
1774static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1775{
// Fast paths for the two sentinel values.
1776 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1777 return 1;
1778 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1779 return 0;
1780 int i;
// Any clear bit within the first input_size bits means at least one entry is not disabled.
// NOTE(review): the shift is only defined for i < 64; confirm that the sentinel values and
// input_size guarantee the loop returns before i reaches 64.
1781 for (i = 0; i < input_size; i++)
1782 if (!(disable_outgrad & ((uint64_t)1 << i)))
1783 return 0;
1784 return 1;
1785}
1786
1787// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1788// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1789static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1790{
1791 int i, j;
1792 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1793 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1794 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1794, __extension__ __PRETTY_FUNCTION__
); }))
;
1795 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1796 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1797 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1797, __extension__ __PRETTY_FUNCTION__
); }))
;
1798 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1798, __extension__ __PRETTY_FUNCTION__
); }))
;
1799 // There shouldn't be a loss function if we evaluate with multistage jit.
1800 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1800, __extension__ __PRETTY_FUNCTION__
); }))
;
1801 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1802 {
1803 _ccv_cnnp_model_set_rewindables(model);
1804 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1805 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1806 _ccv_cnnp_model_rewind_graph(model);
1807 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1808 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1809 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1810 }
1811 const int tensors_init = !!compiled_data->tensors_init.v;
1812 if (!tensors_init)
1813 _ccv_cnnp_model_tensors_init(model, compiled_data);
1814 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1815 // Check if it is not fully allocated, if it is not, init_1.
1816 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1817 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1818 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1818, __extension__ __PRETTY_FUNCTION__); }))
;
1819 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1819, __extension__ __PRETTY_FUNCTION__); }))
;
1820 const int input_size_per_p = input_size / parallel_count;
1821 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1822 const int output_size_per_p = output_size / parallel_count;
1823 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1824 const int parameter_size = compiled_data->parameters->rnum;
1825 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1826 const int internal_size = compiled_data->internals->rnum;
1827 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1828 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1829 if (!compiled_data->tensors.gradients)
1830 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1831 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1832 if (compiled_data->backward.to_size > 0)
1833 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1834 else
1835 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1836 ccv_array_free(tensor_binds);
1837 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1838 if (tensors_init && parallel_count > 1)
1839 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1840 // If tensor is not init'ed, we need to init states first.
1841 if (_ccv_cnnp_any_to_init(compiled_data))
1842 {
1843 ccv_nnc_tensor_init_states_t tensor_init_states = {
1844 .parallel_count = parallel_count,
1845 .graph = model->graph,
1846 .compiled_data = compiled_data,
1847 .tensor_arena = compiled_data->tensor_arena
1848 };
1849 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1850 }
1851 compiled_data->is_test = is_test;
1852 ccv_nnc_graph_exec_update_t update = {
1853 .parallel_count = parallel_count,
1854 .graph = model->graph,
1855 .graph_exec_arena = compiled_data->graph_exec_arena,
1856 };
1857 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1858 const int evaluate_to_size = compiled_data->evaluate.to_size;
1859 compiled_data->evaluate.to_op_size = 0;
1860 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1861 for (i = 0; i < evaluate_to_size; i++)
1862 {
1863 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1864 if (to_op.graph)
1865 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1866 const int* tos;
1867 int to_size;
1868 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1869 for (j = 0; j < to_size; j++)
1870 {
1871 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1872 .d = tos[j],
1873 .graph = model->graph
1874 });
1875 if (to_op.graph)
1876 ccv_array_add_unique_int(backward_from, to_op.d);
1877 }
1878 }
1879 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1879, __extension__
__PRETTY_FUNCTION__); }))
;
1880 compiled_data->backward.from_op_size = backward_from->rnum;
1881 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1882 for (i = 0; i < backward_from->rnum; i++)
1883 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1884 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1885 .graph = compiled_data->graph,
1886 };
1887 // If there are any set nodes (to set some tensors to 0) inserted through the backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find these nodes and explicitly add them to backward.from_ops.
1888 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)->
data)) + (size_t)(compiled_data->graph->exec_info)->
rsize * (size_t)(0)))
;
1889 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1890 uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1891 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data
)) + (size_t)(compiled_data->graph->sources)->rsize *
(size_t)(0)))
;
1892 const int source_size = compiled_data->graph->sources->rnum;
1893 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == compiled_data->graph
) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[
_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void
*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t
)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); --
_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_
[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size
); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_
].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0)
, __extension__ ({ if (_incomings_[(compiled_data->evaluate
.to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(compiled_data->evaluate.to_ops
)[_i_].d].c > 0) continue; _visit_->node[_visit_->size
].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_
->node[_visit_->size].term = ((_incomings_[(compiled_data
->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if (
_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof (
(_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__
({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail
("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c"
, 1893, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
1894 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1895 visited[(idx >> 5)] |= (1u << (idx & 31));
1896 } ccv_nnc_graph_visit_endfor} }
1897 ccv_nnc_graph_visit_free(visit);
1898 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)->
data)) + (size_t)(compiled_data->graph->destinations)->
rsize * (size_t)(0)))
;
1899 const int destination_size = compiled_data->graph->destinations->rnum;
1900 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } int _exist_size_[2] = { (compiled_data->backward
.from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[
_idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail
("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c"
, 1900, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_
][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (
_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == compiled_data->graph)
; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (compiled_data->backward.from_op_size); _i_++
) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data
->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 6 && _d_ < (destination_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1900, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1901 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1902 visited[(idx >> 5)] |= (1u << (idx & 31));
1903 } ccv_nnc_graph_visit_endfor} }
1904 ccv_nnc_graph_visit_free(visit);
1905 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(destination_size)) { _exists_[_p_][_i_] = d; continue; } } else
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if
(_incomings_[d].c == 0 && _incomings_[d].r == 6 &&
_d_ < (destination_size)) { ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1906 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1907 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1908 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1909 {
1910 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ? 1 : 0), __extension__ ({ if (exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ; else __assert_fail ("exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD"
, "ccv_cnnp_model.c", 1910, __extension__ __PRETTY_FUNCTION__
); }))
;
1911 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for empty out the tensor set function, not for the set grad to 1 one.
1912 ccv_array_add_unique_int(backward_from, idx);
1913 }
1914 } ccv_nnc_graph_visit_endfor} }
1915 ccv_nnc_graph_visit_free(visit);
1916 ccfreefree(visited);
1917 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1918 {
1919 compiled_data->backward.from_op_size = backward_from->rnum;
1920 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1921 for (i = 0; i < backward_from->rnum; i++)
1922 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1923 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1924 .graph = compiled_data->graph,
1925 };
1926 }
1927 ccv_array_free(backward_from);
1928 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1929 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1930}
1931
// Prepare the compiled graph for an evaluation with the given params, inputs and outputs, without running it.
// input_size and output_size are asserted to equal model->input_size / model->output_size times parallel_count,
// where parallel_count is model->parallel_count clamped to at least 1.
// The graph is (re)compiled when it does not exist yet, or when params.requires_grad is set and the existing
// compilation differs in graph mode, gradient mode or disable_outgrad. Otherwise the given tensors are only
// rebound to the existing tensor arena. A changed params.is_test is propagated at the end in both cases.
1932void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1933{
1934 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1935 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1935, __extension__ __PRETTY_FUNCTION__); }))
;
1936 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1937 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1937, __extension__ __PRETTY_FUNCTION__
); }))
;
1938 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1938, __extension__ __PRETTY_FUNCTION__
); }))
;
1939 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1939, __extension__ __PRETTY_FUNCTION__); }))
;
     // The gradient mode this call would need: trainables only when _ccv_cnnp_is_disable_outgrad_all reports true
     // for params.disable_outgrad, trainables and inputs otherwise.
1940 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
     // A mismatch only matters when gradients are requested; without requires_grad any existing graph is reused as-is.
1941 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1942 if (!compiled_data->graph || mode_mismatch)
1943 {
1944 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1945 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad).
1946 _ccv_cnnp_compiled_data_backward_free(compiled_data);
     // Compile again, with or without the gradient part depending on what the caller asked for.
1947 if (params.requires_grad)
1948 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1949 else
1950 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1951 } else {
     // The existing graph can be reused: clear the arena bindings and bind the caller's input / output tensors,
     // passing the per-replica count (total size divided by parallel_count) to the bind helper.
1952 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1953 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1953, __extension__ __PRETTY_FUNCTION__); }))
;
1954 const int input_size_per_p = input_size / parallel_count;
1955 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1956 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1956, __extension__ __PRETTY_FUNCTION__); }))
;
1957 const int output_size_per_p = output_size / parallel_count;
1958 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1959 }
     // Record a changed is_test flag and hand it to ccv_cnnp_model_set_is_test together with
     // _ccv_cnnp_cmd_update_for_execs and the update context built here.
1960 if (compiled_data->is_test != params.is_test)
1961 {
1962 compiled_data->is_test = params.is_test;
1963 ccv_nnc_graph_exec_update_t update = {
1964 .parallel_count = parallel_count,
1965 .graph = model->graph,
1966 .graph_exec_arena = compiled_data->graph_exec_arena,
1967 };
1968 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1969 }
1970}
1971
// Evaluate the model: ccv_cnnp_model_dry_run sets up the compiled graph and tensor bindings for the given
// params / inputs / outputs, then the compiled graph is run with tensor_tape and stream_context passed through
// to ccv_nnc_graph_run_with_schedule.
1972void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1973{
1974 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1975 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1975, __extension__ __PRETTY_FUNCTION__); }))
;
1976 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
     // In no-grad mode the graph is run with 0 passed for the schedule argument.
1977 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1978 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1979 else {
     // Otherwise a schedule built from evaluate.to_ops is created on first use and kept in
     // compiled_data->evaluate.schedule for later calls.
1980 if (!compiled_data->evaluate.schedule)
1981 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1982 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1983 }
1984}
1985
1986// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1987// Particularly, this method compiles the accumulator graph.
// For every (parameter, replica) slot that has a gradient tensor, an EWSUM node is added to a temporary
// symbolic graph: updated_accum_gradient = accum_gradient + gradient. The symbolic graph is then compiled into
// compiled_data->backward.accum. If no slot has a gradient tensor, backward.accum becomes an empty graph.
1988static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1989{
1990 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1991 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1991, __extension__ __PRETTY_FUNCTION__); }))
;
1992 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1992, __extension__ __PRETTY_FUNCTION__
); }))
;
1993 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1994 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1995 const int parameter_size = compiled_data->parameters->rnum;
1996 int i, j;
     // One allocation holds three consecutive arrays of parameter_size * parallel_count symbols each:
     // gradients, accum_gradients and updated_accum_gradients. Only backward.gradients owns the memory.
1997 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1998 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1999 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
     // Slots are indexed as i + j * parameter_size (parameter i on replica j).
2000 for (i = 0; i < parameter_size; i++)
2001 for (j = 0; j < parallel_count; j++)
2002 if (compiled_data->tensors.gradients[i + j * parameter_size])
2003 {
2004 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
2005 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
2006 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
2007 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
     // Three symbols with the same tensor parameters: the two EWSUM inputs and its output.
2008 ccv_nnc_tensor_symbol_t inputs[2];
2009 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2010 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2011 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2012 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
2013 } else {
     // No gradient tensor for this slot: mark all three symbols as absent.
2014 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
2015 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
2016 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
2017 }
2018 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
     // A source size of 0 after autogen is treated as "nothing to accumulate": the symbolic graph is dropped and
     // an empty concrete graph is installed. backward.tensor_arena and backward.graph_exec_arena are not
     // assigned on this path.
2019 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
2020 {
2021 ccv_nnc_symbolic_graph_free(accum);
2022 // Create empty graph.
2023 compiled_data->backward.accum = ccv_nnc_graph_new();
2024 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
2025 return;
2026 }
     // Collect the symbol -> tensor bindings for compilation. The updated_accum_gradients symbols are bound to
     // the same tensors.accum_gradients as the accum_gradients symbols, so the sum is written back over the
     // accumulated gradient.
2027 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2028 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2029 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
2030 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2031 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
     // The symbolic graph and the bind list are only needed for compilation.
2032 ccv_nnc_symbolic_graph_free(accum);
2033 ccv_array_free(tensor_binds);
2034 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
2035}
2036
// Run the backward pass on a model compiled in MULTISTAGE_MODE.
// ingrads: incoming gradients. ingrad_size must be 0 or model->output_size * parallel_count. An output whose
//   ingrad is not given (ingrad_size is 0, ingrads is null, or ingrads[i] is null) has its gradient tensor
//   set to 1.
// outgrads: tensors to bind to compiled_data->outgrads. When outgrad_size > 0 it must equal
//   compiled_data->outgrad_size * parallel_count; null entries in outgrads[0 .. outgrad_size_per_p) are skipped.
// tensor_tape and stream_context are passed through to ccv_nnc_graph_run_with_schedule.
// compiled_data->backward.count is incremented on every call. When it is already > 0 on entry, the
// accumulation graph (backward.accum) is run after the backward pass.
2037void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
2038{
2039 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2040 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2040, __extension__ __PRETTY_FUNCTION__); }))
;
2041 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2041, __extension__ __PRETTY_FUNCTION__
); }))
;
2042 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2043 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 2043, __extension__ __PRETTY_FUNCTION__
); }))
;
2044 if (outgrad_size > 0)
2045 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 2045, __extension__ __PRETTY_FUNCTION__
); }))
; }
2046 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2046, __extension__ __PRETTY_FUNCTION__); }))
;
2047 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2047, __extension__ __PRETTY_FUNCTION__
); }))
;
2048 const int parameter_size = compiled_data->parameters->rnum;
2049 // If we need to accumulate the gradients now, do jit on accumulator.
2050 if (compiled_data->backward.count > 0)
2051 {
     // The accumulator graph is compiled the first time it is needed.
2052 if (!compiled_data->backward.accum)
2053 _ccv_cnnp_model_multistage_jit_1(model);
2054 else if (compiled_data->backward.count == 1) {
2055 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2056 int i;
2057 for (i = 0; i < parameter_size * parallel_count; i++)
2058 {
2059 ccv_nnc_tensor_t* tensor;
2060 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
2061 }
     // backward.tensor_arena is only checked, not assumed: the empty-accumulator case never assigns it.
2062 if (compiled_data->backward.tensor_arena)
2063 {
2064 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2065 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
2066 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2067 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2068 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2069 }
2070 }
2071 }
     // Per-replica counts. Entry i of replica j sits at index i + size_per_p * j in ingrads / outgrads.
2072 const int ingrad_size_per_p = model->output_size;
2073 const int outgrad_size_per_p = compiled_data->outgrad_size;
2074 int i, j;
2075 for (i = 0; i < ingrad_size_per_p; i++)
2076 {
2077 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
     // NOTE(review): the decision is made from the first replica's entry ingrads[i] only. The same branch is
     // then applied to every replica j.
2078 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2079 {
2080 // Set it to 1 if it is not specified.
2081 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2082 if (ingrad_tensor)
2083 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
     // Same for the copies of the ingrad symbol on the other replicas.
2084 for (j = 1; j < parallel_count; j++)
2085 {
2086 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2087 if (ingrad_tensor)
2088 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2089 }
2090 } else {
2091 // Make sure the length matches, in case it is an alias.
2092 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 2092, __extension__ __PRETTY_FUNCTION__
); }))
;
2093 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
     // NOTE(review): ingrads[i + ingrad_size_per_p * j] is passed to the bind call without a null check.
2094 for (j = 1; j < parallel_count; j++)
2095 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2096 }
2097 }
2098 if (outgrad_size > 0)
2099 {
2100 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 2100, __extension__ __PRETTY_FUNCTION__
); }))
;
     // Bind every provided outgrad tensor and its per-replica counterparts to the arena.
     // NOTE(review): as with ingrads, only the first replica's entry outgrads[i] is null-checked.
2101 for (i = 0; i < outgrad_size_per_p; i++)
2102 if (outgrads[i])
2103 {
2104 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2105 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2106 for (j = 1; j < parallel_count; j++)
2107 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2108 }
2109 } else {
     // Without outgrads, either gradient mode is accepted.
2110 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2111, __extension__ __PRETTY_FUNCTION__
); }))
2111 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2111, __extension__ __PRETTY_FUNCTION__
); }))
;
2112 }
2113 // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients.
2114 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
2115 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
2116 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
     // The backward schedule is built from backward.from_ops on first use and kept in
     // compiled_data->backward.schedule for later calls.
2117 if (!compiled_data->backward.schedule)
2118 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2119 // Run the backward pass.
2120 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2121 // If we need to run accumulation round, do that now.
2122 if (compiled_data->backward.count > 0)
2123 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2124 // Update the count, this determines whether we need to accumulate or not.
2125 ++compiled_data->backward.count;
2126}
2127
2128// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2129// Particularly, this method compiles the parameter update graph.
2130static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2131{
2132 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2133 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2133, __extension__ __PRETTY_FUNCTION__
); }))
;
2134 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2135 const int parameter_size = compiled_data->parameters->rnum;
2136 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2137 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2138 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2139 // Bind accumulated gradients.
2140 if (compiled_data->backward.count > 1)
2141 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2142 else
2143 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2144 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2145 int i, j;
2146 for (i = 0; i < compiled_data->backward.to_size; i++)
2147 {
2148 const int* tos;
2149 int to_size;
2150 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2151 for (j = 0; j < to_size; j++)
2152 {
2153 // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply
2154 // gradients graph.
2155 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2156 .d = tos[j],
2157 .graph = model->graph,
2158 });
2159 if (!exec.graph)
2160 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2161 }
2162 }
2163 const int from_size = apply_gradients_from->rnum;
2164 if (from_size == 0)
2165 {
2166 ccv_array_free(apply_gradients_from);
2167 ccv_array_free(tensor_binds);
2168 return;
2169 }
2170 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2171 for (i = 0; i < from_size; i++)
2172 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2173 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2174 .graph = model->graph
2175 };
2176 ccv_array_free(apply_gradients_from);
2177 // It can only ends with updates on the parameters.
2178 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2179 for (i = 0; i < parameter_size; i++)
2180 {
2181 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2182 continue;
2183 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2184 for (j = 1; j < parallel_count; j++)
2185 {
2186 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2187 ccv_array_push(tos, &copy);
2188 }
2189 }
2190 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2191 ccv_array_free(tos);
2192 ccv_array_free(tensor_binds);
2193 ccfreefree(froms);
2194 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2195 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2196 {
2197 // Skip on no tensor.
2198 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2199 continue;
2200 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2201 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2202 for (j = 1; j < parallel_count; j++)
2203 {
2204 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2205 if (copy)
2206 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2207 }
2208 }
2209 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2210}
2211
2212void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2213{
2214 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2215 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2215, __extension__ __PRETTY_FUNCTION__); }))
;
2216 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2216, __extension__ __PRETTY_FUNCTION__
); }))
;
2217 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2218 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2218, __extension__ __PRETTY_FUNCTION__); }))
;
2219 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2219, __extension__ __PRETTY_FUNCTION__
); }))
;
2220 // Skip if there is no backward pass.
2221 if (compiled_data->backward.count <= 0)
2222 return;
2223 // Skip if there is no parameters.
2224 if (compiled_data->parameters->rnum == 0)
2225 {
2226 compiled_data->backward.count = 0;
2227 return;
2228 }
2229 if (!compiled_data->apply_gradients.graph)
2230 _ccv_cnnp_model_multistage_jit_2(model);
2231 else {
2232 const int parameter_size = compiled_data->parameters->rnum;
2233 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2234 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2235 if (compiled_data->backward.count > 1)
2236 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2237 else
2238 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2239 }
2240 if (compiled_data->apply_gradients.graph)
2241 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2242 // Reset backward count to 0.
2243 compiled_data->backward.count = 0;
2244}
2245
2246void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2247{
2248 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2249 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2250 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2250, __extension__ __PRETTY_FUNCTION__
); }))
;
2251 const int tensors_init = !!compiled_data->tensors_init.v;
2252 int this_tensor_init = tensors_init;
2253 if (!tensors_init)
2254 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2255 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2256 // Check if it is not fully allocated, if it is not, init_1.
2257 this_tensor_init = 0;
2258 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2259 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2260 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2261 if (param_ref < 0)
2262 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2262
, __extension__ __PRETTY_FUNCTION__); }))
; }
2263 else
2264 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2264, __extension__ __PRETTY_FUNCTION__
); }))
; }
2265 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2266 ccv_array_free(parameter_indices);
2267 const int parameter_size = compiled_data->parameters->rnum;
2268 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2268
, __extension__ __PRETTY_FUNCTION__); }))
;
2269 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2269, __extension__ __PRETTY_FUNCTION__
); }))
;
2270 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
2271 int i;
2272 if (!this_tensor_init)
2273 {
2274 if (compiled_data->tensors.parameters[d])
2275 {
2276 for (i = 1; i < parallel_count; i++)
2277 { assert(compiled_data->tensors.parameters[d + i * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[d + i *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[d + i * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[d + i * parameter_size]"
, "ccv_cnnp_model.c", 2277, __extension__ __PRETTY_FUNCTION__
); }))
; }
2278 this_tensor_init = 1;
2279 } else {
2280 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
;
2281 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2282 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2283 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2284 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2285 compiled_data->tensors.parameters[d] = ccv_nnc_tensor_new(0, info, 0);
2286 for (i = 1; i < parallel_count; i++)
2287 {
2288 if (i != device_id)
2289 CCV_TENSOR_SET_DEVICE_ID(info.type, i)(info.type) = (((info.type) & ~0xfff00) | (((i) & 0xfff
) << 8))
;
2290 else
2291 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2292 compiled_data->tensors.parameters[d + i * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2293 }
2294 }
2295 }
2296 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2297 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2297, __extension__
__PRETTY_FUNCTION__); }))
;
2298 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2299 for (i = 1; i < parallel_count; i++)
2300 {
2301 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2302 if (copy_tensor)
2303 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2304 }
2305 // Mark this symbol as init'ed.
2306 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2307 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2308 init_v[s >> 5] |= (1u << (s & 0x1f));
2309 // If we just allocated this tensor, now it is time to check if we need to mark it as fully allocated.
2310 if (!this_tensor_init)
2311 {
2312 if (ccv_cnnp_model_tensors_any_to_alloc(model, compiled_data))
2313 compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)compiled_data->tensors_init.v | (uintptr_t)1);
2314 else // Remove the flag.
2315 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2316 }
2317}
2318
2319void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2320{
2321 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2322 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2323 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2323, __extension__ __PRETTY_FUNCTION__
); }))
;
2324 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2324, __extension__ __PRETTY_FUNCTION__
); }))
;
2325 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2326 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2327 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2328 if (param_ref < 0)
2329 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2329
, __extension__ __PRETTY_FUNCTION__); }))
; }
2330 else
2331 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2331, __extension__ __PRETTY_FUNCTION__
); }))
; }
2332 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2333 ccv_array_free(parameter_indices);
2334 const int parameter_size = compiled_data->parameters->rnum;
2335 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2335
, __extension__ __PRETTY_FUNCTION__); }))
;
2336 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2336, __extension__ __PRETTY_FUNCTION__
); }))
;
2337 // We don't need to consider parallel_count, every parameter on each device is identical.
2338 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2339 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2339, __extension__
__PRETTY_FUNCTION__); }))
;
2340 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2341}
2342
2343ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2344{
2345 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2346 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2347 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2347, __extension__ __PRETTY_FUNCTION__
); }))
;
2348 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2348, __extension__ __PRETTY_FUNCTION__
); }))
;
2349 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2350 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2351 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2352 if (param_ref < 0)
2353 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2353
, __extension__ __PRETTY_FUNCTION__); }))
; }
2354 else
2355 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2355, __extension__ __PRETTY_FUNCTION__
); }))
; }
2356 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2357 ccv_array_free(parameter_indices);
2358 const int parameter_size = compiled_data->parameters->rnum;
2359 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2359
, __extension__ __PRETTY_FUNCTION__); }))
;
2360 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2360, __extension__ __PRETTY_FUNCTION__
); }))
;
2361 // We don't need to consider parallel_count, every parameter on each device is identical.
2362 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2363 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2363, __extension__
__PRETTY_FUNCTION__); }))
;
2364 return tensor->info;
2365}
2366
2367const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2368{
2369 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2370 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2371 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2371, __extension__ __PRETTY_FUNCTION__
); }))
;
2372 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2373 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2374 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2375 if (param_ref < 0)
2376 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2376
, __extension__ __PRETTY_FUNCTION__); }))
; }
2377 else
2378 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2378, __extension__ __PRETTY_FUNCTION__
); }))
; }
2379 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2380 ccv_array_free(parameter_indices);
2381 const int parameter_size = compiled_data->parameters->rnum;
2382 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2382
, __extension__ __PRETTY_FUNCTION__); }))
;
2383 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2383, __extension__ __PRETTY_FUNCTION__
); }))
;
2384 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2385}
2386
2387int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2388{
2389 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2389, __extension__ __PRETTY_FUNCTION__
); }))
;
2390 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2391 return compiled_data->parameters->rnum;
2392}
2393
2394uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2395{
2396 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2396, __extension__ __PRETTY_FUNCTION__
); }))
;
2397 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2398 const int parameter_size = compiled_data->parameters->rnum;
2399 int i;
2400 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2401 uint64_t size = 0;
2402 const int tensors_init = !!compiled_data->tensors_init.v;
2403 uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
: 0;
2404 for (i = 0; i < parameter_size; i++)
2405 {
2406 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2407 if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] | (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i])
2408 {
2409 ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info;
2410 size += ccv_nnc_tensor_data_size(params);
2411 continue;
2412 }
2413 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2414 .graph = graph,
2415 .d = d
2416 });
2417 size += ccv_nnc_tensor_data_size(params);
2418 }
2419 return size;
2420}
2421
2422int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type)
2423{
2424 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2424, __extension__ __PRETTY_FUNCTION__
); }))
;
2425 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2426 if (count != compiled_data->parameters->rnum)
2427 return 0;
2428 if (CCV_TENSOR_GET_DEVICE(type)((type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2429 CCV_TENSOR_SET_DEVICE_ID(type, 0)(type) = (((type) & ~0xfff00) | (((0) & 0xfff) <<
8))
;
2430 int i;
2431 // We don't need to consider parallel_count, every parameter on each device is identical.
2432 for (i = 0; i < count; i++)
2433 {
2434 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2435 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2436 {
2437 tensors[i] = 0;
2438 continue;
2439 }
2440 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2441 if (tensor->info.type == type)
2442 tensors[i] = tensor;
2443 else {
2444 ccv_nnc_tensor_param_t info = tensor->info;
2445 info.type = type;
2446 tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet.
2447 }
2448 }
2449 for (i = 0; i < count; i++)
2450 {
2451 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2452 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2453 continue;
2454 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2455 // Now initiate transfer. We should do this one on a stream.
2456 if (tensor->info.type != type)
2457 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensors[i])(ccv_nnc_tensor_t* []){tensors[i]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2458 }
2459 // Copy names and remove parameters.
2460 for (i = 0; i < count; i++)
2461 {
2462 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i];
2463 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2464 {
2465 names[i] = 0;
2466 continue;
2467 }
2468 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2469 const size_t name_len = ccv_min(strnlen(name, 1023), 1023)({ typeof (strnlen(name, 1023)) _a = (strnlen(name, 1023)); typeof
(1023) _b = (1023); (_a < _b) ? _a : _b; })
;
2470 names[i] = ccmallocmalloc(name_len + 1);
2471 names[i][name_len] = 0;
2472 memcpy(names[i], name, name_len);
2473 if (tensor->info.type == type)
2474 compiled_data->tensors.parameters[i] = 0; // Only move when it is moved.
2475 }
2476 return 1;
2477}
2478
// Instantiates a khash hash map named `ccv_cnnp_parameter_id` with C-string keys and `int` values.
// Everything after the invocation on this line is the macro's preprocessor expansion as rendered by the
// analyzer report: the kh_ccv_cnnp_parameter_id_t struct plus static inline init / destroy / clear / get /
// resize / put / del helpers.
// Note: kh_resize only realloc()s `vals` and kh_put only writes `keys[x]`, so the value of a freshly
// inserted slot is indeterminate until the caller stores into `vals[x]` itself.
2479KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27
Taking true branch
28
Taking false branch
29
Calling 'kh_resize_ccv_cnnp_parameter_id'
30
Taking true branch
31
Assuming the condition is false
32
Taking false branch
33
'?' condition is true
34
Assuming 'new_flags' is non-null, which participates in a condition later
35
Taking false branch
36
'?' condition is true
37
Taking true branch
38
Assuming 'new_keys' is non-null, which participates in a condition later
39
Taking false branch
40
Taking true branch
41
Storing uninitialized value
42
Assuming 'new_vals' is non-null, which participates in a condition later
43
Taking false branch
44
Taking true branch
45
Loop condition is false. Execution continues on line 2479
46
Taking false branch
47
Returning from 'kh_resize_ccv_cnnp_parameter_id'
48
Taking false branch
49
Assuming the condition is true
50
Taking true branch
51
Taking true branch
57
Taking true branch
58
Assuming the condition is true
59
Assuming the condition is true
60
The value 1 is assigned to 'i'
61
Taking false branch
62
Assuming the condition is false
63
Assuming the condition is false
64
'?' condition is false
65
Returning the value 1
2480
2481void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
2482{
2483 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2483, __extension__ __PRETTY_FUNCTION__
); }))
;
2484 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2485 int i;
2486 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2487 if (count != compiled_data->parameters->rnum)
2488 {
2489 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2490 // Build the map between name and the index.
2491 for (i = 0; i < count; i++)
2492 {
2493 int ret;
2494 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[i], &ret);
2495 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2495
, __extension__ __PRETTY_FUNCTION__); }))
;
2496 kh_val(id_map, k)((id_map)->vals[k]) = i;
2497 }
2498 }
2499 const int parameter_size = compiled_data->parameters->rnum;
2500 int* copy_back = 0;
2501 const int tensors_init = !!compiled_data->tensors_init.v;
2502 if (!tensors_init)
2503 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2504 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2505 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2506 for (i = 0; i < parameter_size; i++)
2507 {
2508 int j = i;
2509 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2510 if (i >= 0 || strncmp(name, names[i], 1023) != 0)
2511 {
2512 // Build the map.
2513 if (id_map == 0)
2514 {
2515 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2516 for (j = 0; j < count; j++)
2517 {
2518 int ret;
2519 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[j], &ret);
2520 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2520
, __extension__ __PRETTY_FUNCTION__); }))
;
2521 kh_val(id_map, k)((id_map)->vals[k]) = j;
2522 }
2523 }
2524 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name)kh_get_ccv_cnnp_parameter_id(id_map, name);
2525 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2526 continue;
2527 j = kh_val(id_map, k)((id_map)->vals[k]);
2528 }
2529 if (compiled_data->tensors.parameters[i]) // Cannot be a shared parameter to read.
2530 { assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))((void) sizeof ((!((uintptr_t)compiled_data->tensors.parameters
[i] & (uintptr_t)1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t
)compiled_data->tensors.parameters[i] & (uintptr_t)1))
; else __assert_fail ("!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)"
, "ccv_cnnp_model.c", 2530, __extension__ __PRETTY_FUNCTION__
); }))
; }
2531 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
2532 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2533 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2534 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2535 const int d = parameter.d;
2536 if (info.type == tensors[j]->info.type && invalidates) // Can move.
2537 {
2538 // Deallocate it if needed.
2539 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2540 if (compiled_data->tensors.parameters[i])
2541 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2542 compiled_data->tensors.parameters[i] = tensors[j];
2543 tensors[j] = 0;
2544 } else {
2545 if (!compiled_data->tensors.parameters[i])
2546 { // Not allocated, to allocate first.
2547 // Create new one, make sure we create this by having the right parameters.
2548 const int type = info.type;
2549 info = tensors[j]->info;
2550 info.type = type; // Revert back the type.
2551 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
2552 }
2553 if (!copy_back)
2554 copy_back = (int*)cccalloccalloc(parameter_size, sizeof(int));
2555 copy_back[i] = j + 1;
2556 }
2557 init_v[d >> 5] |= (1u << (d & 0x1f));
2558 // Create this tensor for other data parallel allocations.
2559 info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
2560 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2561 for (j = 1; j < parallel_count; j++)
2562 if (!compiled_data->tensors.parameters[i + j * parameter_size])
2563 {
2564 if (j != device_id)
2565 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
2566 else
2567 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2568 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2569 }
2570 // No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
2571 }
2572 if (id_map)
2573 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2574 // Now do the transfer.
2575 if (copy_back)
2576 {
2577 for (i = 0; i < parameter_size; i++)
2578 {
2579 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[i]) & ~(uintptr_t)1))
;
2580 if (copy_back[i] == 0)
2581 continue;
2582 const int j = copy_back[i] - 1;
2583 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j])(ccv_nnc_tensor_t* []){tensors[j]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2584 }
2585 ccfreefree(copy_back);
2586 }
2587}
2588
2589ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2590{
2591 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2592 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2592, __extension__ __PRETTY_FUNCTION__); }))
;
2593 const int parameter_size = compiled_data->parameters->rnum;
2594 int i;
2595 for (i = 0; i < parameter_size; i++)
2596 {
2597 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2598 if (first(model, name, context))
2599 return ccv_cnnp_model_parameters(model, -1, i);
2600 }
2601 return 0;
2602}
2603
2604ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2605{
2606 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2607 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2607, __extension__ __PRETTY_FUNCTION__); }))
;
2608 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2609 const int parameter_size = compiled_data->parameters->rnum;
2610 int i;
2611 for (i = 0; i < parameter_size; i++)
2612 {
2613 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2614 if (filter(model, name, context))
2615 {
2616 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2617 ccv_array_push(parameters, &parameter);
2618 }
2619 }
2620 return parameters;
2621
2622}
2623
2624CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2625{
2626 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2627 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2627, __extension__ __PRETTY_FUNCTION__); }))
;
2628 const int tensors_init = !!compiled_data->tensors_init.v;
2629 if (!tensors_init) // If nothing initialized, we return parameter 0.
2630 return ccv_cnnp_model_parameters(model, -1, 0);
2631 const int parameter_size = compiled_data->parameters->rnum;
2632 int i;
2633 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2634 for (i = 0; i < parameter_size; i++)
2635 {
2636 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2637 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2638 return ccv_cnnp_model_parameters(model, -1, i);
2639 }
2640 return 0;
2641}
2642
2643static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2644{
2645 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2646 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2646, __extension__
__PRETTY_FUNCTION__); }))
;
2647 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2648 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2649 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2650 return to_parameter_indices;
2651}
2652
2653static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2654{
2655 // If the model is not compiled yet. Compile them now.
2656 if (!model->graph)
2657 {
2658 model->graph = ccv_nnc_symbolic_graph_new();
2659 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2659, __extension__ __PRETTY_FUNCTION__
); }))
;
2660 const int input_size = from_model->input_size;
2661 ccv_nnc_tensor_param_t input_params[input_size];
2662 int i;
2663 for (i = 0; i < input_size; i++)
2664 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2665 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2666 model->parallel_count = from_model->parallel_count;
2667 model->memory_compression = from_model->memory_compression;
2668 model->memory_reduction = from_model->memory_reduction;
2669 model->gradient_checkpointing = from_model->gradient_checkpointing;
2670 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2671 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2672 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2673 }
2674 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2675 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2675, __extension__ __PRETTY_FUNCTION__
); }))
;
2676 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2677 if (!to_tensors_init)
2678 {
2679 if (only_init_0)
2680 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2681 else
2682 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2683 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2684 // Check if it is not fully allocated, if it is not, init_1.
2685 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2686 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2686, __extension__ __PRETTY_FUNCTION__
); }))
;
2687 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2688 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2689 if (*from_param_ref < 0 && *param_ref >= 0)
2690 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2690, __extension__ __PRETTY_FUNCTION__
); }))
; }
2691 else if (*from_param_ref >= 0)
2692 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2692, __extension__ __PRETTY_FUNCTION__
); }))
; }
2693 if (*param_ref < 0 && *from_param_ref >= 0)
2694 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2694, __extension__ __PRETTY_FUNCTION__); }))
; }
2695 else if (*param_ref >= 0)
2696 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2696, __extension__ __PRETTY_FUNCTION__
); }))
; }
2697}
2698
2699void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2700{
2701 ccv_array_t* to_parameter_indices;
2702 int to_param_ref;
2703 ccv_array_t* from_parameter_indices;
2704 int from_param_ref;
2705 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2706 // Should be exactly the same tensor.
2707 if (to_param_ref < 0 && from_param_ref < 0)
2708 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2708, __extension__ __PRETTY_FUNCTION__
); }))
; }
2709 // To models.
2710 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2711 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2711, __extension__ __PRETTY_FUNCTION__
); }))
;
2712 // From models.
2713 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2714 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2715 const int to_parameter_size = to_compiled_data->parameters->rnum;
2716 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2717 int i, j;
2718 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2719 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2720 for (i = 0; i < rnum; i++)
2721 {
2722 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2723 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2723, __extension__ __PRETTY_FUNCTION__); }))
;
2724 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2724, __extension__ __PRETTY_FUNCTION__
); }))
;
2725 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2726 // If the original is not init'ed. We cannot copy from.
2727 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2728 continue;
2729 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2730 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2730, __extension__ __PRETTY_FUNCTION__); }))
;
2731 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2731, __extension__ __PRETTY_FUNCTION__
); }))
;
2732 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2733 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2733, __extension__
__PRETTY_FUNCTION__); }))
;
2734 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2735 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2735, __extension__
__PRETTY_FUNCTION__); }))
;
2736 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2737 for (j = 1; j < parallel_count; j++)
2738 {
2739 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2740 if (copy_tensor)
2741 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2742 }
2743 // Mark this symbol as init'ed.
2744 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2745 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2746 }
2747 ccv_array_free(to_parameter_indices);
2748 ccv_array_free(from_parameter_indices);
2749}
2750
2751void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2752{
2753 ccv_array_t* to_parameter_indices;
2754 int to_param_ref;
2755 ccv_array_t* from_parameter_indices;
2756 int from_param_ref;
2757 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2758 // Should be exactly the same tensor.
2759 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2760 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2760, __extension__ __PRETTY_FUNCTION__
); }))
; }
2761 // To models.
2762 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2763 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2763, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2764 // From models.
2765 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2766 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2767 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2767, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count can share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2768 const int from_parameter_size = from_compiled_data->parameters->rnum;
2769 const int to_parameter_size = to_compiled_data->parameters->rnum;
2770 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2771 int i, j;
2772 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2773 char* updated_name = 0;
2774 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2775 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2776 for (i = 0; i < rnum; i++)
2777 {
2778 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2779 // Need to figure out how to use the renamer here.
2780 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2781 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2781, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2782 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2782, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2783 if (renamer
18.1
'renamer' is non-null
)
2784 {
2785 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2786 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2787 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2788 updated_name = (char*)ccmallocmalloc(1024);
2789 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2790 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2791 memcpy(updated_name, src_name, src_name_len);
2792 updated_name[src_name_len] = 0;
2793 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2794 continue; // Skip this.
2795 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2796 {
2797 // Nothing changed.
2798 } else {
2799 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2800 {
2801 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2802 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
54
Assuming 'j' is >= 'from_parameter_size'
55
Loop condition is false. Execution continues on line 2810
2803 {
2804 int ret;
2805 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
52
Returning from 'kh_put_ccv_cnnp_parameter_id'
2806 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2806
, __extension__ __PRETTY_FUNCTION__); }))
;
53
Taking true branch
2807 kh_val(id_map, k)((id_map)->vals[k]) = j;
2808 }
2809 }
2810 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
56
Calling 'kh_get_ccv_cnnp_parameter_id'
66
Returning from 'kh_get_ccv_cnnp_parameter_id'
67
'k' initialized to 1
2811 if (k
67.1
'k' is not equal to field 'n_buckets'
== kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
68
Taking false branch
2812 continue;
2813 src_d = kh_val(id_map, k)((id_map)->vals[k]);
69
Assigned value is garbage or undefined
2814 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2814, __extension__ __PRETTY_FUNCTION__); }))
;
2815 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2815, __extension__
__PRETTY_FUNCTION__); }))
;
2816 }
2817 }
2818 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2818, __extension__ __PRETTY_FUNCTION__); }))
;
2819 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2819, __extension__
__PRETTY_FUNCTION__); }))
;
2820 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2821 // If the original is not init'ed. We cannot share from.
2822 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2823 continue;
2824 for (j = 0; j < parallel_count; j++)
2825 {
2826 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2827 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2827, __extension__
__PRETTY_FUNCTION__); }))
;
2828 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2829 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2830 ccv_nnc_tensor_free(dest);
2831 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2832 }
2833 // Mark this symbol as init'ed.
2834 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2835 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2836 }
2837 ccv_array_free(to_parameter_indices);
2838 ccv_array_free(from_parameter_indices);
2839 if (id_map)
2840 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2841 if (updated_name)
2842 ccfreefree(updated_name);
2843 // Mark it as incomplete so we will call init_1.
2844 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2845 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2846 else // Remove the flag.
2847 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2848}
2849
2850ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2851{
2852 if (!compiled_data->stream_map)
2853 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2854 int ret = 0;
2855 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2856 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2856, __extension__ __PRETTY_FUNCTION__); }))
;
2857 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2858 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2859 if (ret != 0)
2860 {
2861 stream = ccv_nnc_stream_context_new(type);
2862 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2863 }
2864 return stream;
2865}
2866
2867void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2868{
2869 ccv_array_t* to_parameter_indices;
2870 int to_param_ref;
2871 ccv_array_t* from_parameter_indices;
2872 int from_param_ref;
2873 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2874 // Should be exactly the same tensor.
2875 if (to_param_ref < 0 && from_param_ref < 0)
2876 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2876, __extension__ __PRETTY_FUNCTION__
); }))
; }
2877 // To models.
2878 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2879 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2879, __extension__ __PRETTY_FUNCTION__
); }))
;
2880 // From models.
2881 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2882 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2883 const int to_parameter_size = to_compiled_data->parameters->rnum;
2884 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2885 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2885, __extension__ __PRETTY_FUNCTION__
); }))
;
2886 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2886, __extension__ __PRETTY_FUNCTION__
); }))
;
2887 int i, j;
2888 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2889 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2890 for (i = 0; i < aux_in_size; i++)
2891 inputs[i + 2] = aux_ins[i];
2892 for (i = 0; i < aux_out_size; i++)
2893 outputs[i + 1] = aux_outs[i];
2894 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2895 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2896 for (i = 0; i < rnum; i++)
2897 {
2898 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2899 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2899, __extension__ __PRETTY_FUNCTION__); }))
;
2900 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2900, __extension__ __PRETTY_FUNCTION__
); }))
;
2901 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2902 // If the original is not init'ed. We cannot copy from.
2903 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2904 continue;
2905 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2906 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2906, __extension__ __PRETTY_FUNCTION__); }))
;
2907 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2907, __extension__ __PRETTY_FUNCTION__
); }))
;
2908 if (parallel_count > 1)
2909 {
2910 ccv_nnc_stream_context_t* streams[parallel_count];
2911 ccv_nnc_stream_signal_t* signal;
2912 if (stream_context)
2913 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2914 for (j = 0; j < parallel_count; j++)
2915 {
2916 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2917 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2918 if (!dest || !src)
2919 {
2920 streams[j] = 0;
2921 continue;
2922 }
2923 // At the moment, can only handle them on the same device.
2924 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2924, __extension__ __PRETTY_FUNCTION__
); }))
;
2925 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2925, __extension__ __PRETTY_FUNCTION__
); }))
;
2926 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2927 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2928 int type = stream_type;
2929 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2930 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2931 // Wait signal to finish.
2932 if (stream_context)
2933 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2934 inputs[0] = outputs[0] = dest;
2935 inputs[1] = src;
2936 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2937 if (stream_context)
2938 {
2939 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2940 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2941 }
2942 streams[j] = stream_0;
2943 }
2944 // If this should be blocking, blocking it.
2945 if (!stream_context)
2946 for (j = 0; j < parallel_count; j++)
2947 if (streams[j])
2948 ccv_nnc_stream_context_wait(streams[j]);
2949 } else {
2950 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2951 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2951, __extension__
__PRETTY_FUNCTION__); }))
;
2952 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2953 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2953, __extension__
__PRETTY_FUNCTION__); }))
;
2954 inputs[0] = outputs[0] = dest;
2955 inputs[1] = src;
2956 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2957 }
2958 // Mark this symbol as init'ed.
2959 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2960 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2961 }
2962 ccv_array_free(to_parameter_indices);
2963 ccv_array_free(from_parameter_indices);
2964}
2965
2966void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2967{
2968 int to_param_ref;
2969 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2970 // To models.
2971 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2972 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2972, __extension__ __PRETTY_FUNCTION__
); }))
;
2973 // Tensor has to be inited already.
2974 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2974, __extension__ __PRETTY_FUNCTION__
); }))
;
2975 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2975, __extension__ __PRETTY_FUNCTION__
); }))
;
2976 // From models.
2977 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2978 const int to_parameter_size = to_compiled_data->parameters->rnum;
2979 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2980 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2980, __extension__ __PRETTY_FUNCTION__
); }))
;
2981 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2981, __extension__ __PRETTY_FUNCTION__
); }))
;
2982 int i, j;
2983 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2984 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2985 for (i = 0; i < aux_in_size; i++)
2986 inputs[i + 1] = aux_ins[i];
2987 for (i = 0; i < aux_out_size; i++)
2988 outputs[i + 1] = aux_outs[i];
2989 for (i = 0; i < rnum; i++)
2990 {
2991 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2992 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2992, __extension__ __PRETTY_FUNCTION__); }))
;
2993 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2993, __extension__ __PRETTY_FUNCTION__
); }))
;
2994 if (parallel_count > 1)
2995 {
2996 ccv_nnc_stream_context_t* streams[parallel_count];
2997 ccv_nnc_stream_signal_t* signal;
2998 if (stream_context)
2999 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
3000 for (j = 0; j < parallel_count; j++)
3001 {
3002 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
3003 if (!dest)
3004 {
3005 streams[j] = 0;
3006 continue;
3007 }
3008 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3009 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3010 int type = stream_type;
3011 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3012 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3013 // Wait signal to finish.
3014 if (stream_context)
3015 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3016 inputs[0] = outputs[0] = dest;
3017 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3018 if (stream_context)
3019 {
3020 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3021 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3022 }
3023 streams[j] = stream_0;
3024 }
3025 // If this should be blocking, blocking it.
3026 if (!stream_context)
3027 for (j = 0; j < parallel_count; j++)
3028 if (streams[j])
3029 ccv_nnc_stream_context_wait(streams[j]);
3030 } else {
3031 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
3032 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3032, __extension__
__PRETTY_FUNCTION__); }))
;
3033 inputs[0] = outputs[0] = dest;
3034 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3035 }
3036 // No need to mark this symbol as init'ed, it is already.
3037 }
3038 ccv_array_free(to_parameter_indices);
3039}
3040
3041void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
3042{
3043 int to_param_ref;
3044 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3045 // To models.
3046 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
3047 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 3047, __extension__ __PRETTY_FUNCTION__
); }))
;
3048 // Tensor has to be inited already.
3049 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 3049, __extension__ __PRETTY_FUNCTION__
); }))
;
3050 ccv_nnc_tensor_t** tensor_gradients;
3051 if (to_compiled_data->backward.count > 1)
3052 tensor_gradients = to_compiled_data->tensors.accum_gradients;
3053 else
3054 tensor_gradients = to_compiled_data->tensors.gradients;
3055 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 3055, __extension__ __PRETTY_FUNCTION__
); }))
;
3056 // From models.
3057 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3058 const int to_parameter_size = to_compiled_data->parameters->rnum;
3059 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3060 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 3060, __extension__ __PRETTY_FUNCTION__
); }))
;
3061 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 3061, __extension__ __PRETTY_FUNCTION__
); }))
;
3062 int i, j;
3063 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
3064 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
3065 for (i = 0; i < aux_in_size; i++)
3066 inputs[i + 1] = aux_ins[i];
3067 for (i = 0; i < aux_out_size; i++)
3068 outputs[i + 1] = aux_outs[i];
3069 for (i = 0; i < rnum; i++)
3070 {
3071 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3072 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3072, __extension__ __PRETTY_FUNCTION__); }))
;
3073 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3073, __extension__ __PRETTY_FUNCTION__
); }))
;
3074 if (parallel_count > 1)
3075 {
3076 ccv_nnc_stream_context_t* streams[parallel_count];
3077 ccv_nnc_stream_signal_t* signal;
3078 if (stream_context)
3079 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
3080 for (j = 0; j < parallel_count; j++)
3081 {
3082 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
3083 if (!dest)
3084 {
3085 streams[j] = 0;
3086 continue;
3087 }
3088 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3089 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3090 int type = stream_type;
3091 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3092 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3093 // Wait signal to finish.
3094 if (stream_context)
3095 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3096 inputs[0] = outputs[0] = dest;
3097 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3098 if (stream_context)
3099 {
3100 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3101 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3102 }
3103 streams[j] = stream_0;
3104 }
3105 // If this should be blocking, blocking it.
3106 if (!stream_context)
3107 for (j = 0; j < parallel_count; j++)
3108 if (streams[j])
3109 ccv_nnc_stream_context_wait(streams[j]);
3110 } else {
3111 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
3112 if (!dest)
3113 continue;
3114 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3114, __extension__
__PRETTY_FUNCTION__); }))
;
3115 inputs[0] = outputs[0] = dest;
3116 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3117 }
3118 // No need to mark this symbol as init'ed, it is already.
3119 }
3120 ccv_array_free(to_parameter_indices);
3121}
3122
3123void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
3124{
3125 // Only CUDA backend has this feature.
3126#ifdef HAVE_CUDA1
3127 int to_param_ref;
3128 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3129 // To models.
3130 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3131 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3131, __extension__ __PRETTY_FUNCTION__); }))
;
3132 // Tensor has to be inited already.
3133 assert(!!compiled_data->tensors_init.v)((void) sizeof ((!!compiled_data->tensors_init.v) ? 1 : 0)
, __extension__ ({ if (!!compiled_data->tensors_init.v) ; else
__assert_fail ("!!compiled_data->tensors_init.v", "ccv_cnnp_model.c"
, 3133, __extension__ __PRETTY_FUNCTION__); }))
;
3134 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 3134, __extension__ __PRETTY_FUNCTION__
); }))
;
3135 // From models.
3136 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3137 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3138 int i;
3139 for (i = 0; i < rnum; i++)
3140 {
3141 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3142 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3142, __extension__ __PRETTY_FUNCTION__); }))
;
3143 assert(dest_d < compiled_data->parameters->rnum)((void) sizeof ((dest_d < compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3143, __extension__ __PRETTY_FUNCTION__
); }))
;
3144 if (parallel_count > 1)
3145 {
3146 assert(0 && "Cannot support this when data parallel is in effect.")((void) sizeof ((0 && "Cannot support this when data parallel is in effect."
) ? 1 : 0), __extension__ ({ if (0 && "Cannot support this when data parallel is in effect."
) ; else __assert_fail ("0 && \"Cannot support this when data parallel is in effect.\""
, "ccv_cnnp_model.c", 3146, __extension__ __PRETTY_FUNCTION__
); }))
;
3147 } else {
3148 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[dest_d]) & ~(uintptr_t)1))
;
3149 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 3149, __extension__
__PRETTY_FUNCTION__); }))
;
3150 ccv_nnc_tensor_param_t params = src->info;
3151 if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) != CCV_TENSOR_GPU_MEMORY)
3152 continue;
3153 const size_t size = ccv_nnc_tensor_data_size(params);
3154 if (size <= 0)
3155 continue;
3156 const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1);
3157 const int tfb = (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL(0xFFF) && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
3158 ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_t));
3159 tensor->dataof = 0;
3160 tensor->alias_ref = 0;
3161 tensor->sig = 0;
3162 tensor->refcount = 1;
3163 tensor->info = params;
3164 if (tfb)
3165 {
3166 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000) | params.dim[2];
3167 // This corresponding to mat->step
3168 tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]))(((params.dim[1]) * _ccv_get_data_type_size[(((((params.datatype
) & 0xFF000) | params.dim[2])) & 0xFF000) >> 12
] * (((((params.datatype) & 0xFF000) | params.dim[2])) &
0xFFF) + 3) & -4)
;
3169 } else // This won't be recognized by ccv_dense_matrix_t
3170 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000);
3171 // Remove this flag so it can be deallocated as usual.
3172 tensor->type &= ~CCV_NO_DATA_ALLOC;
3173 assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY)((void) sizeof ((((params.type) & 0xfff00) != CCV_COMPUTE_DEVICE_ANY
) ? 1 : 0), __extension__ ({ if (((params.type) & 0xfff00
) != CCV_COMPUTE_DEVICE_ANY) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY"
, "ccv_cnnp_model.c", 3173, __extension__ __PRETTY_FUNCTION__
); }))
;
3174 void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), size);
3175 if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
3176 {
3177 tensor->data.u8 = (uint8_t*)ptr;
3178 tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer a explicit prefetch call.
3179 } else {
3180 // Allocation failed.
3181 ccfreefree(tensor);
3182 continue;
3183 }
3184 // TODO: Cannot run this on the stream context yet, due to allocation and deallocations.
3185 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0);
3186 cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), tensor->data.u8, size);
3187 compiled_data->tensors.parameters[dest_d] = tensor;
3188 // Can free out the old one.
3189 if (should_free)
3190 ccv_nnc_tensor_free(src);
3191 }
3192 // No need to mark this symbol as init'ed, it is already.
3193 }
3194 ccv_array_free(to_parameter_indices);
3195#endif
3196}
3197
3198ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
3199{
3200 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3201 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3201, __extension__ __PRETTY_FUNCTION__); }))
;
3202 return compiled_data->minimize.minimizer;
3203}
3204
3205void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
3206{
3207 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3208 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3208, __extension__ __PRETTY_FUNCTION__); }))
;
3209 const int parameter_size = compiled_data->parameters->rnum;
3210 if (parameter_size == 0)
3211 return;
3212 if (reset)
3213 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 3213, __extension__ __PRETTY_FUNCTION__
); }))
; }
3214 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3215 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
3216 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
3217 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
3218 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3219 // We update all parameters, at this point, we have one minimizer.
3220 if (set_parameters == 0 || set_parameter_size == 0)
3221 compiled_data->minimize.minimizer = minimizer;
3222 int i;
3223 if (set_parameters && set_parameter_size)
3224 {
3225 // I need to save what's the minimizer along with this.
3226 if (!compiled_data->minimize.parameters)
3227 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
3228 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
3229 set_minimizer_for_parameter->minimizer = minimizer;
3230 set_minimizer_for_parameter->parameter_size = set_parameter_size;
3231 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
3232 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
3233 }
3234 // If reset is true, clear the parameters array.
3235 if (reset && compiled_data->minimize.parameters)
3236 {
3237 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3238 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3239 ccv_array_clear(compiled_data->minimize.parameters);
3240 }
3241 if (!compiled_data->update_nodes)
3242 return;
3243 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
3244 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 3244, __extension__ __PRETTY_FUNCTION__); }))
;
3245 if (saved_aux_size > old_max_saved_aux_size)
3246 {
3247 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 3247, __extension__ __PRETTY_FUNCTION__
); }))
;
3248 // Reallocate first, move them around later.
3249 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
3250 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
3251 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
3252 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
3253 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
3254 }
3255 int flag = 0;
3256 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3257 if (set_parameters && set_parameter_size)
3258 {
3259 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
3260 for (i = 0; i < set_parameter_size; i++)
3261 {
3262 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
3263 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 3263, __extension__ __PRETTY_FUNCTION__
); }))
;
3264 const int old_rnum = parameter_indices->rnum;
3265 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
3266 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
3267 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 3267, __extension__ __PRETTY_FUNCTION__
); }))
;
3268 if (param_ref >= 0)
3269 {
3270 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 3270, __extension__ __PRETTY_FUNCTION__
); }))
;
3271 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
3272 parameter_indices->rnum = old_rnum + 1;
3273 }
3274 }
3275 // We may have duplicated indices, but that is OK, we will set it twice.
3276 for (i = 0; i < parameter_indices->rnum; i++)
3277 {
3278 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
3279 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
3280 flag = 1;
3281 }
3282 ccv_array_free(parameter_indices);
3283 } else {
3284 for (i = 0; i < parameter_size; i++)
3285 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
3286 flag = 1;
3287 if (compiled_data->minimize.parameters)
3288 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
3289 flag = 1;
3290 }
3291 if (flag)
3292 {
3293 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
3294 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
3295 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3296 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3297 }
3298}
3299
3300void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
3301{
3302 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3303 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3303, __extension__ __PRETTY_FUNCTION__); }))
;
3304 compiled_data->compile_params = compile_params;
3305}
3306
3307void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
3308{
3309 if (model->graph && out_size > 0)
3310 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
3311 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
3312 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
3313 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
3314 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
3315 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
3316 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
3317}
3318
3319void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
3320{
3321 if (model->graph)
3322 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
3323}
3324
3325static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
3326{
3327 int i;
3328 const int parameter_size = compiled_data->parameters->rnum;
3329 ccv_array_free(compiled_data->parameters);
3330 if (compiled_data->parameter_flags)
3331 ccfreefree(compiled_data->parameter_flags);
3332 const int internal_size = compiled_data->internals->rnum;
3333 ccv_array_free(compiled_data->internals);
3334 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 3334, __extension__ __PRETTY_FUNCTION__
); }))
;
3335 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 3335, __extension__ __PRETTY_FUNCTION__
); }))
;
3336 for (i = 0; i < parameter_size; i++)
3337 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
3338 ccv_array_free(compiled_data->ids.parameters);
3339 for (i = 0; i < internal_size; i++)
3340 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
3341 ccv_array_free(compiled_data->ids.internals);
3342 const int parallel_count = compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_root_parallel_count(model);
3343 if (compiled_data->tensors.parameters)
3344 {
3345 for (i = 0; i < parameter_size * parallel_count; i++)
3346 // If it is not marked as not belonging, we can free it.
3347 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
3348 if (compiled_data->tensors.parameters[i])
3349 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
3350 for (i = 0; i < internal_size * parallel_count; i++)
3351 if (compiled_data->tensors.internals[i])
3352 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
3353 ccfreefree(compiled_data->tensors.parameters);
3354 }
3355 if (compiled_data->tensors.gradients)
3356 {
3357 for (i = 0; i < parameter_size * parallel_count; i++)
3358 {
3359 if (compiled_data->tensors.gradients[i])
3360 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3361 if (compiled_data->tensors.accum_gradients[i])
3362 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3363 }
3364 ccfreefree(compiled_data->tensors.gradients);
3365 }
3366 if (compiled_data->minimize.parameters)
3367 {
3368 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3369 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3370 ccv_array_free(compiled_data->minimize.parameters);
3371 }
3372 if (compiled_data->rewindables)
3373 ccv_array_free(compiled_data->rewindables);
3374 if (compiled_data->tensors_init.v)
3375 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
3376 if (compiled_data->evaluate.tos)
3377 ccfreefree(compiled_data->evaluate.tos);
3378 compiled_data->evaluate.tos = 0;
3379 if (compiled_data->stream_map)
3380 {
3381 khiter_t k;
3382 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
3383 {
3384 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
3385 continue;
3386 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
3387 ccv_nnc_stream_context_free(stream);
3388 }
3389 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
3390 }
3391 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3392 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3393 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3394 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3395 if (compiled_data->gradient_checkpoints)
3396 {
3397 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3398 {
3399 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
3400 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 3400, __extension__ __PRETTY_FUNCTION__
); }))
;
3401 ccfreefree(checkpoint->inputs);
3402 ccv_array_free(checkpoint->tensor_symbols);
3403 }
3404 ccv_array_free(compiled_data->gradient_checkpoints);
3405 }
3406 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3407 ccfreefree(compiled_data);
3408}
3409
3410void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3411{
3412 ccv_cnnp_model_deinit(model);
3413 if (model->isa->dealloc)
3414 model->isa->dealloc(model);
3415 if (model->io)
3416 {
3417 int i;
3418 for (i = 0; i < model->io->rnum; i++)
3419 {
3420 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
3421 if (model_io->outgoings)
3422 ccv_array_free(model_io->outgoings);
3423 if (model_io->incomings)
3424 ccv_array_free(model_io->incomings);
3425 if (model_io->dependencies)
3426 ccv_array_free(model_io->dependencies);
3427 ccfreefree(model_io);
3428 }
3429 ccv_array_free(model->io);
3430 }
3431 if (model->parameter_indices)
3432 ccv_array_free(model->parameter_indices);
3433 if (model->inputs)
3434 ccfreefree(model->inputs);
3435 if (model->graph)
3436 ccv_nnc_symbolic_graph_free(model->graph);
3437 if (model->compiled_data)
3438 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3439 if (model->name)
3440 ccfreefree(model->name);
3441 ccfreefree(model);
3442}
3443
3444void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3445{
3446 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3447 if (!compiled_data)
3448 return;
3449 if (compiled_data->graph)
3450 ccv_nnc_graph_cancel(compiled_data->graph);
3451 if (compiled_data->apply_gradients.graph)
3452 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3453}
3454
3455void ccv_cnnp_model_async_enter(ccv_cnnp_model_t* const model)
3456{
3457 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3458 if (!compiled_data)
3459 return;
3460 if (compiled_data->graph)
3461 ccv_nnc_graph_async_enter(compiled_data->graph);
3462 if (compiled_data->apply_gradients.graph)
3463 ccv_nnc_graph_async_enter(compiled_data->apply_gradients.graph);
3464}
3465
3466void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3467{
3468 model->exec_flags = flags;
3469}
3470
3471int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3472{
3473 return model->exec_flags;
3474}