Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2810, column 11
Description: Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2026-05-19-050436-2739807-1 -x c 
ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7#include "_ccv_nnc_symbolic_graph.h"
8#ifdef HAVE_CUDA1
9#include "gpu/ccv_nnc_compat.h"
10#endif
11
12// MARK - Level-5 API
13
14ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
15{
16 if (!model->io)
17 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
18 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
19 model_io->param_ref = 0;
20 model_io->param_sel = 0;
21 model_io->visit = 0;
22 model_io->model = model;
23 model_io->dependencies = 0;
24 model_io->dependents = 0;
25 model_io->outgoings = 0;
26 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
27 ccv_array_push(model->io, &model_io);
28 if (input_size > 0)
29 {
30 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
31 ccv_array_resize(model_io->incomings, input_size);
32 int i;
33 memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t
)(model_io->incomings)->rsize * (size_t)(0)))
, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
34 for (i = 0; i < input_size; i++)
35 {
36 if (!inputs[i]->outgoings)
37 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
38 ccv_array_push(inputs[i]->outgoings, &model_io);
39 }
40 } else {
41 model_io->incomings = 0;
42 }
43 return model_io;
44}
45
46void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
47{
48 assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__
({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0"
, "ccv_cnnp_model.c", 48, __extension__ __PRETTY_FUNCTION__);
}))
;
49 if (!model_io->dependencies)
50 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
51 int i, j;
52 for (i = 0; i < dependency_size; i++)
53 {
54 int flag = 0;
55 // Check if it is already exist or not.
56 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
57 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t
)(model_io->dependencies)->rsize * (size_t)(j)))
== dependencies[i])
58 flag = 1;
59 if (flag)
60 continue;
61 ccv_array_push(model_io->dependencies, dependencies + i);
62 ++dependencies[i]->dependents;
63 }
64}
65
66int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
67{
68 return model->output_size;
69}
70
71int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
72{
73 // If the model is compiled, it is default to 1 unless it is not.
74 if (model->compiled_data)
75 return model->is_trainable >= 0 ? model->is_trainable : 1;
76 return model->is_trainable;
77}
78
79ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
80{
81 if (!model->io)
82 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
83 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s));
84 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
85 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
86 model_io->visit = 0;
87 model_io->model = model;
88 model_io->outputs = 0;
89 model_io->dependencies = 0;
90 model_io->dependents = 0;
91 model_io->incomings = 0;
92 model_io->outgoings = 0;
93 ccv_array_push(model->io, &model_io);
94 return model_io;
95}
96
97void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
98{
99 model->notify_hook.func = func;
100 model->notify_hook.context = context;
101}
102
103void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
104{
105 if (model->notify_hook.func)
106 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
107 if (model->isa->notify)
108 model->isa->notify(model, tag, payload);
109}
110
111static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
112{
113 int i, j;
114 for (i = 0; i < graph_exec_symbol_size; i++)
115 {
116 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
117 // Check whether this tensor symbol has any duplicate.
118 for (j = i + 1; j < graph_exec_symbol_size;)
119 {
120 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
121 // If there is a same tensor symbol, remove it.
122 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
123 {
124 if (j + 1 < graph_exec_symbol_size)
125 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
126 --graph_exec_symbol_size;
127 continue;
128 }
129 ++j;
130 }
131 }
132 return graph_exec_symbol_size;
133}
134
135void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
136{
137 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
138 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
139 int i;
140 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
141 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
142 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
143 {
144 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data
)) + (size_t)(add_to_array_context->symbols)->rsize * (
size_t)(i)))
;
145 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
146 {
147 // Only add to parameter_indices if it is trainable.
148 if (add_to_array_context->add_parameter_indices)
149 ccv_array_add_unique_int(model->parameter_indices, i);
150 // Found it, return, don't add it.
151 return;
152 }
153 }
154 // Only add to parameter_indices if it is trainable.
155 if (add_to_array_context->add_parameter_indices)
156 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
157 // This is a new one, no need to add_unique_int, it is unique.
158 ccv_array_push(add_to_array_context->symbols, &symbol);
159 if (add_to_array_context->trainables)
160 ccv_array_push(add_to_array_context->trainables, &is_trainable);
161 char id[2048];
162 id[0] = add_to_array_context->prefix;
163 id[1] = '-';
164 int total_len = 2;
165 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
166 {
167 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences
)->data)) + (size_t)(add_to_array_context->sequence->
sequences)->rsize * (size_t)(i)))
;
168 int len;
169 if (name->name && name->name[0] != '\0')
170 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
171 else
172 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
173 total_len += len;
174 if (total_len >= 2047)
175 break;
176 }
177 if (total_len < 2047)
178 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
179 assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__
({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048"
, "ccv_cnnp_model.c", 179, __extension__ __PRETTY_FUNCTION__)
; }))
;
180 char *heap_id = (char*)ccmallocmalloc(total_len + 1);
181 memcpy(heap_id, id, total_len + 1);
182 ccv_array_push(add_to_array_context->ids, &heap_id);
183 ++add_to_array_context->sequence->it;
184}
185
186static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
187{
188 compiled_data->f = compiled_data->fits + output_size;
189 compiled_data->xpu_alloc.mp_hdr = -1;
190 compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str();
191 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc();
192 compiled_data->gradient_checkpoints = gradient_checkpoints;
193}
194
195static int _ccv_cnnp_model_root_parallel_count(const ccv_cnnp_model_t* const model)
196{
197 return ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
198}
199
200static int _ccv_cnnp_model_effective_parallel_count(const ccv_cnnp_model_t* const model)
201{
202 int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
203 if (model->graph && model->graph->data_parallel.count > parallel_count)
204 parallel_count = model->graph->data_parallel.count;
205 return parallel_count;
206}
207
208static int _ccv_cnnp_compiled_data_parallel_count(const ccv_cnnp_model_t* const model, const ccv_cnnp_compiled_data_t* const compiled_data)
209{
210 return compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_effective_parallel_count(model);
211}
212
213ccv_nnc_tensor_symbol_t ccv_cnnp_model_get_symbol(ccv_cnnp_model_t* const self, const ccv_nnc_tensor_symbol_t symbol)
214{
215 assert(self->data)((void) sizeof ((self->data) ? 1 : 0), __extension__ ({ if
(self->data) ; else __assert_fail ("self->data", "ccv_cnnp_model.c"
, 215, __extension__ __PRETTY_FUNCTION__); }))
;
216 ccv_cnnp_model_build_data_t* const build_data = (ccv_cnnp_model_build_data_t*)self->data;
217 if (build_data->parallel_count <= 1 || build_data->parallel_rank == 0)
218 return symbol;
219 const int rank = build_data->parallel_rank;
220 assert(rank > 0)((void) sizeof ((rank > 0) ? 1 : 0), __extension__ ({ if (
rank > 0) ; else __assert_fail ("rank > 0", "ccv_cnnp_model.c"
, 220, __extension__ __PRETTY_FUNCTION__); }))
;
221 assert(rank < build_data->parallel_count)((void) sizeof ((rank < build_data->parallel_count) ? 1
: 0), __extension__ ({ if (rank < build_data->parallel_count
) ; else __assert_fail ("rank < build_data->parallel_count"
, "ccv_cnnp_model.c", 221, __extension__ __PRETTY_FUNCTION__)
; }))
;
222 ccv_nnc_symbolic_graph_t* const graph = (ccv_nnc_symbolic_graph_t*)symbol.graph;
223 ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, symbol, rank);
224 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
225 return copy;
226 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, symbol);
227 if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY)
228 CCV_TENSOR_SET_DEVICE_ID(params.type, rank)(params.type) = (((params.type) & ~0xfff00) | (((rank) &
0xfff) << 8))
;
229 copy = ccv_nnc_tensor_symbol_new(graph, params, 0);
230 ccv_nnc_tensor_symbol_set_copy(graph, symbol, rank, copy);
231 return copy;
232}
233
// Context handed to _ccv_cnnp_model_set_exec_flags while it is installed as the
// graph's exec-symbol-new hook. The two "old" fields hold the hook (and its
// context) that was installed before, so it can be chained and later restored;
// graph is the symbolic graph whose exec symbols get flags applied; build_data
// supplies the exec_flags value to apply.
234typedef struct {
235 void* old_graph_exec_symbol_new_hook_context;
236 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
237 ccv_nnc_symbolic_graph_t* graph;
238 ccv_cnnp_model_build_data_t* build_data;
239} ccv_cnnp_model_set_exec_flags_context_t;
240
241static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
242{
243 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
244 if (flags_context->build_data->exec_flags)
245 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
246 if (flags_context->old_graph_exec_symbol_new_hook)
247 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
248}
249
250static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
251{
252 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 252, __extension__ __PRETTY_FUNCTION__); }))
;
253 model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
254 int i;
255 for (i = 0; i < input_size; i++)
256 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
257 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
258 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
259 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
260 ccv_cnnp_model_sequence_t model_sequence = {
261 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
262 };
263 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
264 .add_parameter_indices = 1,
265 .prefix = 't',
266 .sequence = &model_sequence,
267 .symbols = parameters,
268 .ids = parameter_ids,
269 .trainables = parameter_trainables,
270 };
271 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
272 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
273 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
274 .add_parameter_indices = 0,
275 .prefix = 'r',
276 .sequence = &model_sequence,
277 .symbols = internals,
278 .ids = internal_ids,
279 .trainables = 0,
280 };
281 ccv_cnnp_model_build_data_t build_data = {
282 .exec_flags = 0,
283 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
284 .parallel_count = 1,
285 .parallel_rank = 0,
286 .model_sequence = &model_sequence,
287 .add_to_array = ccv_cnnp_model_add_to_array,
288 .parameters = parameters,
289 .context = {
290 .add_to_parameter = &add_to_parameter_context,
291 .add_to_output = &add_to_output_context,
292 },
293 .gradient_checkpoints = 0,
294 };
295 model->data = &build_data;
296 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
297 .graph = model->graph,
298 .build_data = &build_data,
299 .old_graph_exec_symbol_new_hook = 0,
300 .old_graph_exec_symbol_new_hook_context = 0
301 };
302 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
303 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
304 // Reset back to previous hook.
305 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
306 for (i = 0; i < model->output_size; i++)
307 {
308 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
309 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
310 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
311 continue;
312 // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method
313 // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be
314 // honest, because we cannot handle cases of alias is part of the original tensor but bind differently).
315 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
316 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
317 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, &output, 1, model->outputs + i, 1, "contiguous");
318 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
319 }
320 model->data = 0;
321 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
322 if (model_sequence.sequences)
323 ccv_array_free(model_sequence.sequences);
324 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
325 int not_trainables = 0;
326 // Assert no parameter is alias.
327 for (i = 0; i < parameters->rnum; i++)
328 {
329 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
330 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
331 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 331, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
332 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
== 0)
333 not_trainables = 1;
334 }
335 assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables->
rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables
->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum"
, "ccv_cnnp_model.c", 335, __extension__ __PRETTY_FUNCTION__)
; }))
;
336 uint64_t* parameter_flags = 0;
337 if (not_trainables)
338 {
339 parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
340 for (i = 0; i < parameter_trainables->rnum; i++)
341 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
)
342 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
343 }
344 ccv_array_free(parameter_trainables);
345 // Assert no internal is alias.
346 for (i = 0; i < internals->rnum; i++)
347 {
348 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals
)->rsize * (size_t)(i)))
;
349 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
350 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 350, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
351 }
352 const int output_size = model->output_size;
353 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
354 const int parameters_rnum = parameters->rnum;
355 if (input_size > 0)
356 {
357 ccv_array_resize(parameters, parameters_rnum + input_size);
358 memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(parameters_rnum)))
, model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
359 }
360 ccv_nnc_symbolic_graph_simplify(model->graph,
361 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
362 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
363 CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
364 CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
,
365 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters_rnum + input_size,
366 model->outputs, output_size,
367 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
368 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
369 // Size it down.
370 parameters->rnum = parameters_rnum;
371 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
372 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
373 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
374 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 374, __extension__ __PRETTY_FUNCTION__)
; }))
;
375 compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
376 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
377 compiled_data->loss = loss;
378 if (loss.cmd == CCV_NNC_NOOP)
379 {
380 // If no loss function provided, there is no fits.
381 for (i = 0; i < output_size; i++)
382 {
383 compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
384 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
385 if (alias_to.d < 0)
386 compiled_data->f[i] = model->outputs[i];
387 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
388 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
389 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
390 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
391 int j;
392 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
393 { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if (
ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c"
, 393, __extension__ __PRETTY_FUNCTION__); }))
; } // There is no ofs.
394 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
395 }
396 }
397 } else {
398 for (i = 0; i < output_size; i++)
399 {
400 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
401 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
402 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
403 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit}
, (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 -1)
, TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, (
1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 -1)
, 0);
404 }
405 }
406 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
407 ccv_nnc_symbolic_graph_simplify(model->graph,
408 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, // Only do Ops fusion, in this way, we can fuse the loss function.
409 0, 0, // No need to provide binds at this point.
410 compiled_data->f, model->output_size,
411 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
412 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
413 // If inputs are from GPU, stream type is GPU.
414 compiled_data->parameters = parameters;
415 compiled_data->parameter_flags = parameter_flags;
416 compiled_data->internals = internals;
417 compiled_data->ids.parameters = parameter_ids;
418 compiled_data->ids.internals = internal_ids;
419 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
420}
421
422static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
423{
424 ccv_array_t* const stack = (ccv_array_t*)context;
425 ccv_array_push(stack, &symbol.d);
426}
427
428static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
429{
430 const ccv_nnc_tensor_symbol_t src_symbol = {
431 .d = src_index,
432 .graph = src_graph
433 };
434 const ccv_nnc_tensor_symbol_t dest_symbol = {
435 .d = dest_index,
436 .graph = dest_graph
437 };
438 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
439 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
440 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
441 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
442 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
443 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
444}
445
446static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
447{
448 const ccv_nnc_tensor_symbol_t src_symbol = {
449 .d = src_index,
450 .graph = src_graph
451 };
452 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
453 const ccv_nnc_tensor_symbol_t dest_symbol = {
454 .d = dest_index,
455 .graph = dest_graph
456 };
457 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
458 if (src_params.dim[0] == 0 || dest_params.dim[0] == 0)
459 return 1;
460 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
461}
462
463static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
464static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
465
// Context handed to _ccv_cnnp_cmd_update_for_execs: parallel_count bounds the
// loop over per-rank copies of an exec symbol, graph is the symbolic graph used
// to look those copies up, and graph_exec_arena maps exec symbols to the
// concrete graph execs that get updated.
466typedef struct {
467 int parallel_count;
468 ccv_nnc_symbolic_graph_t* graph;
469 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
470} ccv_nnc_graph_exec_update_t;
471
472static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
473{
474 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
475 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
476 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
477 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
478 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
479 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
480 const int parallel_count = graph_exec_update->parallel_count;
481 int i;
482 for (i = 1; i < parallel_count; i++)
483 {
484 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
485 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
486 {
487 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
488 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
489 }
490 }
491}
492
493void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
494{
495 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 495, __extension__ __PRETTY_FUNCTION__); }))
;
496 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 496, __extension__ __PRETTY_FUNCTION__)
; }))
;
497 assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if
(!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c"
, 497, __extension__ __PRETTY_FUNCTION__); }))
;
498 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
499 init->graph = ccv_nnc_symbolic_graph_new();
500 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
501 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
502 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
503 init->parallel_count = model->parallel_count;
504 init->memory_compression = model->memory_compression;
505 init->memory_reduction = model->memory_reduction;
506 init->gradient_checkpointing = model->gradient_checkpointing;
507 init->compiled_data->stream_type = model->compiled_data->stream_type;
508 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
509 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
510 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
511 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
512 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
513 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0);
514 int i, j;
515 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
516 for (i = 0; i < compiled_data->parameters->rnum; i++)
517 {
518 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
519 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 519, __extension__ __PRETTY_FUNCTION__)
; }))
;
520 }
521 for (i = 0; i < compiled_data->internals->rnum; i++)
522 {
523 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
524 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 524, __extension__ __PRETTY_FUNCTION__)
; }))
;
525 }
526 // Update inputs.
527 assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size)
? 1 : 0), __extension__ ({ if (model->input_size == init->
input_size) ; else __assert_fail ("model->input_size == init->input_size"
, "ccv_cnnp_model.c", 527, __extension__ __PRETTY_FUNCTION__)
; }))
;
528 for (i = 0; i < model->input_size; i++)
529 if (model->inputs[i].d >= 0)
530 {
531 assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0"
, "ccv_cnnp_model.c", 531, __extension__ __PRETTY_FUNCTION__)
; }))
;
532 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
533 }
534 // Update outputs.
535 assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size
) ? 1 : 0), __extension__ ({ if (model->output_size == init
->output_size) ; else __assert_fail ("model->output_size == init->output_size"
, "ccv_cnnp_model.c", 535, __extension__ __PRETTY_FUNCTION__)
; }))
;
536 for (i = 0; i < model->output_size; i++)
537 {
538 if (model->outputs[i].d >= 0)
539 {
540 assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->outputs[i].d >= 0) ; else __assert_fail (
"init->outputs[i].d >= 0", "ccv_cnnp_model.c", 540, __extension__
__PRETTY_FUNCTION__); }))
;
541 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
542 }
543 if (model->outputs[i].d != model->compiled_data->f[i].d)
544 {
545 assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data
->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[
i].d != init->compiled_data->f[i].d) ; else __assert_fail
("init->outputs[i].d != init->compiled_data->f[i].d"
, "ccv_cnnp_model.c", 545, __extension__ __PRETTY_FUNCTION__)
; }))
;
546 if (model->compiled_data->f[i].d >= 0)
547 {
548 assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ?
1 : 0), __extension__ ({ if (init->compiled_data->f[i]
.d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0"
, "ccv_cnnp_model.c", 548, __extension__ __PRETTY_FUNCTION__)
; }))
;
549 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
550 }
551 }
552 }
553 // Go through the graph to set tensor on matching symbols
554 for (i = 0; i < stack->rnum; i++)
555 {
556 const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize
* (size_t)(i)))
;
557 // If exceed range, skip.
558 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
559 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
560 continue;
561 const ccv_nnc_graph_exec_symbol_t src_symbol = {
562 .d = d,
563 .graph = init->graph
564 };
565 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
566 .d = d,
567 .graph = model->graph
568 };
569 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
570 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
571 // If the name doesn't match, skip.
572 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
573 continue;
574 // Now get all the inputs and outputs, if matches, set them.
575 const int* src_inputs;
576 int src_input_size;
577 const int* src_outputs;
578 int src_output_size;
579 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
580 const int* dest_inputs;
581 int dest_input_size;
582 const int* dest_outputs;
583 int dest_output_size;
584 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
585 // We may have unmatched input / output size because this is the minimizer and it has
586 // different saved_aux (for example, when we shrunk with CMD_NOOP).
587 if (src_input_size != dest_input_size)
588 continue;
589 if (src_output_size != dest_output_size)
590 continue;
591 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
592 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
593 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
594 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
595 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
596 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
597 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
598 // a new exec symbol.
599 for (j = 0; j < src_input_size; j++)
600 if (src_inputs[j] >= 0)
601 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
602 for (j = 0; j < src_output_size; j++)
603 if (src_outputs[j] >= 0)
604 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
605 }
606 ccv_array_free(stack);
607 // After this, we get all tensors in the model graph resolved through tensor_auto.
608 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0);
609 // Verify symbols we get matches.
610 const int parameter_size = compiled_data->parameters->rnum;
611 for (i = 0; i < parameter_size; i++)
612 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->parameters)->data)) + (size_t)(compiled_data
->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (
((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data->
parameters)->data)) + (size_t)(compiled_data->parameters
)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d"
, "ccv_cnnp_model.c", 612, __extension__ __PRETTY_FUNCTION__)
; }))
; }
613 const int internal_size = compiled_data->internals->rnum;
614 for (i = 0; i < internal_size; i++)
615 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->internals)->data)) + (size_t)(compiled_data
->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->internals)->
data)) + (size_t)(init->compiled_data->internals)->rsize
* (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((compiled_data->internals)->data)) +
(size_t)(compiled_data->internals)->rsize * (size_t)(i
))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init
->compiled_data->internals)->data)) + (size_t)(init->
compiled_data->internals)->rsize * (size_t)(i))))->d
) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d"
, "ccv_cnnp_model.c", 615, __extension__ __PRETTY_FUNCTION__)
; }))
; }
616 // Go through compiled data.
617 if (compiled_data->tensor_arena)
618 {
619 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
620 if (flag == 0 && compiled_data->graph_exec_arena)
621 {
622 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
623 // Since we will reinit, if we previously set is_test, we need to set it again.
624 if (compiled_data->is_test)
625 {
626 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
627 ccv_nnc_graph_exec_update_t update = {
628 .parallel_count = parallel_count,
629 .graph = model->graph,
630 .graph_exec_arena = compiled_data->graph_exec_arena,
631 };
632 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
633 }
634 } else
635 // Free-up tensor arena & graph exec arena.
636 _ccv_cnnp_compiled_data_graph_free(compiled_data);
637 }
638 // There are other compiled graphs, for accum and apply gradients.
639 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
640 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
641 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
642 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
643 // That is why we don't update these compiled graphs at all this point.
644 // Free the model, we've already "absorbed" it.
645 ccv_cnnp_model_free(init);
646}
647
648void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
649{
650 assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model->
input_size == 0) ? 1 : 0), __extension__ ({ if (input_size ==
model->input_size || model->input_size == 0) ; else __assert_fail
("input_size == model->input_size || model->input_size == 0"
, "ccv_cnnp_model.c", 650, __extension__ __PRETTY_FUNCTION__)
; }))
;
651 if (model->input_size == 0)
652 model->input_size = input_size;
653 if (!model->graph) // The graph is not compiled yet.
654 {
655 model->graph = ccv_nnc_symbolic_graph_new();
656 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
657 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 657, __extension__ __PRETTY_FUNCTION__)
; }))
;
658 int i, flag = 0;
659 for (i = 0; !flag && i < input_size; i++)
660 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY);
661 // If inputs are from GPU, stream type is GPU.
662 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
663 model->compiled_data->minimize.minimizer = minimizer;
664 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
665 } else {
666 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
667 // And then absorb the "new model" to the old one.
668 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
669 ccv_cnnp_model_absorb(model, init, inputs, input_size);
670 // Reset minimizer.
671 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
672 }
673}
674
675ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
676{
677 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
678 new_model->is_trainable = is_trainable;
679 return new_model;
680}
681
682void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
683{
684 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 684, __extension__ __PRETTY_FUNCTION__); }))
;
685 assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0
), __extension__ ({ if (output_size == model->output_size)
; else __assert_fail ("output_size == model->output_size"
, "ccv_cnnp_model.c", 685, __extension__ __PRETTY_FUNCTION__)
; }))
;
686 ccv_nnc_symbolic_graph_t* const graph = model->graph;
687 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0);
688 int i;
689 for (i = 0; i < output_size; i++)
690 {
691 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL"
, "ccv_cnnp_model.c", 691, __extension__ __PRETTY_FUNCTION__)
; }))
;
692 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
693 }
694}
695
696void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
697{
698 if (workspace_size == model->workspace_size)
699 return;
700 model->workspace_size = workspace_size;
701 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
702 if (compiled_data && compiled_data->graph)
703 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0);
704}
705
706size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
707{
708 return model->workspace_size;
709}
710
711void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
712{
713 if (parallel == 0)
714 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
715 else
716 model->parallel_count = parallel;
717 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
718 if (compiled_data)
719 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 719, __extension__ __PRETTY_FUNCTION__)
; }))
; }
720}
721
722void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
723{
724 model->max_stream_count = max_stream_count;
725 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
726 if (compiled_data)
727 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 727, __extension__ __PRETTY_FUNCTION__)
; }))
; }
728}
729
730void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
731{
732 model->memory_compression = memory_compression;
733 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
734 if (compiled_data)
735 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 735, __extension__ __PRETTY_FUNCTION__)
; }))
; }
736}
737
738void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
739{
740 model->memory_reduction = memory_reduction;
741 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
742 if (compiled_data)
743 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 743, __extension__ __PRETTY_FUNCTION__)
; }))
; }
744}
745
746void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
747{
748 model->gradient_checkpointing = gradient_checkpointing;
749}
750
751int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
752{
753 return model->gradient_checkpointing;
754}
755
// Context passed to _ccv_cnnp_init_states_for_tensors through its opaque `context` pointer.
756typedef struct {
// Exclusive upper bound of the copy loop: copies 1 .. parallel_count - 1 are visited.
757 int parallel_count;
// Symbolic graph used to look up the copies of a tensor symbol.
758 ccv_nnc_symbolic_graph_t* graph;
// Compiled data whose tensors_init bitmap tracks which tensors were already initialized.
759 ccv_cnnp_compiled_data_t* compiled_data;
// Arena used to resolve tensor symbols into concrete tensors.
760 ccv_nnc_tensor_arena_t* tensor_arena;
761} ccv_nnc_tensor_init_states_t;
762
763static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
764{
765 int i;
766 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
767 for (i = 0; i < compiled_data->parameters->rnum; i++)
768 {
769 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
770 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
771 return 1;
772 }
773 for (i = 0; i < compiled_data->internals->rnum; i++)
774 {
775 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
776 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
777 return 1;
778 }
779 return 0;
780}
781
782static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
783{
784 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
785 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
786 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
787 if (!output_tensor)
788 return;
789 const int d = output_symbol.d;
790 assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data->
tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states
->compiled_data->tensors_init.size) ; else __assert_fail
("d < tensor_init_states->compiled_data->tensors_init.size"
, "ccv_cnnp_model.c", 790, __extension__ __PRETTY_FUNCTION__)
; }))
;
791 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data
->tensors_init.v) & ~(uintptr_t)1))
;
792 if (init_v[d >> 5] & (1u << (d & 0x1f)))
793 return;
794 init_v[d >> 5] |= (1u << (d & 0x1f));
795 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
796 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
797 const int parallel_count = tensor_init_states->parallel_count;
798 int i;
799 for (i = 1; i < parallel_count; i++)
800 {
801 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
802 if (copy)
803 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
804 }
805}
806
807// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
808// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
809static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
810{
811 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 811, __extension__ __PRETTY_FUNCTION__); }))
;
812 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 812, __extension__ __PRETTY_FUNCTION__)
; }))
;
813 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
814 assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__
({ if (compiled_data->rewindables) ; else __assert_fail (
"compiled_data->rewindables", "ccv_cnnp_model.c", 814, __extension__
__PRETTY_FUNCTION__); }))
;
815 int i;
816 for (i = 0; i < compiled_data->rewindables->rnum; i++)
817 {
818 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) +
(size_t)(compiled_data->rewindables)->rsize * (size_t)
(i)))
;
819 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
820 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
821 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
822 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
823 }
824 ccv_array_clear(compiled_data->rewindables);
825 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
826}
827
828static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
829{
830 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
831 .type = CCV_CNNP_REWIND_TENSOR,
832 .tensor = symbol
833 };
834 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
835 ccv_array_push(rewind_symbols, &rewind_symbol);
836}
837
838static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
839{
840 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
841 .type = CCV_CNNP_REWIND_TENSOR,
842 .tensor = symbol
843 };
844 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
845 ccv_array_push(rewind_symbols, &rewind_symbol);
846}
847
848static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
849{
850 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
851 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
852 .graph_exec = symbol
853 };
854 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
855 ccv_array_push(rewind_symbols, &rewind_symbol);
856}
857
858static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
859{
860 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
861 if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0))
862 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
863 int i;
864 for (i = 1; i < parallel_count; i++)
865 {
866 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
867 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
868 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
869 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
870 }
871}
872
873static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
874{
875 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 875, __extension__ __PRETTY_FUNCTION__); }))
;
876 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 876, __extension__ __PRETTY_FUNCTION__); }))
;
877 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
878 int i;
879 for (i = 1; i < parallel_count; i++)
880 {
881 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
882 if (copy_symbol.graph)
883 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
884 }
885 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
886 if (graph_exec_arena)
887 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
888 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
889 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
890 if (gradient_graph_exec_arena)
891 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
892}
893
894static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
895{
896 int this_parameter_flag = 0;
897 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
898 return this_parameter_flag;
899 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
900 int j, k;
901 // For no-op, we can preserve previous saved_aux_size.
902 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
903 {
904 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
905 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
906 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
907 // make sure some model parameters don't update if we don't want them to.
908 int old_saved_aux_size;
909 if (old_minimizer.cmd == CCV_NNC_NOOP)
910 {
911 int input_size;
912 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
913 if (input_size < 2) // This is not legit.
914 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
915 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
916 old_saved_aux_size = input_size - 2;
917 } else
918 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
919 if (old_saved_aux_size != saved_aux_size)
920 {
921 this_parameter_flag = 1;
922 if (saved_aux_size > old_saved_aux_size)
923 {
924 // Allocate new tensor symbols.
925 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
926 for (j = old_saved_aux_size; j < saved_aux_size; j++)
927 {
928 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
929 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
930 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
931 for (k = 1; k < parallel_count; k++)
932 {
933 ccv_nnc_tensor_param_t dev_info = info;
934 if (k != device_id)
935 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) &
0xfff) << 8))
;
936 else
937 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) &
0xfff) << 8))
;
938 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
939 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
940 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
941 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
942 }
943 }
944 } else {
945 for (j = saved_aux_size; j < old_saved_aux_size; j++)
946 {
947 for (k = 1; k < parallel_count; k++)
948 {
949 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
950 if (src_copy.d >= 0)
951 {
952 ccv_nnc_tensor_symbol_free(graph, src_copy);
953 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
954 }
955 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
956 if (dest_copy.d >= 0)
957 {
958 ccv_nnc_tensor_symbol_free(graph, dest_copy);
959 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
960 }
961 }
962 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
963 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
964 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
965 }
966 }
967 }
968 }
969 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
970 if (this_parameter_flag)
971 {
972 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
973 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
974 const int* inputs = 0;
975 int input_size = 0;
976 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
977 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 977, __extension__ __PRETTY_FUNCTION__)
; }))
;
978 update_inputs[0].d = inputs[0];
979 update_inputs[0].graph = graph;
980 update_inputs[1].d = inputs[1];
981 update_inputs[1].graph = graph;
982 update_outputs[0] = updated_parameters[parameter_indice];
983 for (j = 0; j < saved_aux_size; j++)
984 {
985 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
986 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
987 }
988 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
989 for (k = 1; k < parallel_count; k++)
990 {
991 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
992 assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if
(copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c"
, 992, __extension__ __PRETTY_FUNCTION__); }))
;
993 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
994 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 994, __extension__ __PRETTY_FUNCTION__)
; }))
;
995 update_inputs[0].d = inputs[0];
996 update_inputs[0].graph = graph;
997 update_inputs[1].d = inputs[1];
998 update_inputs[1].graph = graph;
999 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
1000 for (j = 0; j < saved_aux_size; j++)
1001 {
1002 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
1003 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
1004 }
1005 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
1006 }
1007 }
1008 return this_parameter_flag;
1009}
1010
// Record pairing one minimizer command with the parameter selectors it should be applied to.
// Instances are stored (by pointer) in compiled_data->minimize.parameters and consumed by
// _ccv_cnnp_apply_parameters_with_minimizer.
1011typedef struct {
// Number of valid entries in parameters[].
1012 int parameter_size;
// Minimizer command applied to every parameter selected below.
1013 ccv_nnc_cmd_t minimizer;
// Declared with one element but indexed from 0 to parameter_size - 1 by the consumer.
// NOTE(review): presumably the record is over-allocated by whoever creates it - confirm at the allocation site.
1014 ccv_cnnp_model_io_t parameters[1];
1015} ccv_cnnp_set_minimizer_for_parameter_t;
1016
1017static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
1018{
1019 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1020 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1020, __extension__ __PRETTY_FUNCTION__); }))
;
1021 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1022 // We update all parameters, at this point, we have one minimizer.
1023 const int parameter_size = compiled_data->parameters->rnum;
1024 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
1025 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
1026 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 1026, __extension__ __PRETTY_FUNCTION__); }))
;
1027 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1028 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now")((void) sizeof ((_ccv_cnnp_model_effective_parallel_count(model
) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ? 1 : 0), __extension__ ({ if (_ccv_cnnp_model_effective_parallel_count
(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ; else __assert_fail ("_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && \"local replicated stateful models only support forward / no-grad evaluation for now\""
, "ccv_cnnp_model.c", 1028, __extension__ __PRETTY_FUNCTION__
); }))
;
1029 ccv_array_t* const parameters = compiled_data->minimize.parameters;
1030 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1031 int i, j, flag = 0;
1032 for (i = 0; i < parameters->rnum; i++)
1033 {
1034 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
1035 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
1036 {
1037 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
1038 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 1038, __extension__ __PRETTY_FUNCTION__
); }))
;
1039 const int old_rnum = parameter_indices->rnum;
1040 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
1041 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
1042 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 1042, __extension__ __PRETTY_FUNCTION__
); }))
;
1043 if (param_ref >= 0)
1044 {
1045 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 1045, __extension__ __PRETTY_FUNCTION__
); }))
;
1046 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
1047 parameter_indices->rnum = old_rnum + 1;
1048 }
1049 }
1050 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1051 // We may have duplicated indices, but that is OK, we will set it twice.
1052 for (j = 0; j < parameter_indices->rnum; j++)
1053 {
1054 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
1055 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 1055, __extension__ __PRETTY_FUNCTION__
); }))
;
1056 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1057 flag = 1;
1058 }
1059 ccv_array_clear(parameter_indices);
1060 }
1061 ccv_array_free(parameter_indices);
1062 return flag;
1063}
1064
1065static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1066{
1067 if (new_saved_aux_size == old_saved_aux_size)
1068 return;
1069 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 1069, __extension__ __PRETTY_FUNCTION__
); }))
;
1070 int i, j;
1071 for (i = parameter_size - 1; i >= 0; i--)
1072 {
1073 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1074 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1075 for (j = old_saved_aux_size - 1; j >= 0; j--)
1076 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1077 }
1078}
1079
1080static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1081{
1082 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1083 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1083, __extension__ __PRETTY_FUNCTION__); }))
;
1084 if (!compiled_data->rewindables)
1085 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1086 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1087 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1088 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1089}
1090
// Build the backward / parameter-update part of the model's symbolic graph and switch
// compiled_data from CCV_CNNP_COMPILED_DATA_GRADIENT_NONE to gradient_mode.
//
// Steps, in order: allocate the per-parameter bookkeeping arrays on compiled_data, call
// ccv_nnc_symbolic_graph_minimize (optionally also asking for gradients w.r.t. model inputs),
// widen saved_aux to max_saved_aux_size, apply per-parameter minimizer overrides, collect the
// backward "to" exec symbols, register graph destinations, replicate for data parallel when
// parallel_count > 1, optionally run memory compression / reduction, and dedup backward.tos.
//
// model: model to extend; its compiled_data must be in GRADIENT_NONE mode (asserted).
// gradient_mode: mode to switch to; must not be GRADIENT_NONE (asserted).
// disable_outgrad: bitmask over model inputs; a set bit i means no gradient is requested for input i.
//   CCV_CNNP_DISABLE_OUTGRAD_NONE requests gradients for all inputs.
// fits / fit_size: optional; when fits is non-NULL, fit_size must equal output_size * parallel_count
//   (asserted) and the info of the first output_size tensors is copied onto compiled_data->fits.
1091static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1092{
1093 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1094 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1094, __extension__ __PRETTY_FUNCTION__
); }))
;
1095 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1095, __extension__ __PRETTY_FUNCTION__
); }))
;
1096 const int evaluate_to_size = compiled_data->evaluate.to_size;
1097 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 1097, __extension__ __PRETTY_FUNCTION__
); }))
;
1098 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1099 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now")((void) sizeof ((_ccv_cnnp_model_effective_parallel_count(model
) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ? 1 : 0), __extension__ ({ if (_ccv_cnnp_model_effective_parallel_count
(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now"
) ; else __assert_fail ("_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && \"local replicated stateful models only support forward / no-grad evaluation for now\""
, "ccv_cnnp_model.c", 1099, __extension__ __PRETTY_FUNCTION__
); }))
;
// Grow evaluate.tos so one allocation holds evaluate_to_size * parallel_count exec symbols
// followed by the same number of ccv_nnc_graph_exec_t; to_ops points at that second region.
1100 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1101 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1102 int i, j;
1103 const int output_size = model->output_size;
1104 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 1104, __extension__ __PRETTY_FUNCTION__
); }))
;
// Only the first output_size fit tensors are used to set the fit symbols' parameters.
1105 if (fits)
1106 for (i = 0; i < output_size; i++)
1107 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1108 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1109 const int parameter_size = compiled_data->parameters->rnum;
// One allocation carved into three consecutive arrays: updated_parameters[parameter_size],
// update_nodes[parameter_size] and saved_aux[max_saved_aux_size * parameter_size].
1110 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1111 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1112 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1113 int parameter_size_maybe_more = parameter_size;
1114 compiled_data->disable_outgrad = disable_outgrad;
// outgrad_size = number of model inputs for which a gradient is requested.
1115 int outgrad_size;
1116 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1117 outgrad_size = 0;
1118 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1119 outgrad_size = model->input_size;
1120 else {
1121 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1121, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1122 outgrad_size = 0;
// Count the inputs whose bit is clear in the mask.
1123 for (i = 0; i < model->input_size; i++)
1124 if (!(disable_outgrad & ((uint64_t)1 << i)))
1125 ++outgrad_size;
1126 }
1127 compiled_data->outgrad_size = outgrad_size;
1128 parameter_size_maybe_more += outgrad_size;
// One allocation: gradients[parameter_size_maybe_more] followed by room for
// parameter_size_maybe_more * parallel_count exec symbols (backward.tos).
// outgrads, when present, aliases the tail of gradients past the parameter entries.
1129 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1130 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1131 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1132 compiled_data->backward.to_size = parameter_size_maybe_more;
1133 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
;
// With parameter_flags present, pass a temporary copy of the parameter list in which every
// parameter whose flag bit is clear is replaced by NO_TENSOR_SYMBOL. Freed after minimize.
1134 if (compiled_data->parameter_flags)
1135 {
1136 parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1137 for (i = 0; i < parameter_size; i++)
1138 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1139 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1140 else
1141 parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1142 }
// The three minimize calls below differ only in which inputs are passed: none, all model
// inputs, or the subset selected by disable_outgrad.
1143 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1144 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1145 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1146 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1147 else { // Compute minimize with gradients including selected inputs.
1148 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 1148, __extension__ __PRETTY_FUNCTION__
); }))
;
1149 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1149, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1150 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 1150, __extension__ __PRETTY_FUNCTION__
); }))
;
// Gather the inputs whose mask bit is clear, in input order.
1151 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1152 j = 0;
1153 for (i = 0; i < model->input_size; i++)
1154 if (!(disable_outgrad & ((uint64_t)1 << i)))
1155 outgrads[j++] = model->inputs[i];
1156 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1157 }
1158 if (compiled_data->parameter_flags)
1159 ccfreefree(parameters);
// Re-stride saved_aux from the default minimizer's aux size to max_saved_aux_size per parameter.
1160 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
// Apply per-parameter minimizer overrides, if any were registered.
1161 if (compiled_data->minimize.parameters)
1162 _ccv_cnnp_apply_parameters_with_minimizer(model);
1163 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1164 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1165 for (i = 0; i < output_size; i++)
1166 {
1167 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1168 // Init this to 1 so we can backprop.
1169 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1170 }
// Rebuild backward.tos from scratch, keeping only gradients that actually exist.
1171 compiled_data->backward.to_size = 0;
1172 for (i = 0; i < parameter_size_maybe_more; i++)
1173 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1174 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1175 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
// The update nodes become the graph destinations; outgrad nodes with no successors are appended below.
1176 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1177 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1178 {
1179 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1180 continue;
1181 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1182 const int* tos;
1183 int to_size;
1184 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1185 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1186 {
1187 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1188 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1189 int flag = 0;
// Duplicate check: scans at most the last i destinations for the same exec symbol.
// NOTE(review): presumably those are the ones appended by earlier iterations of this loop - confirm.
1190 const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = (
destination_count - i); (_a > _b) ? _a : _b; })
;
1191 for (j = i - 1; !flag && j >= 0; j--)
1192 if (j + outgrad_destination_start < destination_count)
1193 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1194 if (!flag) // Only if we cannot find it, we add it.
1195 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1196 }
1197 }
1198 if (parallel_count > 1)
1199 {
1200 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1201 0, 0,
1202 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1203 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1204 0, 0, 0,
1205 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1206 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1207 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
// Append the per-device copies of every original evaluate "to" node; the loop bound is the
// size captured before any copies were appended, while evaluate.to_size grows as they are added.
1208 for (i = 0; i < evaluate_to_size; i++)
1209 for (j = 1; j < parallel_count; j++)
1210 {
1211 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1212 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1213 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1214 }
// Same for the backward "to" nodes; snapshot the size first because it grows inside the loop.
1215 const int backward_to_size = compiled_data->backward.to_size;
1216 for (i = 0; i < backward_to_size; i++)
1217 for (j = 1; j < parallel_count; j++)
1218 {
1219 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1220 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1221 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1222 }
1223 }
1224 // Only use memory compression if we are in gradient parameter mode.
1225 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1226 {
1227 if (model->memory_compression)
1228 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1229 if (model->memory_reduction)
1230 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1231 }
// Remove duplicate exec symbols from backward.tos, then publish the new gradient mode last.
1232 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1233 compiled_data->gradient_mode = gradient_mode;
1234}
1235
1236void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1237{
1238 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1238, __extension__ __PRETTY_FUNCTION__
); }))
;
1239 const int parameter_size = compiled_data->parameters->rnum;
1240 const int parallel_count = _ccv_cnnp_model_effective_parallel_count(model);
1241 compiled_data->parallel_count = parallel_count;
1242 const int internal_size = compiled_data->internals->rnum;
1243 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1244 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1245 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1246 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1247}
1248
1249int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1250{
1251 int i, j;
1252 const int parameter_size = compiled_data->parameters->rnum;
1253 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1254 const int internal_size = compiled_data->internals->rnum;
1255 for (i = 0; i < parameter_size; i++)
1256 {
1257 // parameters has to be allocated all together.
1258 if (compiled_data->tensors.parameters[i])
1259 {
1260 for (j = 1; j < parallel_count; j++)
1261 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1261, __extension__ __PRETTY_FUNCTION__
); }))
; }
1262 continue;
1263 }
1264 return 1;
1265 }
1266 for (i = 0; i < internal_size; i++)
1267 {
1268 if (!compiled_data->tensors.internals[i])
1269 return 1;
1270 for (j = 1; j < parallel_count; j++)
1271 if (!compiled_data->tensors.internals[i + j * internal_size])
1272 return 1;
1273 }
1274 return 0;
1275}
1276
1277void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1278{
1279 int i, j;
1280 const int parameter_size = compiled_data->parameters->rnum;
1281 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1282 compiled_data->parallel_count = parallel_count;
1283 const int internal_size = compiled_data->internals->rnum;
1284 for (i = 0; i < parameter_size; i++)
1285 {
1286 // parameters has to be allocated all together.
1287 if (compiled_data->tensors.parameters[i])
1288 {
1289 for (j = 1; j < parallel_count; j++)
1290 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1290, __extension__ __PRETTY_FUNCTION__
); }))
; }
1291 continue;
1292 }
1293 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1294 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1295 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1296 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1297 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1298 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1299 for (j = 1; j < parallel_count; j++)
1300 {
1301 if (j != device_id)
1302 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1303 else
1304 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1305 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1306 }
1307 }
1308 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1309 for (i = 0; i < internal_size; i++)
1310 {
1311 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1312 const int d = retained.d;
1313 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1314 continue;
1315 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1316 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1317 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1318 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1319 if (!compiled_data->tensors.internals[i])
1320 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1321 for (j = 1; j < parallel_count; j++)
1322 {
1323 if (j != device_id)
1324 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1325 else
1326 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1327 if (!compiled_data->tensors.internals[i + j * internal_size])
1328 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1329 }
1330 }
1331 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
; // Remove 1 if any.
1332}
1333
1334static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1335{
1336 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1337 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1338}
1339
1340static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1341{
1342 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1342, __extension__ __PRETTY_FUNCTION__
); }))
;
1343 int i, j;
1344 for (i = 0; i < tensor_size; i++)
1345 {
1346 if (!tensors[i])
1347 continue;
1348 const int d = tensor_symbols[i].d;
1349 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1350 continue;
1351 for (j = 1; j < parallel_count; j++)
1352 if (tensors[i + j * tensor_size])
1353 {
1354 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1355 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size]
) & ~(uintptr_t)1))
;
1356 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1357 }
1358 }
1359}
1360
1361static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1362{
1363 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1363, __extension__ __PRETTY_FUNCTION__
); }))
;
1364 int i, j;
1365 for (i = 0; i < tensor_size; i++)
1366 {
1367 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1368 for (j = 1; j < parallel_count; j++)
1369 {
1370 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1371 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1372 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1373 { // We shouldn't allocate this, free it up.
1374 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1375 tensors[i + j * tensor_size] = 0;
1376 }
1377 }
1378 }
1379}
1380
1381static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1382{
1383 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1383, __extension__ __PRETTY_FUNCTION__
); }))
;
1384 int i, j;
1385 for (i = 0; i < tensor_size; i++)
1386 {
1387 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1388 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1389 continue;
1390 if (graph)
1391 {
1392 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1393 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1394 tensor_symbol = alias_to;
1395 }
1396 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1397 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1398 {
1399 const ccv_nnc_tensor_bind_t retained_bind = {
1400 .symbol = tensor_symbol,
1401 .tensor = tensor
1402 };
1403 ccv_array_push(tensor_binds, &retained_bind);
1404 }
1405 for (j = 1; j < parallel_count; j++)
1406 {
1407 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1408 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1409 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1410 {
1411 const ccv_nnc_tensor_bind_t bind = {
1412 .symbol = copy,
1413 .tensor = tensors[i + j * tensor_size]
1414 };
1415 ccv_array_push(tensor_binds, &bind);
1416 }
1417 }
1418 }
1419}
1420
1421static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1422{
1423 if (compiled_data->graph)
1424 ccv_nnc_graph_free(compiled_data->graph);
1425 compiled_data->graph = 0;
1426 compiled_data->is_test = 0;
1427 if (compiled_data->tensor_arena)
1428 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1429 compiled_data->tensor_arena = 0;
1430 if (compiled_data->graph_exec_arena)
1431 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1432 compiled_data->graph_exec_arena = 0;
1433 if (compiled_data->backward.from_ops)
1434 ccfreefree(compiled_data->backward.from_ops);
1435 compiled_data->backward.from_ops = 0;
1436 if (compiled_data->evaluate.schedule)
1437 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1438 compiled_data->evaluate.schedule = 0;
1439 if (compiled_data->backward.schedule)
1440 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1441 compiled_data->backward.schedule = 0;
1442}
1443
1444static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1445{
1446 if (compiled_data->gradients)
1447 ccfreefree(compiled_data->gradients);
1448 compiled_data->gradients = 0;
1449 if (compiled_data->updated_parameters)
1450 ccfreefree(compiled_data->updated_parameters);
1451 compiled_data->updated_parameters = 0;
1452 compiled_data->update_nodes = 0;
1453 compiled_data->saved_aux = 0;
1454}
1455
1456static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1457{
1458 if (compiled_data->backward.gradients)
1459 ccfreefree(compiled_data->backward.gradients);
1460 compiled_data->backward.gradients = 0;
1461 if (compiled_data->backward.accum)
1462 ccv_nnc_graph_free(compiled_data->backward.accum);
1463 compiled_data->backward.accum = 0;
1464 if (compiled_data->backward.tensor_arena)
1465 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1466 compiled_data->backward.tensor_arena = 0;
1467 if (compiled_data->backward.graph_exec_arena)
1468 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1469 compiled_data->backward.graph_exec_arena = 0;
1470}
1471
1472static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1473{
1474 if (compiled_data->apply_gradients.graph)
1475 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1476 compiled_data->apply_gradients.graph = 0;
1477 if (compiled_data->apply_gradients.tensor_arena)
1478 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1479 compiled_data->apply_gradients.tensor_arena = 0;
1480 if (compiled_data->apply_gradients.graph_exec_arena)
1481 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1482 compiled_data->apply_gradients.graph_exec_arena = 0;
1483}
1484
1485// Compile the graph to run ccv_cnnp_model_fit
1486static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1487{
1488 int i, j;
1489 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1490 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1490, __extension__ __PRETTY_FUNCTION__
); }))
;
1491 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1492 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1493 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1493, __extension__ __PRETTY_FUNCTION__
); }))
;
1494 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1494
, __extension__ __PRETTY_FUNCTION__); }))
;
1495 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1495, __extension__ __PRETTY_FUNCTION__
); }))
;
1496 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1497 {
1498 _ccv_cnnp_model_set_rewindables(model);
1499 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1500 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1501 _ccv_cnnp_model_rewind_graph(model);
1502 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1503 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1504 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1505 }
1506 const int tensors_init = !!compiled_data->tensors_init.v;
1507 if (!tensors_init)
1508 _ccv_cnnp_model_tensors_init(model, compiled_data);
1509 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1510 // Check if it is not fully allocated, if it is not, init_1.
1511 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1512 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1513 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1513, __extension__ __PRETTY_FUNCTION__); }))
;
1514 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1514, __extension__ __PRETTY_FUNCTION__); }))
;
1515 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1515
, __extension__ __PRETTY_FUNCTION__); }))
;
1516 const int input_size_per_p = input_size / parallel_count;
1517 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1518 const int output_size_per_p = output_size / parallel_count;
1519 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1520 const int fit_size_per_p = fit_size / parallel_count;
1521 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1522 const int parameter_size = compiled_data->parameters->rnum;
1523 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1524 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1525 const int internal_size = compiled_data->internals->rnum;
1526 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1527 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1528 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1529 ccv_array_free(tensor_binds);
1530 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1531 if (tensors_init && parallel_count > 1)
1532 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1533 // If tensor is not init'ed, we need to init states first.
1534 if (_ccv_cnnp_any_to_init(compiled_data))
1535 {
1536 ccv_nnc_tensor_init_states_t tensor_init_states = {
1537 .parallel_count = parallel_count,
1538 .graph = model->graph,
1539 .compiled_data = compiled_data,
1540 .tensor_arena = compiled_data->tensor_arena
1541 };
1542 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1543 }
1544 compiled_data->is_test = 0;
1545 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1546 // No need to set because it is default to training mode.
1547 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1548 for (i = 0; i < saved_aux_size * parameter_size; i++)
1549 {
1550 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1551 continue;
1552 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1553 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1554 for (j = 1; j < parallel_count; j++)
1555 {
1556 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1557 if (copy)
1558 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1559 }
1560 }
1561 const int evaluate_to_size = compiled_data->evaluate.to_size;
1562 compiled_data->evaluate.to_op_size = 0;
1563 for (i = 0; i < evaluate_to_size; i++)
1564 {
1565 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1566 if (to.graph)
1567 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1568 }
1569 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1570 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1571}
1572
1573ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1574{
1575 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1576 if (!compiled_data || !compiled_data->graph)
1577 return 0;
1578 return ccv_nnc_graph_default_stream(compiled_data->graph);
1579}
1580
1581uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1582{
1583 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1584 if (!compiled_data || !compiled_data->tensor_arena)
1585 return 0;
1586 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1587}
1588
1589static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1590{
1591 int i, j;
1592 for (i = 0; i < tensor_size; i++)
1593 {
1594 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1595 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1596 continue;
1597 if (graph)
1598 {
1599 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1600 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1601 tensor_symbol = alias_to;
1602 }
1603 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1604 for (j = 1; j < parallel_count; j++)
1605 {
1606 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1607 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1608 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1609 }
1610 }
1611}
1612
1613void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1614{
1615 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1616 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1616, __extension__ __PRETTY_FUNCTION__); }))
;
1617 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1618 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1618, __extension__ __PRETTY_FUNCTION__
); }))
;
1619 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1619, __extension__ __PRETTY_FUNCTION__
); }))
;
1620 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1620
, __extension__ __PRETTY_FUNCTION__); }))
;
1621 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1621, __extension__ __PRETTY_FUNCTION__); }))
;
1622 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1623 {
1624 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1625 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1626 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1627 // Compile the symbolic graph down only when needed.
1628 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1629 } else {
1630 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1630, __extension__ __PRETTY_FUNCTION__); }))
;
1631 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1631, __extension__ __PRETTY_FUNCTION__); }))
;
1632 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1632
, __extension__ __PRETTY_FUNCTION__); }))
;
1633 const int input_size_per_p = input_size / parallel_count;
1634 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1635 const int output_size_per_p = output_size / parallel_count;
1636 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1637 const int fit_size_per_p = fit_size / parallel_count;
1638 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1639 }
1640 if (compiled_data->is_test)
1641 {
1642 compiled_data->is_test = 0;
1643 ccv_nnc_graph_exec_update_t update = {
1644 .parallel_count = parallel_count,
1645 .graph = model->graph,
1646 .graph_exec_arena = compiled_data->graph_exec_arena,
1647 };
1648 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1649 }
1650 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1651}
1652
1653// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1654static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1655{
1656 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1657 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1658 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1659 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1659, __extension__ __PRETTY_FUNCTION__
); }))
;
1660 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1660, __extension__ __PRETTY_FUNCTION__
); }))
;
1661 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1662 // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel.
1663 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1664 {
1665 const int evaluate_to_size = compiled_data->evaluate.to_size;
1666 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1667 _ccv_cnnp_model_set_rewindables(model);
1668 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1669 0, 0,
1670 0, 0, 0,
1671 0, 0, 0,
1672 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1673 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1674 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1675 int i, j;
1676 for (i = 0; i < evaluate_to_size; i++)
1677 for (j = 1; j < parallel_count; j++)
1678 {
1679 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1680 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1681 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1682 }
1683 }
1684 const int tensors_init = !!compiled_data->tensors_init.v;
1685 if (!tensors_init)
1686 _ccv_cnnp_model_tensors_init(model, compiled_data);
1687 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1688 // Check if it is not fully allocated, if it is not, init_1.
1689 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1690 const int tensor_parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1691 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1692 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1692, __extension__ __PRETTY_FUNCTION__); }))
;
1693 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1693, __extension__ __PRETTY_FUNCTION__); }))
;
1694 const int input_size_per_p = input_size / parallel_count;
1695 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1696 const int output_size_per_p = output_size / parallel_count;
1697 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1698 const int parameter_size = compiled_data->parameters->rnum;
1699 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, tensor_parallel_count, tensor_binds);
1700 const int internal_size = compiled_data->internals->rnum;
1701 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, tensor_parallel_count);
1702 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, tensor_parallel_count, tensor_binds);
1703 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1704 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1705 ccv_array_free(tensor_binds);
1706 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1707 // If tensor is not init'ed, we need to init states first.
1708 if (tensors_init && tensor_parallel_count > 1)
1709 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, tensor_parallel_count);
1710 if (_ccv_cnnp_any_to_init(compiled_data))
1711 {
1712 ccv_nnc_tensor_init_states_t tensor_init_states = {
1713 .parallel_count = tensor_parallel_count,
1714 .graph = model->graph,
1715 .compiled_data = compiled_data,
1716 .tensor_arena = compiled_data->tensor_arena
1717 };
1718 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1719 }
1720 compiled_data->is_test = 1;
1721 ccv_nnc_graph_exec_update_t update = {
1722 .parallel_count = parallel_count,
1723 .graph = model->graph,
1724 .graph_exec_arena = compiled_data->graph_exec_arena,
1725 };
1726 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1727 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1728 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1729}
1730
// Allocate the per-parameter gradient tensors for a compiled model.
//
// A single malloc'ed pointer array of 2 * parameter_size * parallel_count
// entries is created: the first half is tensors.gradients, the second half is
// tensors.accum_gradients (which points into the same allocation). Entry
// [i + j * parameter_size] belongs to parameter i on parallel replica j.
//
// model:         only model->parallel_count is read here.
// compiled_data: supplies parameters / parameter_flags and receives the newly
//                created tensors.gradients / tensors.accum_gradients arrays.
//
// Precondition (asserted): tensors.gradients has not been allocated yet.
// NOTE(review): the malloc result is used without a NULL check.
1731static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1732{
1733 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1733, __extension__ __PRETTY_FUNCTION__
); }))
;
1734 const int parameter_size = compiled_data->parameters->rnum;
// A parallel_count below 1 is treated as a single replica.
1735 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
// One allocation backs both arrays; accum_gradients starts right after the
// parameter_size * parallel_count gradient slots.
1736 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1737 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1738 int i, j;
1739 for (i = 0; i < parameter_size; i++)
1740 {
// parameter_flags, when present, is read as a bitset with 64 parameters per
// uint64_t word. A parameter whose bit is clear gets no tensors at all: every
// slot for it, on every replica, is set to 0.
// NOTE(review): presumably a clear bit marks a parameter that needs no
// gradient — confirm against where parameter_flags is populated.
1741 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1742 {
1743 compiled_data->tensors.gradients[i] = 0;
1744 compiled_data->tensors.accum_gradients[i] = 0;
1745 for (j = 1; j < parallel_count; j++)
1746 {
1747 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1748 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1749 }
1750 continue;
1751 }
1752 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
// The gradient tensor is created from the parameter symbol's own tensor params.
1753 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
// If the symbol's device field is CCV_COMPUTE_DEVICE_ANY, pin it to device id 0.
1754 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1755 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
// Remember the device id used by the first (j == 0) copy; the replica loop
// below avoids reusing it.
1756 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1757 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1758 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1759 for (j = 1; j < parallel_count; j++)
1760 {
// Replica j gets device id j, except the replica whose index equals the first
// copy's device id, which gets device id 0 instead.
// NOTE(review): presumably this keeps each device id used once across the
// replicas — confirm with the parameter tensor allocation code.
1761 if (j != device_id)
1762 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1763 else
1764 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1765 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1766 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1767 }
1768 }
1769}
1770
// Report whether disable_outgrad covers every one of the first input_size inputs.
//
// disable_outgrad: either one of the two sentinels compared below, or a bit
//                  mask whose bit i is tested for input i.
// input_size:      number of low bits to test.
//
// Returns 1 when disable_outgrad equals CCV_CNNP_DISABLE_OUTGRAD_ALL or when
// bits 0 .. input_size - 1 are all set; returns 0 when it equals
// CCV_CNNP_DISABLE_OUTGRAD_NONE or when any of those bits is clear. If
// input_size <= 0 and neither sentinel matches, the loop does not run and the
// result is 1.
1771static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1772{
1773 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1774 return 1;
1775 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1776 return 0;
1777 int i;
// NOTE(review): the shift below is only defined for i < 64. Presumably a mask
// with all 64 bits set equals CCV_CNNP_DISABLE_OUTGRAD_ALL and has already
// returned above, so i never reaches 64 — confirm against the macro definition.
1778 for (i = 0; i < input_size; i++)
1779 if (!(disable_outgrad & ((uint64_t)1 << i)))
1780 return 0;
1781 return 1;
1782}
1783
1784// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1785// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1786static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1787{
1788 int i, j;
1789 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1790 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1791 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1791, __extension__ __PRETTY_FUNCTION__
); }))
;
1792 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1793 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1794 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1794, __extension__ __PRETTY_FUNCTION__
); }))
;
1795 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1795, __extension__ __PRETTY_FUNCTION__
); }))
;
1796 // There shouldn't be a loss function if we evaluate with multistage jit.
1797 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1797, __extension__ __PRETTY_FUNCTION__
); }))
;
1798 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1799 {
1800 _ccv_cnnp_model_set_rewindables(model);
1801 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1802 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1803 _ccv_cnnp_model_rewind_graph(model);
1804 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1805 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1806 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1807 }
1808 const int tensors_init = !!compiled_data->tensors_init.v;
1809 if (!tensors_init)
1810 _ccv_cnnp_model_tensors_init(model, compiled_data);
1811 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1812 // Check if it is not fully allocated, if it is not, init_1.
1813 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1814 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1815 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1815, __extension__ __PRETTY_FUNCTION__); }))
;
1816 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1816, __extension__ __PRETTY_FUNCTION__); }))
;
1817 const int input_size_per_p = input_size / parallel_count;
1818 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1819 const int output_size_per_p = output_size / parallel_count;
1820 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1821 const int parameter_size = compiled_data->parameters->rnum;
1822 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1823 const int internal_size = compiled_data->internals->rnum;
1824 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1825 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1826 if (!compiled_data->tensors.gradients)
1827 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1828 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1829 if (compiled_data->backward.to_size > 0)
1830 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1831 else
1832 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1833 ccv_array_free(tensor_binds);
1834 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1835 if (tensors_init && parallel_count > 1)
1836 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1837 // If tensor is not init'ed, we need to init states first.
1838 if (_ccv_cnnp_any_to_init(compiled_data))
1839 {
1840 ccv_nnc_tensor_init_states_t tensor_init_states = {
1841 .parallel_count = parallel_count,
1842 .graph = model->graph,
1843 .compiled_data = compiled_data,
1844 .tensor_arena = compiled_data->tensor_arena
1845 };
1846 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1847 }
1848 compiled_data->is_test = is_test;
1849 ccv_nnc_graph_exec_update_t update = {
1850 .parallel_count = parallel_count,
1851 .graph = model->graph,
1852 .graph_exec_arena = compiled_data->graph_exec_arena,
1853 };
1854 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1855 const int evaluate_to_size = compiled_data->evaluate.to_size;
1856 compiled_data->evaluate.to_op_size = 0;
1857 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1858 for (i = 0; i < evaluate_to_size; i++)
1859 {
1860 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1861 if (to_op.graph)
1862 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1863 const int* tos;
1864 int to_size;
1865 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1866 for (j = 0; j < to_size; j++)
1867 {
1868 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1869 .d = tos[j],
1870 .graph = model->graph
1871 });
1872 if (to_op.graph)
1873 ccv_array_add_unique_int(backward_from, to_op.d);
1874 }
1875 }
1876 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1876, __extension__
__PRETTY_FUNCTION__); }))
;
1877 compiled_data->backward.from_op_size = backward_from->rnum;
1878 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1879 for (i = 0; i < backward_from->rnum; i++)
1880 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1881 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1882 .graph = compiled_data->graph,
1883 };
1884 // If there are any set nodes (to set some tensors to 0) inserted through the backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find these nodes and explicitly add them to backward.from_ops.
1885 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)->
data)) + (size_t)(compiled_data->graph->exec_info)->
rsize * (size_t)(0)))
;
1886 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1887 uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1888 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data
)) + (size_t)(compiled_data->graph->sources)->rsize *
(size_t)(0)))
;
1889 const int source_size = compiled_data->graph->sources->rnum;
1890 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == compiled_data->graph
) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[
_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void
*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t
)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); --
_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_
[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size
); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_
].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0)
, __extension__ ({ if (_incomings_[(compiled_data->evaluate
.to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(compiled_data->evaluate.to_ops
)[_i_].d].c > 0) continue; _visit_->node[_visit_->size
].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_
->node[_visit_->size].term = ((_incomings_[(compiled_data
->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if (
_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof (
(_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__
({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail
("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c"
, 1890, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
1891 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1892 visited[(idx >> 5)] |= (1u << (idx & 31));
1893 } ccv_nnc_graph_visit_endfor} }
1894 ccv_nnc_graph_visit_free(visit);
1895 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)->
data)) + (size_t)(compiled_data->graph->destinations)->
rsize * (size_t)(0)))
;
1896 const int destination_size = compiled_data->graph->destinations->rnum;
1897 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } int _exist_size_[2] = { (compiled_data->backward
.from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[
_idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail
("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c"
, 1897, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_
][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (
_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == compiled_data->graph)
; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (compiled_data->backward.from_op_size); _i_++
) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data
->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 6 && _d_ < (destination_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1897, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1898 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1899 visited[(idx >> 5)] |= (1u << (idx & 31));
1900 } ccv_nnc_graph_visit_endfor} }
1901 ccv_nnc_graph_visit_free(visit);
1902 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(destination_size)) { _exists_[_p_][_i_] = d; continue; } } else
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if
(_incomings_[d].c == 0 && _incomings_[d].r == 6 &&
_d_ < (destination_size)) { ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1903 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1904 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1905 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1906 {
1907 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ? 1 : 0), __extension__ ({ if (exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ; else __assert_fail ("exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD"
, "ccv_cnnp_model.c", 1907, __extension__ __PRETTY_FUNCTION__
); }))
;
1908 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for empty out the tensor set function, not for the set grad to 1 one.
1909 ccv_array_add_unique_int(backward_from, idx);
1910 }
1911 } ccv_nnc_graph_visit_endfor} }
1912 ccv_nnc_graph_visit_free(visit);
1913 ccfreefree(visited);
1914 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1915 {
1916 compiled_data->backward.from_op_size = backward_from->rnum;
1917 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1918 for (i = 0; i < backward_from->rnum; i++)
1919 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1920 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1921 .graph = compiled_data->graph,
1922 };
1923 }
1924 ccv_array_free(backward_from);
1925 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1926 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1927}
1928
// Prepare the compiled graph for a later run with the given inputs / outputs. No graph is executed in this function.
// - model: must already carry compiled_data and a symbolic graph (both asserted below).
// - params: requires_grad, disable_outgrad and is_test are the fields read here.
// - inputs / outputs: input_size / output_size tensors; the sizes are asserted to equal the model's
//   input_size / output_size multiplied by parallel_count.
// If there is no compiled graph yet, or requires_grad is set and the existing compilation was made for a
// different mode, the compiled graph is freed and rebuilt through one of the multistage jit helpers.
// Otherwise the existing tensor arena is kept and only its input / output bindings are refreshed.
1929void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1930{
1931 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1932 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1932, __extension__ __PRETTY_FUNCTION__); }))
;
 // A parallel_count below 1 is treated as a single copy.
1933 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1934 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1934, __extension__ __PRETTY_FUNCTION__
); }))
;
1935 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1935, __extension__ __PRETTY_FUNCTION__
); }))
;
1936 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1936, __extension__ __PRETTY_FUNCTION__); }))
;
 // When every input has its outgrad disabled, only gradients for trainables are needed; otherwise inputs are included too.
1937 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
 // A mismatch can only happen when gradients are requested: the existing graph must be in multistage mode,
 // with the same gradient mode and the same disable_outgrad value.
1938 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1939 if (!compiled_data->graph || mode_mismatch)
1940 {
1941 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1942 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad).
1943 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1944 if (params.requires_grad)
1945 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1946 else
1947 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1948 } else {
 // Reuse the existing compilation: clear the arena bindings, then bind the caller's inputs and outputs again.
1949 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1950 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1950, __extension__ __PRETTY_FUNCTION__); }))
;
1951 const int input_size_per_p = input_size / parallel_count;
1952 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1953 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1953, __extension__ __PRETTY_FUNCTION__); }))
;
1954 const int output_size_per_p = output_size / parallel_count;
1955 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1956 }
 // Only push the is_test flag down when it differs from what compiled_data last recorded.
1957 if (compiled_data->is_test != params.is_test)
1958 {
1959 compiled_data->is_test = params.is_test;
1960 ccv_nnc_graph_exec_update_t update = {
1961 .parallel_count = parallel_count,
1962 .graph = model->graph,
1963 .graph_exec_arena = compiled_data->graph_exec_arena,
1964 };
1965 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1966 }
1967}
1968
// Evaluate the model: set up the compiled graph through ccv_cnnp_model_dry_run with the same arguments,
// then run compiled_data->graph with the given tensor_tape and stream_context.
// In the no-grad multistage mode the graph is run with no explicit schedule (0 is passed). In any other mode
// a schedule ending at evaluate.to_ops is created on first use, cached in compiled_data->evaluate.schedule, and used.
1969void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1970{
1971 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1972 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1972, __extension__ __PRETTY_FUNCTION__); }))
;
 // This may (re)compile the graph or only rebind tensors; graph_mode is read after it returns.
1973 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1974 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1975 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1976 else {
 // Lazily build the schedule that stops at evaluate.to_ops; it is reused by later calls while it stays set.
1977 if (!compiled_data->evaluate.schedule)
1978 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1979 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1980 }
1981}
1982
1983// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1984// Particularly, this method compiles the accumulator graph.
// For every parameter (and every parallel copy) that has a gradient tensor, one EWSUM node is created in a
// temporary symbolic graph: updated_accum_gradients = accum_gradients + gradients. The result is compiled into
// compiled_data->backward.accum together with backward.tensor_arena and backward.graph_exec_arena.
// If no parameter has a gradient tensor, an empty concrete graph is stored in backward.accum instead.
1985static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1986{
1987 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1988 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1988, __extension__ __PRETTY_FUNCTION__); }))
;
1989 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1989, __extension__ __PRETTY_FUNCTION__
); }))
;
 // Temporary symbolic graph; it is freed on both exit paths of this function.
1990 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1991 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1992 const int parameter_size = compiled_data->parameters->rnum;
1993 int i, j;
 // One allocation holds three consecutive arrays of parameter_size * parallel_count symbols each:
 // gradients, then accum_gradients, then updated_accum_gradients.
1994 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1995 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1996 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
 // Entries are indexed as i + j * parameter_size (parameter i, parallel copy j).
1997 for (i = 0; i < parameter_size; i++)
1998 for (j = 0; j < parallel_count; j++)
1999 if (compiled_data->tensors.gradients[i + j * parameter_size])
2000 {
2001 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
2002 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
2003 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
2004 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
 // inputs[0] is the running sum, inputs[1] the freshly computed gradient; output receives their element-wise sum.
2005 ccv_nnc_tensor_symbol_t inputs[2];
2006 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2007 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2008 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2009 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
2010 } else {
 // No gradient tensor for this slot: mark all three symbols as absent.
2011 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
2012 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
2013 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
2014 }
2015 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 // A source size of 0 after autogen means no EWSUM node was added above, so there is nothing to compile.
2016 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
2017 {
2018 ccv_nnc_symbolic_graph_free(accum);
2019 // Create empty graph.
2020 compiled_data->backward.accum = ccv_nnc_graph_new();
2021 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
2022 return;
2023 }
2024 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
 // updated_accum_gradients is bound to the same tensors as accum_gradients, so the sum is written back into
 // the accumulated-gradient tensors.
2025 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2026 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
2027 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2028 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
2029 ccv_nnc_symbolic_graph_free(accum);
2030 ccv_array_free(tensor_binds);
2031 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
2032}
2033
// Run the backward pass of a model compiled in MULTISTAGE_MODE (asserted below).
// - ingrads / ingrad_size: incoming gradients for the model outputs. ingrad_size must be 0 or
//   model->output_size * parallel_count. An output whose ingrad is not supplied gets its gradient tensor set to 1.
// - outgrads / outgrad_size: tensors to receive gradients w.r.t. inputs. If outgrad_size > 0 it must equal
//   compiled_data->outgrad_size * parallel_count.
// - tensor_tape / stream_context: forwarded to the graph run.
// compiled_data->backward.count is incremented on every call. While it is > 0 on entry, the accumulator graph
// (backward.accum) is run after the backward pass so that gradients are summed across calls.
2034void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
2035{
2036 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2037 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2037, __extension__ __PRETTY_FUNCTION__); }))
;
2038 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2038, __extension__ __PRETTY_FUNCTION__
); }))
;
2039 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2040 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 2040, __extension__ __PRETTY_FUNCTION__
); }))
;
2041 if (outgrad_size > 0)
2042 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 2042, __extension__ __PRETTY_FUNCTION__
); }))
; }
2043 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2043, __extension__ __PRETTY_FUNCTION__); }))
;
2044 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2044, __extension__ __PRETTY_FUNCTION__
); }))
;
2045 const int parameter_size = compiled_data->parameters->rnum;
2046 // If we need to accumulate the gradients now, do jit on accumulator.
2047 if (compiled_data->backward.count > 0)
2048 {
 // First accumulating call: build the accumulator graph. Otherwise, when exactly one backward has run since
 // the count was last reset, swap the tensor roles and rebind the accumulator arena to match.
2049 if (!compiled_data->backward.accum)
2050 _ccv_cnnp_model_multistage_jit_1(model);
2051 else if (compiled_data->backward.count == 1) {
2052 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2053 int i;
2054 for (i = 0; i < parameter_size * parallel_count; i++)
2055 {
2056 ccv_nnc_tensor_t* tensor;
2057 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
2058 }
 // backward.tensor_arena is not assigned on the empty-accumulator path of the jit helper, hence the check.
2059 if (compiled_data->backward.tensor_arena)
2060 {
2061 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2062 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
2063 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2064 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2065 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2066 }
2067 }
2068 }
2069 const int ingrad_size_per_p = model->output_size;
2070 const int outgrad_size_per_p = compiled_data->outgrad_size;
2071 int i, j;
 // Seed the gradient of every model output, for copy 0 and for each additional parallel copy j.
2072 for (i = 0; i < ingrad_size_per_p; i++)
2073 {
2074 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
 // NOTE(review): only ingrads[i] (copy 0) is tested for null; the entries of the other copies are used
 // below without a check — presumably callers supply all copies or none; confirm.
2075 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2076 {
2077 // Set it to 1 if it is not specified.
2078 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2079 if (ingrad_tensor)
2080 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2081 for (j = 1; j < parallel_count; j++)
2082 {
2083 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2084 if (ingrad_tensor)
2085 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2086 }
2087 } else {
2088 // Make sure the length matches, in case it is an alias.
2089 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 2089, __extension__ __PRETTY_FUNCTION__
); }))
;
 // The caller's tensors are laid out copy by copy: entry i of copy j sits at i + ingrad_size_per_p * j.
2090 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2091 for (j = 1; j < parallel_count; j++)
2092 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2093 }
2094 }
2095 if (outgrad_size > 0)
2096 {
2097 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 2097, __extension__ __PRETTY_FUNCTION__
); }))
;
 // Bind each requested outgrad tensor; a null outgrads[i] skips that input for all copies.
 // NOTE(review): as with ingrads, only the copy-0 entry is null-checked before the per-copy entries are bound — confirm.
2098 for (i = 0; i < outgrad_size_per_p; i++)
2099 if (outgrads[i])
2100 {
2101 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2102 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2103 for (j = 1; j < parallel_count; j++)
2104 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2105 }
2106 } else {
2107 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2108, __extension__ __PRETTY_FUNCTION__
); }))
2108 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2108, __extension__ __PRETTY_FUNCTION__
); }))
;
2109 }
2110 // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients.
2111 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
2112 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
2113 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
 // The backward schedule starts from backward.from_ops; it is created once and cached.
2114 if (!compiled_data->backward.schedule)
2115 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2116 // Run the backward pass.
2117 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2118 // If we need to run accumulation round, do that now.
2119 if (compiled_data->backward.count > 0)
2120 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2121 // Update the count, this determines whether we need to accumulate or not.
2122 ++compiled_data->backward.count;
2123}
2124
2125// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2126// Particularly, this method compiles the parameter update graph.
2127static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2128{
2129 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2130 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2130, __extension__ __PRETTY_FUNCTION__
); }))
;
2131 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2132 const int parameter_size = compiled_data->parameters->rnum;
2133 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2134 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2135 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2136 // Bind accumulated gradients.
2137 if (compiled_data->backward.count > 1)
2138 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2139 else
2140 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2141 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2142 int i, j;
2143 for (i = 0; i < compiled_data->backward.to_size; i++)
2144 {
2145 const int* tos;
2146 int to_size;
2147 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2148 for (j = 0; j < to_size; j++)
2149 {
2150 // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply
2151 // gradients graph.
2152 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2153 .d = tos[j],
2154 .graph = model->graph,
2155 });
2156 if (!exec.graph)
2157 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2158 }
2159 }
2160 const int from_size = apply_gradients_from->rnum;
2161 if (from_size == 0)
2162 {
2163 ccv_array_free(apply_gradients_from);
2164 ccv_array_free(tensor_binds);
2165 return;
2166 }
2167 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2168 for (i = 0; i < from_size; i++)
2169 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2170 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2171 .graph = model->graph
2172 };
2173 ccv_array_free(apply_gradients_from);
2174 // It can only ends with updates on the parameters.
2175 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2176 for (i = 0; i < parameter_size; i++)
2177 {
2178 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2179 continue;
2180 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2181 for (j = 1; j < parallel_count; j++)
2182 {
2183 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2184 ccv_array_push(tos, &copy);
2185 }
2186 }
2187 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2188 ccv_array_free(tos);
2189 ccv_array_free(tensor_binds);
2190 ccfreefree(froms);
2191 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2192 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2193 {
2194 // Skip on no tensor.
2195 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2196 continue;
2197 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2198 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2199 for (j = 1; j < parallel_count; j++)
2200 {
2201 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2202 if (copy)
2203 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2204 }
2205 }
2206 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2207}
2208
2209void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2210{
2211 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2212 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2212, __extension__ __PRETTY_FUNCTION__); }))
;
2213 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2213, __extension__ __PRETTY_FUNCTION__
); }))
;
2214 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2215 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2215, __extension__ __PRETTY_FUNCTION__); }))
;
2216 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2216, __extension__ __PRETTY_FUNCTION__
); }))
;
2217 // Skip if there is no backward pass.
2218 if (compiled_data->backward.count <= 0)
2219 return;
2220 // Skip if there is no parameters.
2221 if (compiled_data->parameters->rnum == 0)
2222 {
2223 compiled_data->backward.count = 0;
2224 return;
2225 }
2226 if (!compiled_data->apply_gradients.graph)
2227 _ccv_cnnp_model_multistage_jit_2(model);
2228 else {
2229 const int parameter_size = compiled_data->parameters->rnum;
2230 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2231 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2232 if (compiled_data->backward.count > 1)
2233 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2234 else
2235 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2236 }
2237 if (compiled_data->apply_gradients.graph)
2238 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2239 // Reset backward count to 0.
2240 compiled_data->backward.count = 0;
2241}
2242
2243void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2244{
2245 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2246 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2247 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2247, __extension__ __PRETTY_FUNCTION__
); }))
;
2248 const int tensors_init = !!compiled_data->tensors_init.v;
2249 int this_tensor_init = tensors_init;
2250 if (!tensors_init)
2251 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2252 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2253 // Check if it is not fully allocated, if it is not, init_1.
2254 this_tensor_init = 0;
2255 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2256 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2257 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2258 if (param_ref < 0)
2259 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2259
, __extension__ __PRETTY_FUNCTION__); }))
; }
2260 else
2261 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2261, __extension__ __PRETTY_FUNCTION__
); }))
; }
2262 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2263 ccv_array_free(parameter_indices);
2264 const int parameter_size = compiled_data->parameters->rnum;
2265 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2265
, __extension__ __PRETTY_FUNCTION__); }))
;
2266 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2266, __extension__ __PRETTY_FUNCTION__
); }))
;
2267 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
2268 int i;
2269 if (!this_tensor_init)
2270 {
2271 if (compiled_data->tensors.parameters[d])
2272 {
2273 for (i = 1; i < parallel_count; i++)
2274 { assert(compiled_data->tensors.parameters[d + i * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[d + i *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[d + i * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[d + i * parameter_size]"
, "ccv_cnnp_model.c", 2274, __extension__ __PRETTY_FUNCTION__
); }))
; }
2275 this_tensor_init = 1;
2276 } else {
2277 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
;
2278 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2279 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2280 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2281 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2282 compiled_data->tensors.parameters[d] = ccv_nnc_tensor_new(0, info, 0);
2283 for (i = 1; i < parallel_count; i++)
2284 {
2285 if (i != device_id)
2286 CCV_TENSOR_SET_DEVICE_ID(info.type, i)(info.type) = (((info.type) & ~0xfff00) | (((i) & 0xfff
) << 8))
;
2287 else
2288 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2289 compiled_data->tensors.parameters[d + i * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2290 }
2291 }
2292 }
2293 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2294 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2294, __extension__
__PRETTY_FUNCTION__); }))
;
2295 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2296 for (i = 1; i < parallel_count; i++)
2297 {
2298 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2299 if (copy_tensor)
2300 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2301 }
2302 // Mark this symbol as init'ed.
2303 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2304 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2305 init_v[s >> 5] |= (1u << (s & 0x1f));
2306 // If we just allocated this tensor, now it is time to check if we need to mark it as fully allocated.
2307 if (!this_tensor_init)
2308 {
2309 if (ccv_cnnp_model_tensors_any_to_alloc(model, compiled_data))
2310 compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)compiled_data->tensors_init.v | (uintptr_t)1);
2311 else // Remove the flag.
2312 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2313 }
2314}
2315
2316void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2317{
2318 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2319 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2320 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2320, __extension__ __PRETTY_FUNCTION__
); }))
;
2321 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2321, __extension__ __PRETTY_FUNCTION__
); }))
;
2322 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2323 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2324 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2325 if (param_ref < 0)
2326 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2326
, __extension__ __PRETTY_FUNCTION__); }))
; }
2327 else
2328 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2328, __extension__ __PRETTY_FUNCTION__
); }))
; }
2329 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2330 ccv_array_free(parameter_indices);
2331 const int parameter_size = compiled_data->parameters->rnum;
2332 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2332
, __extension__ __PRETTY_FUNCTION__); }))
;
2333 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2333, __extension__ __PRETTY_FUNCTION__
); }))
;
2334 // We don't need to consider parallel_count, every parameter on each device is identical.
2335 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2336 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2336, __extension__
__PRETTY_FUNCTION__); }))
;
2337 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2338}
2339
2340ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2341{
2342 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2343 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2344 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2344, __extension__ __PRETTY_FUNCTION__
); }))
;
2345 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2345, __extension__ __PRETTY_FUNCTION__
); }))
;
2346 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2347 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2348 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2349 if (param_ref < 0)
2350 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2350
, __extension__ __PRETTY_FUNCTION__); }))
; }
2351 else
2352 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2352, __extension__ __PRETTY_FUNCTION__
); }))
; }
2353 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2354 ccv_array_free(parameter_indices);
2355 const int parameter_size = compiled_data->parameters->rnum;
2356 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2356
, __extension__ __PRETTY_FUNCTION__); }))
;
2357 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2357, __extension__ __PRETTY_FUNCTION__
); }))
;
2358 // We don't need to consider parallel_count, every parameter on each device is identical.
2359 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2360 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2360, __extension__
__PRETTY_FUNCTION__); }))
;
2361 return tensor->info;
2362}
2363
2364const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2365{
2366 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2367 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2368 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2368, __extension__ __PRETTY_FUNCTION__
); }))
;
2369 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2370 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2371 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2372 if (param_ref < 0)
2373 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2373
, __extension__ __PRETTY_FUNCTION__); }))
; }
2374 else
2375 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2375, __extension__ __PRETTY_FUNCTION__
); }))
; }
2376 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2377 ccv_array_free(parameter_indices);
2378 const int parameter_size = compiled_data->parameters->rnum;
2379 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2379
, __extension__ __PRETTY_FUNCTION__); }))
;
2380 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2380, __extension__ __PRETTY_FUNCTION__
); }))
;
2381 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2382}
2383
2384int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2385{
2386 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2386, __extension__ __PRETTY_FUNCTION__
); }))
;
2387 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2388 return compiled_data->parameters->rnum;
2389}
2390
2391uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2392{
2393 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2393, __extension__ __PRETTY_FUNCTION__
); }))
;
2394 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2395 const int parameter_size = compiled_data->parameters->rnum;
2396 int i;
2397 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2398 uint64_t size = 0;
2399 const int tensors_init = !!compiled_data->tensors_init.v;
2400 uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
: 0;
2401 for (i = 0; i < parameter_size; i++)
2402 {
2403 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2404 if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] | (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i])
2405 {
2406 ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info;
2407 size += ccv_nnc_tensor_data_size(params);
2408 continue;
2409 }
2410 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2411 .graph = graph,
2412 .d = d
2413 });
2414 size += ccv_nnc_tensor_data_size(params);
2415 }
2416 return size;
2417}
2418
2419int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type)
2420{
2421 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2421, __extension__ __PRETTY_FUNCTION__
); }))
;
2422 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2423 if (count != compiled_data->parameters->rnum)
2424 return 0;
2425 if (CCV_TENSOR_GET_DEVICE(type)((type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2426 CCV_TENSOR_SET_DEVICE_ID(type, 0)(type) = (((type) & ~0xfff00) | (((0) & 0xfff) <<
8))
;
2427 int i;
2428 // We don't need to consider parallel_count, every parameter on each device is identical.
2429 for (i = 0; i < count; i++)
2430 {
2431 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2432 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2433 {
2434 tensors[i] = 0;
2435 continue;
2436 }
2437 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2438 if (tensor->info.type == type)
2439 tensors[i] = tensor;
2440 else {
2441 ccv_nnc_tensor_param_t info = tensor->info;
2442 info.type = type;
2443 tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet.
2444 }
2445 }
2446 for (i = 0; i < count; i++)
2447 {
2448 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2449 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2450 continue;
2451 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2452 // Now initiate transfer. We should do this one on a stream.
2453 if (tensor->info.type != type)
2454 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensors[i])(ccv_nnc_tensor_t* []){tensors[i]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2455 }
2456 // Copy names and remove parameters.
2457 for (i = 0; i < count; i++)
2458 {
2459 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i];
2460 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2461 {
2462 names[i] = 0;
2463 continue;
2464 }
2465 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2466 const size_t name_len = ccv_min(strnlen(name, 1023), 1023)({ typeof (strnlen(name, 1023)) _a = (strnlen(name, 1023)); typeof
(1023) _b = (1023); (_a < _b) ? _a : _b; })
;
2467 names[i] = ccmallocmalloc(name_len + 1);
2468 names[i][name_len] = 0;
2469 memcpy(names[i], name, name_len);
2470 if (tensor->info.type == type)
2471 compiled_data->tensors.parameters[i] = 0; // Only move when it is moved.
2472 }
2473 return 1;
2474}
2475
2476KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27
Taking true branch
28
Taking false branch
29
Calling 'kh_resize_ccv_cnnp_parameter_id'
30
Taking true branch
31
Assuming the condition is false
32
Taking false branch
33
'?' condition is true
34
Assuming 'new_flags' is non-null, which participates in a condition later
35
Taking false branch
36
'?' condition is true
37
Taking true branch
38
Assuming 'new_keys' is non-null, which participates in a condition later
39
Taking false branch
40
Taking true branch
41
Storing uninitialized value
42
Assuming 'new_vals' is non-null, which participates in a condition later
43
Taking false branch
44
Taking true branch
45
Loop condition is false. Execution continues on line 2476
46
Taking false branch
47
Returning from 'kh_resize_ccv_cnnp_parameter_id'
48
Taking false branch
49
Assuming the condition is true
50
Taking true branch
51
Taking true branch
57
Taking true branch
58
Assuming the condition is true
59
Assuming the condition is true
60
The value 1 is assigned to 'i'
61
Taking false branch
62
Assuming the condition is false
63
Assuming the condition is false
64
'?' condition is false
65
Returning the value 1
2477
2478void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
2479{
2480 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2480, __extension__ __PRETTY_FUNCTION__
); }))
;
2481 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2482 int i;
2483 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2484 if (count != compiled_data->parameters->rnum)
2485 {
2486 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2487 // Build the map between name and the index.
2488 for (i = 0; i < count; i++)
2489 {
2490 int ret;
2491 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[i], &ret);
2492 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2492
, __extension__ __PRETTY_FUNCTION__); }))
;
2493 kh_val(id_map, k)((id_map)->vals[k]) = i;
2494 }
2495 }
2496 const int parameter_size = compiled_data->parameters->rnum;
2497 int* copy_back = 0;
2498 const int tensors_init = !!compiled_data->tensors_init.v;
2499 if (!tensors_init)
2500 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2501 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2502 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2503 for (i = 0; i < parameter_size; i++)
2504 {
2505 int j = i;
2506 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2507 if (i >= 0 || strncmp(name, names[i], 1023) != 0)
2508 {
2509 // Build the map.
2510 if (id_map == 0)
2511 {
2512 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2513 for (j = 0; j < count; j++)
2514 {
2515 int ret;
2516 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[j], &ret);
2517 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2517
, __extension__ __PRETTY_FUNCTION__); }))
;
2518 kh_val(id_map, k)((id_map)->vals[k]) = j;
2519 }
2520 }
2521 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name)kh_get_ccv_cnnp_parameter_id(id_map, name);
2522 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2523 continue;
2524 j = kh_val(id_map, k)((id_map)->vals[k]);
2525 }
2526 if (compiled_data->tensors.parameters[i]) // Cannot be a shared parameter to read.
2527 { assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))((void) sizeof ((!((uintptr_t)compiled_data->tensors.parameters
[i] & (uintptr_t)1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t
)compiled_data->tensors.parameters[i] & (uintptr_t)1))
; else __assert_fail ("!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)"
, "ccv_cnnp_model.c", 2527, __extension__ __PRETTY_FUNCTION__
); }))
; }
2528 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
2529 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2530 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2531 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2532 const int d = parameter.d;
2533 if (info.type == tensors[j]->info.type && invalidates) // Can move.
2534 {
2535 // Deallocate it if needed.
2536 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2537 if (compiled_data->tensors.parameters[i])
2538 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2539 compiled_data->tensors.parameters[i] = tensors[j];
2540 tensors[j] = 0;
2541 } else {
2542 if (!compiled_data->tensors.parameters[i])
2543 { // Not allocated, to allocate first.
2544 // Create new one, make sure we create this by having the right parameters.
2545 const int type = info.type;
2546 info = tensors[j]->info;
2547 info.type = type; // Revert back the type.
2548 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
2549 }
2550 if (!copy_back)
2551 copy_back = (int*)cccalloccalloc(parameter_size, sizeof(int));
2552 copy_back[i] = j + 1;
2553 }
2554 init_v[d >> 5] |= (1u << (d & 0x1f));
2555 // Create this tensor for other data parallel allocations.
2556 info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
2557 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2558 for (j = 1; j < parallel_count; j++)
2559 if (!compiled_data->tensors.parameters[i + j * parameter_size])
2560 {
2561 if (j != device_id)
2562 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
2563 else
2564 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2565 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2566 }
2567 // No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
2568 }
2569 if (id_map)
2570 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2571 // Now do the transfer.
2572 if (copy_back)
2573 {
2574 for (i = 0; i < parameter_size; i++)
2575 {
2576 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[i]) & ~(uintptr_t)1))
;
2577 if (copy_back[i] == 0)
2578 continue;
2579 const int j = copy_back[i] - 1;
2580 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j])(ccv_nnc_tensor_t* []){tensors[j]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2581 }
2582 ccfreefree(copy_back);
2583 }
2584}
2585
2586ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2587{
2588 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2589 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2589, __extension__ __PRETTY_FUNCTION__); }))
;
2590 const int parameter_size = compiled_data->parameters->rnum;
2591 int i;
2592 for (i = 0; i < parameter_size; i++)
2593 {
2594 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2595 if (first(model, name, context))
2596 return ccv_cnnp_model_parameters(model, -1, i);
2597 }
2598 return 0;
2599}
2600
2601ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2602{
2603 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2604 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2604, __extension__ __PRETTY_FUNCTION__); }))
;
2605 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2606 const int parameter_size = compiled_data->parameters->rnum;
2607 int i;
2608 for (i = 0; i < parameter_size; i++)
2609 {
2610 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2611 if (filter(model, name, context))
2612 {
2613 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2614 ccv_array_push(parameters, &parameter);
2615 }
2616 }
2617 return parameters;
2618
2619}
2620
2621CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2622{
2623 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2624 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2624, __extension__ __PRETTY_FUNCTION__); }))
;
2625 const int tensors_init = !!compiled_data->tensors_init.v;
2626 if (!tensors_init) // If nothing initialized, we return parameter 0.
2627 return ccv_cnnp_model_parameters(model, -1, 0);
2628 const int parameter_size = compiled_data->parameters->rnum;
2629 int i;
2630 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2631 for (i = 0; i < parameter_size; i++)
2632 {
2633 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2634 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2635 return ccv_cnnp_model_parameters(model, -1, i);
2636 }
2637 return 0;
2638}
2639
2640static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2641{
2642 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2643 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2643, __extension__
__PRETTY_FUNCTION__); }))
;
2644 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2645 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2646 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2647 return to_parameter_indices;
2648}
2649
2650static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2651{
2652 // If the model is not compiled yet. Compile them now.
2653 if (!model->graph)
2654 {
2655 model->graph = ccv_nnc_symbolic_graph_new();
2656 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2656, __extension__ __PRETTY_FUNCTION__
); }))
;
2657 const int input_size = from_model->input_size;
2658 ccv_nnc_tensor_param_t input_params[input_size];
2659 int i;
2660 for (i = 0; i < input_size; i++)
2661 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2662 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2663 model->parallel_count = from_model->parallel_count;
2664 model->memory_compression = from_model->memory_compression;
2665 model->memory_reduction = from_model->memory_reduction;
2666 model->gradient_checkpointing = from_model->gradient_checkpointing;
2667 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2668 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2669 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2670 }
2671 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2672 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2672, __extension__ __PRETTY_FUNCTION__
); }))
;
2673 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2674 if (!to_tensors_init)
2675 {
2676 if (only_init_0)
2677 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2678 else
2679 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2680 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2681 // Check if it is not fully allocated, if it is not, init_1.
2682 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2683 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2683, __extension__ __PRETTY_FUNCTION__
); }))
;
2684 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2685 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2686 if (*from_param_ref < 0 && *param_ref >= 0)
2687 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2687, __extension__ __PRETTY_FUNCTION__
); }))
; }
2688 else if (*from_param_ref >= 0)
2689 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2689, __extension__ __PRETTY_FUNCTION__
); }))
; }
2690 if (*param_ref < 0 && *from_param_ref >= 0)
2691 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2691, __extension__ __PRETTY_FUNCTION__); }))
; }
2692 else if (*param_ref >= 0)
2693 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2693, __extension__ __PRETTY_FUNCTION__
); }))
; }
2694}
2695
2696void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2697{
2698 ccv_array_t* to_parameter_indices;
2699 int to_param_ref;
2700 ccv_array_t* from_parameter_indices;
2701 int from_param_ref;
2702 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2703 // Should be exactly the same tensor.
2704 if (to_param_ref < 0 && from_param_ref < 0)
2705 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2705, __extension__ __PRETTY_FUNCTION__
); }))
; }
2706 // To models.
2707 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2708 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2708, __extension__ __PRETTY_FUNCTION__
); }))
;
2709 // From models.
2710 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2711 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2712 const int to_parameter_size = to_compiled_data->parameters->rnum;
2713 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2714 int i, j;
2715 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2716 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2717 for (i = 0; i < rnum; i++)
2718 {
2719 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2720 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2720, __extension__ __PRETTY_FUNCTION__); }))
;
2721 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2721, __extension__ __PRETTY_FUNCTION__
); }))
;
2722 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2723 // If the original is not init'ed. We cannot copy from.
2724 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2725 continue;
2726 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2727 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2727, __extension__ __PRETTY_FUNCTION__); }))
;
2728 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2728, __extension__ __PRETTY_FUNCTION__
); }))
;
2729 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2730 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2730, __extension__
__PRETTY_FUNCTION__); }))
;
2731 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2732 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2732, __extension__
__PRETTY_FUNCTION__); }))
;
2733 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2734 for (j = 1; j < parallel_count; j++)
2735 {
2736 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2737 if (copy_tensor)
2738 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2739 }
2740 // Mark this symbol as init'ed.
2741 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2742 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2743 }
2744 ccv_array_free(to_parameter_indices);
2745 ccv_array_free(from_parameter_indices);
2746}
2747
2748void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2749{
2750 ccv_array_t* to_parameter_indices;
2751 int to_param_ref;
2752 ccv_array_t* from_parameter_indices;
2753 int from_param_ref;
2754 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2755 // Should be exactly the same tensor.
2756 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2757 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2757, __extension__ __PRETTY_FUNCTION__
); }))
; }
2758 // To models.
2759 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2760 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2760, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2761 // From models.
2762 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2763 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2764 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2764, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count can share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2765 const int from_parameter_size = from_compiled_data->parameters->rnum;
2766 const int to_parameter_size = to_compiled_data->parameters->rnum;
2767 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2768 int i, j;
2769 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2770 char* updated_name = 0;
2771 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2772 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2773 for (i = 0; i < rnum; i++)
2774 {
2775 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2776 // Need to figure out how to use the renamer here.
2777 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2778 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2778, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2779 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2779, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2780 if (renamer
18.1
'renamer' is non-null
)
2781 {
2782 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2783 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2784 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2785 updated_name = (char*)ccmallocmalloc(1024);
2786 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2787 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2788 memcpy(updated_name, src_name, src_name_len);
2789 updated_name[src_name_len] = 0;
2790 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2791 continue; // Skip this.
2792 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2793 {
2794 // Nothing changed.
2795 } else {
2796 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2797 {
2798 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2799 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
54
Assuming 'j' is >= 'from_parameter_size'
55
Loop condition is false. Execution continues on line 2807
2800 {
2801 int ret;
2802 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
52
Returning from 'kh_put_ccv_cnnp_parameter_id'
2803 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2803
, __extension__ __PRETTY_FUNCTION__); }))
;
53
Taking true branch
2804 kh_val(id_map, k)((id_map)->vals[k]) = j;
2805 }
2806 }
2807 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
56
Calling 'kh_get_ccv_cnnp_parameter_id'
66
Returning from 'kh_get_ccv_cnnp_parameter_id'
67
'k' initialized to 1
2808 if (k
67.1
'k' is not equal to field 'n_buckets'
== kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
68
Taking false branch
2809 continue;
2810 src_d = kh_val(id_map, k)((id_map)->vals[k]);
69
Assigned value is garbage or undefined
2811 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2811, __extension__ __PRETTY_FUNCTION__); }))
;
2812 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2812, __extension__
__PRETTY_FUNCTION__); }))
;
2813 }
2814 }
2815 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2815, __extension__ __PRETTY_FUNCTION__); }))
;
2816 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2816, __extension__
__PRETTY_FUNCTION__); }))
;
2817 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2818 // If the original is not init'ed. We cannot share from.
2819 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2820 continue;
2821 for (j = 0; j < parallel_count; j++)
2822 {
2823 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2824 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2824, __extension__
__PRETTY_FUNCTION__); }))
;
2825 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2826 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2827 ccv_nnc_tensor_free(dest);
2828 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2829 }
2830 // Mark this symbol as init'ed.
2831 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2832 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2833 }
2834 ccv_array_free(to_parameter_indices);
2835 ccv_array_free(from_parameter_indices);
2836 if (id_map)
2837 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2838 if (updated_name)
2839 ccfreefree(updated_name);
2840 // Mark it as incomplete so we will call init_1.
2841 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2842 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2843 else // Remove the flag.
2844 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2845}
2846
2847ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2848{
2849 if (!compiled_data->stream_map)
2850 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2851 int ret = 0;
2852 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2853 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2853, __extension__ __PRETTY_FUNCTION__); }))
;
2854 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2855 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2856 if (ret != 0)
2857 {
2858 stream = ccv_nnc_stream_context_new(type);
2859 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2860 }
2861 return stream;
2862}
2863
2864void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2865{
2866 ccv_array_t* to_parameter_indices;
2867 int to_param_ref;
2868 ccv_array_t* from_parameter_indices;
2869 int from_param_ref;
2870 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2871 // Should be exactly the same tensor.
2872 if (to_param_ref < 0 && from_param_ref < 0)
2873 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2873, __extension__ __PRETTY_FUNCTION__
); }))
; }
2874 // To models.
2875 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2876 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2876, __extension__ __PRETTY_FUNCTION__
); }))
;
2877 // From models.
2878 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2879 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2880 const int to_parameter_size = to_compiled_data->parameters->rnum;
2881 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2882 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2882, __extension__ __PRETTY_FUNCTION__
); }))
;
2883 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2883, __extension__ __PRETTY_FUNCTION__
); }))
;
2884 int i, j;
2885 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2886 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2887 for (i = 0; i < aux_in_size; i++)
2888 inputs[i + 2] = aux_ins[i];
2889 for (i = 0; i < aux_out_size; i++)
2890 outputs[i + 1] = aux_outs[i];
2891 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2892 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2893 for (i = 0; i < rnum; i++)
2894 {
2895 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2896 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2896, __extension__ __PRETTY_FUNCTION__); }))
;
2897 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2897, __extension__ __PRETTY_FUNCTION__
); }))
;
2898 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2899 // If the original is not init'ed. We cannot copy from.
2900 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2901 continue;
2902 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2903 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2903, __extension__ __PRETTY_FUNCTION__); }))
;
2904 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2904, __extension__ __PRETTY_FUNCTION__
); }))
;
2905 if (parallel_count > 1)
2906 {
2907 ccv_nnc_stream_context_t* streams[parallel_count];
2908 ccv_nnc_stream_signal_t* signal;
2909 if (stream_context)
2910 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2911 for (j = 0; j < parallel_count; j++)
2912 {
2913 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2914 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2915 if (!dest || !src)
2916 {
2917 streams[j] = 0;
2918 continue;
2919 }
2920 // At the moment, can only handle them on the same device.
2921 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2921, __extension__ __PRETTY_FUNCTION__
); }))
;
2922 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2922, __extension__ __PRETTY_FUNCTION__
); }))
;
2923 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2924 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2925 int type = stream_type;
2926 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2927 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2928 // Wait signal to finish.
2929 if (stream_context)
2930 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2931 inputs[0] = outputs[0] = dest;
2932 inputs[1] = src;
2933 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2934 if (stream_context)
2935 {
2936 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2937 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2938 }
2939 streams[j] = stream_0;
2940 }
2941 // If this should be blocking, blocking it.
2942 if (!stream_context)
2943 for (j = 0; j < parallel_count; j++)
2944 if (streams[j])
2945 ccv_nnc_stream_context_wait(streams[j]);
2946 } else {
2947 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2948 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2948, __extension__
__PRETTY_FUNCTION__); }))
;
2949 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2950 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2950, __extension__
__PRETTY_FUNCTION__); }))
;
2951 inputs[0] = outputs[0] = dest;
2952 inputs[1] = src;
2953 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2954 }
2955 // Mark this symbol as init'ed.
2956 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2957 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2958 }
2959 ccv_array_free(to_parameter_indices);
2960 ccv_array_free(from_parameter_indices);
2961}
2962
2963void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2964{
2965 int to_param_ref;
2966 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2967 // To models.
2968 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2969 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2969, __extension__ __PRETTY_FUNCTION__
); }))
;
2970 // Tensor has to be inited already.
2971 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2971, __extension__ __PRETTY_FUNCTION__
); }))
;
2972 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2972, __extension__ __PRETTY_FUNCTION__
); }))
;
2973 // From models.
2974 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2975 const int to_parameter_size = to_compiled_data->parameters->rnum;
2976 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2977 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2977, __extension__ __PRETTY_FUNCTION__
); }))
;
2978 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2978, __extension__ __PRETTY_FUNCTION__
); }))
;
2979 int i, j;
2980 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2981 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2982 for (i = 0; i < aux_in_size; i++)
2983 inputs[i + 1] = aux_ins[i];
2984 for (i = 0; i < aux_out_size; i++)
2985 outputs[i + 1] = aux_outs[i];
2986 for (i = 0; i < rnum; i++)
2987 {
2988 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2989 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2989, __extension__ __PRETTY_FUNCTION__); }))
;
2990 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2990, __extension__ __PRETTY_FUNCTION__
); }))
;
2991 if (parallel_count > 1)
2992 {
2993 ccv_nnc_stream_context_t* streams[parallel_count];
2994 ccv_nnc_stream_signal_t* signal;
2995 if (stream_context)
2996 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2997 for (j = 0; j < parallel_count; j++)
2998 {
2999 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
3000 if (!dest)
3001 {
3002 streams[j] = 0;
3003 continue;
3004 }
3005 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3006 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3007 int type = stream_type;
3008 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3009 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3010 // Wait signal to finish.
3011 if (stream_context)
3012 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3013 inputs[0] = outputs[0] = dest;
3014 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3015 if (stream_context)
3016 {
3017 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3018 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3019 }
3020 streams[j] = stream_0;
3021 }
3022 // If this should be blocking, blocking it.
3023 if (!stream_context)
3024 for (j = 0; j < parallel_count; j++)
3025 if (streams[j])
3026 ccv_nnc_stream_context_wait(streams[j]);
3027 } else {
3028 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
3029 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3029, __extension__
__PRETTY_FUNCTION__); }))
;
3030 inputs[0] = outputs[0] = dest;
3031 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3032 }
3033 // No need to mark this symbol as init'ed, it is already.
3034 }
3035 ccv_array_free(to_parameter_indices);
3036}
3037
3038void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
3039{
3040 int to_param_ref;
3041 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3042 // To models.
3043 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
3044 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 3044, __extension__ __PRETTY_FUNCTION__
); }))
;
3045 // Tensor has to be inited already.
3046 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 3046, __extension__ __PRETTY_FUNCTION__
); }))
;
3047 ccv_nnc_tensor_t** tensor_gradients;
3048 if (to_compiled_data->backward.count > 1)
3049 tensor_gradients = to_compiled_data->tensors.accum_gradients;
3050 else
3051 tensor_gradients = to_compiled_data->tensors.gradients;
3052 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 3052, __extension__ __PRETTY_FUNCTION__
); }))
;
3053 // From models.
3054 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3055 const int to_parameter_size = to_compiled_data->parameters->rnum;
3056 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3057 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 3057, __extension__ __PRETTY_FUNCTION__
); }))
;
3058 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 3058, __extension__ __PRETTY_FUNCTION__
); }))
;
3059 int i, j;
3060 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
3061 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
3062 for (i = 0; i < aux_in_size; i++)
3063 inputs[i + 1] = aux_ins[i];
3064 for (i = 0; i < aux_out_size; i++)
3065 outputs[i + 1] = aux_outs[i];
3066 for (i = 0; i < rnum; i++)
3067 {
3068 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3069 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3069, __extension__ __PRETTY_FUNCTION__); }))
;
3070 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3070, __extension__ __PRETTY_FUNCTION__
); }))
;
3071 if (parallel_count > 1)
3072 {
3073 ccv_nnc_stream_context_t* streams[parallel_count];
3074 ccv_nnc_stream_signal_t* signal;
3075 if (stream_context)
3076 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
3077 for (j = 0; j < parallel_count; j++)
3078 {
3079 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
3080 if (!dest)
3081 {
3082 streams[j] = 0;
3083 continue;
3084 }
3085 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3086 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3087 int type = stream_type;
3088 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3089 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3090 // Wait signal to finish.
3091 if (stream_context)
3092 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3093 inputs[0] = outputs[0] = dest;
3094 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3095 if (stream_context)
3096 {
3097 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3098 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3099 }
3100 streams[j] = stream_0;
3101 }
3102 // If this should be blocking, blocking it.
3103 if (!stream_context)
3104 for (j = 0; j < parallel_count; j++)
3105 if (streams[j])
3106 ccv_nnc_stream_context_wait(streams[j]);
3107 } else {
3108 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
3109 if (!dest)
3110 continue;
3111 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3111, __extension__
__PRETTY_FUNCTION__); }))
;
3112 inputs[0] = outputs[0] = dest;
3113 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3114 }
3115 // No need to mark this symbol as init'ed, it is already.
3116 }
3117 ccv_array_free(to_parameter_indices);
3118}
3119
3120void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
3121{
3122 // Only CUDA backend has this feature.
3123#ifdef HAVE_CUDA1
3124 int to_param_ref;
3125 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3126 // To models.
3127 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3128 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3128, __extension__ __PRETTY_FUNCTION__); }))
;
3129 // Tensor has to be inited already.
3130 assert(!!compiled_data->tensors_init.v)((void) sizeof ((!!compiled_data->tensors_init.v) ? 1 : 0)
, __extension__ ({ if (!!compiled_data->tensors_init.v) ; else
__assert_fail ("!!compiled_data->tensors_init.v", "ccv_cnnp_model.c"
, 3130, __extension__ __PRETTY_FUNCTION__); }))
;
3131 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 3131, __extension__ __PRETTY_FUNCTION__
); }))
;
3132 // From models.
3133 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3134 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3135 int i;
3136 for (i = 0; i < rnum; i++)
3137 {
3138 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3139 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3139, __extension__ __PRETTY_FUNCTION__); }))
;
3140 assert(dest_d < compiled_data->parameters->rnum)((void) sizeof ((dest_d < compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3140, __extension__ __PRETTY_FUNCTION__
); }))
;
3141 if (parallel_count > 1)
3142 {
3143 assert(0 && "Cannot support this when data parallel is in effect.")((void) sizeof ((0 && "Cannot support this when data parallel is in effect."
) ? 1 : 0), __extension__ ({ if (0 && "Cannot support this when data parallel is in effect."
) ; else __assert_fail ("0 && \"Cannot support this when data parallel is in effect.\""
, "ccv_cnnp_model.c", 3143, __extension__ __PRETTY_FUNCTION__
); }))
;
3144 } else {
3145 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[dest_d]) & ~(uintptr_t)1))
;
3146 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 3146, __extension__
__PRETTY_FUNCTION__); }))
;
3147 ccv_nnc_tensor_param_t params = src->info;
3148 if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) != CCV_TENSOR_GPU_MEMORY)
3149 continue;
3150 const size_t size = ccv_nnc_tensor_data_size(params);
3151 if (size <= 0)
3152 continue;
3153 const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1);
3154 const int tfb = (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL(0xFFF) && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
3155 ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_t));
3156 tensor->dataof = 0;
3157 tensor->alias_ref = 0;
3158 tensor->sig = 0;
3159 tensor->refcount = 1;
3160 tensor->info = params;
3161 if (tfb)
3162 {
3163 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000) | params.dim[2];
3164 // This corresponding to mat->step
3165 tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]))(((params.dim[1]) * _ccv_get_data_type_size[(((((params.datatype
) & 0xFF000) | params.dim[2])) & 0xFF000) >> 12
] * (((((params.datatype) & 0xFF000) | params.dim[2])) &
0xFFF) + 3) & -4)
;
3166 } else // This won't be recognized by ccv_dense_matrix_t
3167 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000);
3168 // Remove this flag so it can be deallocated as usual.
3169 tensor->type &= ~CCV_NO_DATA_ALLOC;
3170 assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY)((void) sizeof ((((params.type) & 0xfff00) != CCV_COMPUTE_DEVICE_ANY
) ? 1 : 0), __extension__ ({ if (((params.type) & 0xfff00
) != CCV_COMPUTE_DEVICE_ANY) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY"
, "ccv_cnnp_model.c", 3170, __extension__ __PRETTY_FUNCTION__
); }))
;
3171 void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), size);
3172 if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
3173 {
3174 tensor->data.u8 = (uint8_t*)ptr;
3175 tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer a explicit prefetch call.
3176 } else {
3177 // Allocation failed.
3178 ccfreefree(tensor);
3179 continue;
3180 }
3181 // TODO: Cannot run this on the stream context yet, due to allocation and deallocations.
3182 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0);
3183 cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), tensor->data.u8, size);
3184 compiled_data->tensors.parameters[dest_d] = tensor;
3185 // Can free out the old one.
3186 if (should_free)
3187 ccv_nnc_tensor_free(src);
3188 }
3189 // No need to mark this symbol as init'ed, it is already.
3190 }
3191 ccv_array_free(to_parameter_indices);
3192#endif
3193}
3194
3195ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
3196{
3197 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3198 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3198, __extension__ __PRETTY_FUNCTION__); }))
;
3199 return compiled_data->minimize.minimizer;
3200}
3201
3202void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
3203{
3204 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3205 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3205, __extension__ __PRETTY_FUNCTION__); }))
;
3206 const int parameter_size = compiled_data->parameters->rnum;
3207 if (parameter_size == 0)
3208 return;
3209 if (reset)
3210 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 3210, __extension__ __PRETTY_FUNCTION__
); }))
; }
3211 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3212 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
3213 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
3214 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
3215 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3216 // We update all parameters, at this point, we have one minimizer.
3217 if (set_parameters == 0 || set_parameter_size == 0)
3218 compiled_data->minimize.minimizer = minimizer;
3219 int i;
3220 if (set_parameters && set_parameter_size)
3221 {
3222 // I need to save what's the minimizer along with this.
3223 if (!compiled_data->minimize.parameters)
3224 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
3225 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
3226 set_minimizer_for_parameter->minimizer = minimizer;
3227 set_minimizer_for_parameter->parameter_size = set_parameter_size;
3228 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
3229 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
3230 }
3231 // If reset is true, clear the parameters array.
3232 if (reset && compiled_data->minimize.parameters)
3233 {
3234 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3235 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3236 ccv_array_clear(compiled_data->minimize.parameters);
3237 }
3238 if (!compiled_data->update_nodes)
3239 return;
3240 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
3241 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 3241, __extension__ __PRETTY_FUNCTION__); }))
;
3242 if (saved_aux_size > old_max_saved_aux_size)
3243 {
3244 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 3244, __extension__ __PRETTY_FUNCTION__
); }))
;
3245 // Reallocate first, move them around later.
3246 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
3247 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
3248 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
3249 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
3250 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
3251 }
3252 int flag = 0;
3253 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3254 if (set_parameters && set_parameter_size)
3255 {
3256 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
3257 for (i = 0; i < set_parameter_size; i++)
3258 {
3259 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
3260 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 3260, __extension__ __PRETTY_FUNCTION__
); }))
;
3261 const int old_rnum = parameter_indices->rnum;
3262 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
3263 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
3264 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 3264, __extension__ __PRETTY_FUNCTION__
); }))
;
3265 if (param_ref >= 0)
3266 {
3267 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 3267, __extension__ __PRETTY_FUNCTION__
); }))
;
3268 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
3269 parameter_indices->rnum = old_rnum + 1;
3270 }
3271 }
3272 // We may have duplicated indices, but that is OK, we will set it twice.
3273 for (i = 0; i < parameter_indices->rnum; i++)
3274 {
3275 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
3276 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
3277 flag = 1;
3278 }
3279 ccv_array_free(parameter_indices);
3280 } else {
3281 for (i = 0; i < parameter_size; i++)
3282 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
3283 flag = 1;
3284 if (compiled_data->minimize.parameters)
3285 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
3286 flag = 1;
3287 }
3288 if (flag)
3289 {
3290 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
3291 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
3292 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3293 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3294 }
3295}
3296
3297void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
3298{
3299 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3300 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3300, __extension__ __PRETTY_FUNCTION__); }))
;
3301 compiled_data->compile_params = compile_params;
3302}
3303
3304void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
3305{
3306 if (model->graph && out_size > 0)
3307 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
3308 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
3309 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
3310 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
3311 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
3312 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
3313 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
3314}
3315
3316void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
3317{
3318 if (model->graph)
3319 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
3320}
3321
3322static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
3323{
3324 int i;
3325 const int parameter_size = compiled_data->parameters->rnum;
3326 ccv_array_free(compiled_data->parameters);
3327 if (compiled_data->parameter_flags)
3328 ccfreefree(compiled_data->parameter_flags);
3329 const int internal_size = compiled_data->internals->rnum;
3330 ccv_array_free(compiled_data->internals);
3331 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 3331, __extension__ __PRETTY_FUNCTION__
); }))
;
3332 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 3332, __extension__ __PRETTY_FUNCTION__
); }))
;
3333 for (i = 0; i < parameter_size; i++)
3334 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
3335 ccv_array_free(compiled_data->ids.parameters);
3336 for (i = 0; i < internal_size; i++)
3337 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
3338 ccv_array_free(compiled_data->ids.internals);
3339 const int parallel_count = compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_root_parallel_count(model);
3340 if (compiled_data->tensors.parameters)
3341 {
3342 for (i = 0; i < parameter_size * parallel_count; i++)
3343 // If it is not marked as not belonging, we can free it.
3344 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
3345 if (compiled_data->tensors.parameters[i])
3346 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
3347 for (i = 0; i < internal_size * parallel_count; i++)
3348 if (compiled_data->tensors.internals[i])
3349 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
3350 ccfreefree(compiled_data->tensors.parameters);
3351 }
3352 if (compiled_data->tensors.gradients)
3353 {
3354 for (i = 0; i < parameter_size * parallel_count; i++)
3355 {
3356 if (compiled_data->tensors.gradients[i])
3357 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3358 if (compiled_data->tensors.accum_gradients[i])
3359 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3360 }
3361 ccfreefree(compiled_data->tensors.gradients);
3362 }
3363 if (compiled_data->minimize.parameters)
3364 {
3365 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3366 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3367 ccv_array_free(compiled_data->minimize.parameters);
3368 }
3369 if (compiled_data->rewindables)
3370 ccv_array_free(compiled_data->rewindables);
3371 if (compiled_data->tensors_init.v)
3372 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
3373 if (compiled_data->evaluate.tos)
3374 ccfreefree(compiled_data->evaluate.tos);
3375 compiled_data->evaluate.tos = 0;
3376 if (compiled_data->stream_map)
3377 {
3378 khiter_t k;
3379 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
3380 {
3381 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
3382 continue;
3383 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
3384 ccv_nnc_stream_context_free(stream);
3385 }
3386 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
3387 }
3388 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3389 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3390 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3391 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3392 if (compiled_data->gradient_checkpoints)
3393 {
3394 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3395 {
3396 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
3397 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 3397, __extension__ __PRETTY_FUNCTION__
); }))
;
3398 ccfreefree(checkpoint->inputs);
3399 ccv_array_free(checkpoint->tensor_symbols);
3400 }
3401 ccv_array_free(compiled_data->gradient_checkpoints);
3402 }
3403 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3404 ccfreefree(compiled_data);
3405}
3406
3407void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3408{
3409 ccv_cnnp_model_deinit(model);
3410 if (model->isa->dealloc)
3411 model->isa->dealloc(model);
3412 if (model->io)
3413 {
3414 int i;
3415 for (i = 0; i < model->io->rnum; i++)
3416 {
3417 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
3418 if (model_io->outgoings)
3419 ccv_array_free(model_io->outgoings);
3420 if (model_io->incomings)
3421 ccv_array_free(model_io->incomings);
3422 if (model_io->dependencies)
3423 ccv_array_free(model_io->dependencies);
3424 ccfreefree(model_io);
3425 }
3426 ccv_array_free(model->io);
3427 }
3428 if (model->parameter_indices)
3429 ccv_array_free(model->parameter_indices);
3430 if (model->inputs)
3431 ccfreefree(model->inputs);
3432 if (model->graph)
3433 ccv_nnc_symbolic_graph_free(model->graph);
3434 if (model->compiled_data)
3435 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3436 if (model->name)
3437 ccfreefree(model->name);
3438 ccfreefree(model);
3439}
3440
3441void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3442{
3443 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3444 if (!compiled_data)
3445 return;
3446 if (compiled_data->graph)
3447 ccv_nnc_graph_cancel(compiled_data->graph);
3448 if (compiled_data->apply_gradients.graph)
3449 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3450}
3451
3452void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3453{
3454 model->exec_flags = flags;
3455}
3456
3457int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3458{
3459 return model->exec_flags;
3460}