Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2474, column 1
1st function call argument is an uninitialized value
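For reference, this diagnostic (clang's core.CallAndMessage checker) fires when a value that is never written on some execution path flows into a function call argument. A minimal, self-contained sketch that triggers the same warning (hypothetical code for illustration, not taken from this file):

#include <stdio.h>

static void consume(int x)
{
 printf("%d\n", x);
}

int main(int argc, char** argv)
{
 int v; // only written on the argc > 1 path
 if (argc > 1)
  v = 1;
 // On the argc <= 1 path, v is never initialized, so the analyzer reports:
 // "1st function call argument is an uninitialized value"
 consume(v);
 return 0;
}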

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2026-05-01-233500-1493103-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7#include "_ccv_nnc_symbolic_graph.h"
8#ifdef HAVE_CUDA
9#include "gpu/ccv_nnc_compat.h"
10#endif
11
12// MARK - Level-5 API
13
14ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
15{
16 if (!model->io)
17 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
19 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
19 model_io->param_ref = 0;
20 model_io->param_sel = 0;
21 model_io->visit = 0;
22 model_io->model = model;
23 model_io->dependencies = 0;
24 model_io->dependents = 0;
25 model_io->outgoings = 0;
26 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
27 ccv_array_push(model->io, &model_io);
28 if (input_size > 0)
29 {
30 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
31 ccv_array_resize(model_io->incomings, input_size);
32 int i;
33 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
34 for (i = 0; i < input_size; i++)
35 {
36 if (!inputs[i]->outgoings)
37 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
38 ccv_array_push(inputs[i]->outgoings, &model_io);
39 }
40 } else {
41 model_io->incomings = 0;
42 }
43 return model_io;
44}
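As a usage sketch (illustrative only: `encoder` stands in for any previously constructed model; ccv_cnnp_input and MODEL_IO_LIST are assumed from the public ccv_nnc headers), applying a model records the edges maintained above:

 ccv_cnnp_model_io_t const x = ccv_cnnp_input();
 // Wires `encoder` into the IO graph: x gains an outgoing edge to the new
 // model_io, and the model_io records x in its incomings array.
 ccv_cnnp_model_io_t const y = ccv_cnnp_model_apply(encoder, MODEL_IO_LIST(x));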
45
46void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
47{
48 assert(dependency_size > 0);
49 if (!model_io->dependencies)
50 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
51 int i, j;
52 for (i = 0; i < dependency_size; i++)
53 {
54 int flag = 0;
55 // Check whether it already exists.
56 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
57 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
58 flag = 1;
59 if (flag)
60 continue;
61 ccv_array_push(model_io->dependencies, dependencies + i);
62 ++dependencies[i]->dependents;
63 }
64}
65
66int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
67{
68 return model->output_size;
69}
70
71int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
72{
73 // If the model is compiled, it defaults to 1 unless explicitly set otherwise.
74 if (model->compiled_data)
75 return model->is_trainable >= 0 ? model->is_trainable : 1;
76 return model->is_trainable;
77}
78
79ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
80{
81 if (!model->io)
82 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
83 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
84 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
85 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
86 model_io->visit = 0;
87 model_io->model = model;
88 model_io->outputs = 0;
89 model_io->dependencies = 0;
90 model_io->dependents = 0;
91 model_io->incomings = 0;
92 model_io->outgoings = 0;
93 ccv_array_push(model->io, &model_io);
94 return model_io;
95}
96
97void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
98{
99 model->notify_hook.func = func;
100 model->notify_hook.context = context;
101}
102
103void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
104{
105 if (model->notify_hook.func)
106 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
107 if (model->isa->notify)
108 model->isa->notify(model, tag, payload);
109}
110
111static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
112{
113 int i, j;
114 for (i = 0; i < graph_exec_symbol_size; i++)
115 {
116 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
117 // Check whether this tensor symbol has any duplicate.
118 for (j = i + 1; j < graph_exec_symbol_size;)
119 {
120 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
121 // If there is a same tensor symbol, remove it.
122 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
123 {
124 if (j + 1 < graph_exec_symbol_size)
125 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
126 --graph_exec_symbol_size;
127 continue;
128 }
129 ++j;
130 }
131 }
132 return graph_exec_symbol_size;
133}
134
135void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
136{
137 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
138 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
139 int i;
140 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
141 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
142 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
143 {
144 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
145 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
146 {
147 // Only add to parameter_indices if it is trainable.
148 if (add_to_array_context->add_parameter_indices)
149 ccv_array_add_unique_int(model->parameter_indices, i);
150 // Found it, return, don't add it.
151 return;
152 }
153 }
154 // Only add to parameter_indices if it is trainable.
155 if (add_to_array_context->add_parameter_indices)
156 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
157 // This is a new one, no need to add_unique_int, it is unique.
158 ccv_array_push(add_to_array_context->symbols, &symbol);
159 if (add_to_array_context->trainables)
160 ccv_array_push(add_to_array_context->trainables, &is_trainable);
161 char id[2048];
162 id[0] = add_to_array_context->prefix;
163 id[1] = '-';
164 int total_len = 2;
165 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
166 {
167 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
168 int len;
169 if (name->name && name->name[0] != '\0')
170 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
171 else
172 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
173 total_len += len;
174 if (total_len >= 2047)
175 break;
176 }
177 if (total_len < 2047)
178 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
179 assert(total_len < 2048);
180 char *heap_id = (char*)ccmalloc(total_len + 1);
181 memcpy(heap_id, id, total_len + 1);
182 ccv_array_push(add_to_array_context->ids, &heap_id);
183 ++add_to_array_context->sequence->it;
184}
185
186static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
187{
188 compiled_data->f = compiled_data->fits + output_size;
189 compiled_data->xpu_alloc.mp_hdr = -1;
190 compiled_data->xpu_alloc.freed = kh_init(dy_str);
191 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
192 compiled_data->gradient_checkpoints = gradient_checkpoints;
193}
194
195static int _ccv_cnnp_model_root_parallel_count(const ccv_cnnp_model_t* const model)
196{
197 return ccv_max(model->parallel_count, 1);
198}
199
200static int _ccv_cnnp_model_effective_parallel_count(const ccv_cnnp_model_t* const model)
201{
202 int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
203 if (model->graph && model->graph->data_parallel.count > parallel_count)
204 parallel_count = model->graph->data_parallel.count;
205 return parallel_count;
206}
207
208static int _ccv_cnnp_compiled_data_parallel_count(const ccv_cnnp_model_t* const model, const ccv_cnnp_compiled_data_t* const compiled_data)
209{
210 return compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_effective_parallel_count(model);
211}
212
213ccv_nnc_tensor_symbol_t ccv_cnnp_model_get_symbol(ccv_cnnp_model_t* const self, const ccv_nnc_tensor_symbol_t symbol)
214{
215 assert(self->data);
216 ccv_cnnp_model_build_data_t* const build_data = (ccv_cnnp_model_build_data_t*)self->data;
217 if (build_data->parallel_count <= 1 || build_data->parallel_rank == 0)
218 return symbol;
219 const int rank = build_data->parallel_rank;
220 assert(rank > 0);
221 assert(rank < build_data->parallel_count);
222 ccv_nnc_symbolic_graph_t* const graph = (ccv_nnc_symbolic_graph_t*)symbol.graph;
223 ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, symbol, rank);
224 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
225 return copy;
226 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, symbol);
227 if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
228 CCV_TENSOR_SET_DEVICE_ID(params.type, rank);
229 copy = ccv_nnc_tensor_symbol_new(graph, params, 0);
230 ccv_nnc_tensor_symbol_set_copy(graph, symbol, rank, copy);
231 return copy;
232}
233
234typedef struct {
235 void* old_graph_exec_symbol_new_hook_context;
236 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
237 ccv_nnc_symbolic_graph_t* graph;
238 ccv_cnnp_model_build_data_t* build_data;
239} ccv_cnnp_model_set_exec_flags_context_t;
240
241static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
242{
243 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
244 if (flags_context->build_data->exec_flags)
245 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
246 if (flags_context->old_graph_exec_symbol_new_hook)
247 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
248}
249
250static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
251{
252 assert(model->graph);
253 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
254 int i;
255 for (i = 0; i < input_size; i++)
256 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
257 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
258 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
259 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
260 ccv_cnnp_model_sequence_t model_sequence = {
261 .bank = kh_init(ccv_cnnp_model_name_bank)
262 };
263 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
264 .add_parameter_indices = 1,
265 .prefix = 't',
266 .sequence = &model_sequence,
267 .symbols = parameters,
268 .ids = parameter_ids,
269 .trainables = parameter_trainables,
270 };
271 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
272 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
273 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
274 .add_parameter_indices = 0,
275 .prefix = 'r',
276 .sequence = &model_sequence,
277 .symbols = internals,
278 .ids = internal_ids,
279 .trainables = 0,
280 };
281 ccv_cnnp_model_build_data_t build_data = {
282 .exec_flags = 0,
283 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
284 .parallel_count = 1,
285 .parallel_rank = 0,
286 .model_sequence = &model_sequence,
287 .add_to_array = ccv_cnnp_model_add_to_array,
288 .parameters = parameters,
289 .context = {
290 .add_to_parameter = &add_to_parameter_context,
291 .add_to_output = &add_to_output_context,
292 },
293 .gradient_checkpoints = 0,
294 };
295 model->data = &build_data;
296 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
297 .graph = model->graph,
298 .build_data = &build_data,
299 .old_graph_exec_symbol_new_hook = 0,
300 .old_graph_exec_symbol_new_hook_context = 0
301 };
302 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
303 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
304 // Reset back to previous hook.
305 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
306 for (i = 0; i < model->output_size; i++)
307 {
308 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
309 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
310 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
311 continue;
312 // If output is an alias, insert a data transform regardless, for result correctness (we cannot bind an alias). See the ccv_nnc_tensor_bind_symbol method:
313 // we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind an alias tensor itself. This is expected,
314 // because we cannot handle the case where an alias covers part of the original tensor but is bound differently.
315 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
316 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
317 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
318 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
319 }
320 model->data = 0;
321 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
322 if (model_sequence.sequences)
323 ccv_array_free(model_sequence.sequences);
324 // Check if there are parameters that are not trainable. If there are, we will allocate a uint64 bitmap to record that.
325 int not_trainables = 0;
326 // Assert no parameter is alias.
327 for (i = 0; i < parameters->rnum; i++)
328 {
329 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
330 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
331 assert(alias_to.graph == 0); // Cannot find the one it aliases to.
332 if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
333 not_trainables = 1;
334 }
335 assert(parameters->rnum == parameter_trainables->rnum);
336 uint64_t* parameter_flags = 0;
337 if (not_trainables)
338 {
339 parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
340 for (i = 0; i < parameter_trainables->rnum; i++)
341 if (*(int*)ccv_array_get(parameter_trainables, i))
342 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
343 }
344 ccv_array_free(parameter_trainables);
345 // Assert no internal is alias.
346 for (i = 0; i < internals->rnum; i++)
347 {
348 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
349 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
350 assert(alias_to.graph == 0); // Cannot find the one it aliases to.
351 }
352 const int output_size = model->output_size;
353 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
354 const int parameters_rnum = parameters->rnum;
355 if (input_size > 0)
356 {
357 ccv_array_resize(parameters, parameters_rnum + input_size);
358 memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
359 }
360 ccv_nnc_symbolic_graph_simplify(model->graph,
361 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
362 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
363 CCV_NNC_SIMPLIFY_OPS_FUSION,
364 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
365 ccv_array_get(parameters, 0), parameters_rnum + input_size,
366 model->outputs, output_size,
367 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
368 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
369 // Size it down.
370 parameters->rnum = parameters_rnum;
371 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
372 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
373 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
374 assert(evaluate_to_size > 0);
375 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
376 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
377 compiled_data->loss = loss;
378 if (loss.cmd == CCV_NNC_NOOP)
379 {
380 // If no loss function is provided, there are no fits.
381 for (i = 0; i < output_size; i++)
382 {
383 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
384 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
385 if (alias_to.d < 0)
386 compiled_data->f[i] = model->outputs[i];
387 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
388 int ofs[CCV_NNC_MAX_DIM_ALLOC];
389 int inc[CCV_NNC_MAX_DIM_ALLOC];
390 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
391 int j;
392 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
393 { assert(ofs[j] == 0); } // There is no ofs.
394 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
395 }
396 }
397 } else {
398 for (i = 0; i < output_size; i++)
399 {
400 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
401 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
402 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
403 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
404 }
405 }
406 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
407 ccv_nnc_symbolic_graph_simplify(model->graph,
408 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion; this way, we can fuse the loss function.
409 0, 0, // No need to provide binds at this point.
410 compiled_data->f, model->output_size,
411 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
412 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
413 // If inputs are from GPU, stream type is GPU.
414 compiled_data->parameters = parameters;
415 compiled_data->parameter_flags = parameter_flags;
416 compiled_data->internals = internals;
417 compiled_data->ids.parameters = parameter_ids;
418 compiled_data->ids.internals = internal_ids;
419 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
420}
421
422static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
423{
424 ccv_array_t* const stack = (ccv_array_t*)context;
425 ccv_array_push(stack, &symbol.d);
426}
427
428static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
429{
430 const ccv_nnc_tensor_symbol_t src_symbol = {
431 .d = src_index,
432 .graph = src_graph
433 };
434 const ccv_nnc_tensor_symbol_t dest_symbol = {
435 .d = dest_index,
436 .graph = dest_graph
437 };
438 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
439 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
440 int ofs[CCV_NNC_MAX_DIM_ALLOC];
441 int inc[CCV_NNC_MAX_DIM_ALLOC];
442 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
443 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
444}
445
446static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
447{
448 const ccv_nnc_tensor_symbol_t src_symbol = {
449 .d = src_index,
450 .graph = src_graph
451 };
452 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
453 const ccv_nnc_tensor_symbol_t dest_symbol = {
454 .d = dest_index,
455 .graph = dest_graph
456 };
457 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
458 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
459}
460
461static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
462static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
463
464typedef struct {
465 int parallel_count;
466 ccv_nnc_symbolic_graph_t* graph;
467 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
468} ccv_nnc_graph_exec_update_t;
469
470static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
471{
472 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
473 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
474 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
475 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
476 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
477 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
478 const int parallel_count = graph_exec_update->parallel_count;
479 int i;
480 for (i = 1; i < parallel_count; i++)
481 {
482 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
483 if (!CCV_NO_GRAPH_EXEC(copy))
484 {
485 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
486 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
487 }
488 }
489}
490
491void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
492{
493 assert(model->graph);
494 assert(model->compiled_data);
495 assert(!init->graph);
496 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
497 init->graph = ccv_nnc_symbolic_graph_new();
498 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
499 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
500 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
501 init->parallel_count = model->parallel_count;
502 init->memory_compression = model->memory_compression;
503 init->memory_reduction = model->memory_reduction;
504 init->gradient_checkpointing = model->gradient_checkpointing;
505 init->compiled_data->stream_type = model->compiled_data->stream_type;
506 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
507 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
508 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
509 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
510 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
511 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
512 int i, j;
513 // Verify that parameters, internals and saved_aux in both graphs have the same dimensionality.
514 for (i = 0; i < compiled_data->parameters->rnum; i++)
515 {
516 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
517 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
518 }
519 for (i = 0; i < compiled_data->internals->rnum; i++)
520 {
521 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
522 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
523 }
524 // Update inputs.
525 assert(model->input_size == init->input_size);
526 for (i = 0; i < model->input_size; i++)
527 if (model->inputs[i].d >= 0)
528 {
529 assert(init->inputs[i].d >= 0);
530 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
531 }
532 // Update outputs.
533 assert(model->output_size == init->output_size);
534 for (i = 0; i < model->output_size; i++)
535 {
536 if (model->outputs[i].d >= 0)
537 {
538 assert(init->outputs[i].d >= 0);
539 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
540 }
541 if (model->outputs[i].d != model->compiled_data->f[i].d)
542 {
543 assert(init->outputs[i].d != init->compiled_data->f[i].d);
544 if (model->compiled_data->f[i].d >= 0)
545 {
546 assert(init->compiled_data->f[i].d >= 0);
547 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
548 }
549 }
550 }
551 // Go through the graph to set tensor on matching symbols
552 for (i = 0; i < stack->rnum; i++)
553 {
554 const int d = *(int*)ccv_array_get(stack, i);
555 // If it exceeds the range, skip.
556 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
557 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
558 continue;
559 const ccv_nnc_graph_exec_symbol_t src_symbol = {
560 .d = d,
561 .graph = init->graph
562 };
563 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
564 .d = d,
565 .graph = model->graph
566 };
567 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
568 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
569 // If the name doesn't match, skip.
570 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
571 continue;
572 // Now get all the inputs and outputs, if matches, set them.
573 const int* src_inputs;
574 int src_input_size;
575 const int* src_outputs;
576 int src_output_size;
577 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
578 const int* dest_inputs;
579 int dest_input_size;
580 const int* dest_outputs;
581 int dest_output_size;
582 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
583 // We may have unmatched input / output size because this is the minimizer and it has
584 // different saved_aux (for example, when we shrunk with CMD_NOOP).
585 if (src_input_size != dest_input_size)
586 continue;
587 if (src_output_size != dest_output_size)
588 continue;
589 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
590 // There may be mismatches between the source tensor symbols and destination tensor symbols. The reason is that
591 // we may pass in the minimizer later; therefore, we may allocate tensors for the minimizer later in the original
592 // graph, whereas in the newly created graph it is streamlined (the minimizer exists from the beginning). That
593 // makes the order of tensor symbol creation different, and therefore exactly which tensor is which goes wrong as
594 // well. However, setting a new minimizer won't change the exec symbol ordering, because we never create new exec
595 // symbols after the gradient init step. Setting a new minimizer just updates that exec symbol's settings; it is not
596 // a new exec symbol.
597 for (j = 0; j < src_input_size; j++)
598 if (src_inputs[j] >= 0)
599 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
600 for (j = 0; j < src_output_size; j++)
601 if (src_outputs[j] >= 0)
602 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
603 }
604 ccv_array_free(stack);
605 // After this, we get all tensors in the model graph resolved through tensor_auto.
606 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
607 // Verify that the symbols we get match.
608 const int parameter_size = compiled_data->parameters->rnum;
609 for (i = 0; i < parameter_size; i++)
610 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
611 const int internal_size = compiled_data->internals->rnum;
612 for (i = 0; i < internal_size; i++)
613 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
614 // Go through compiled data.
615 if (compiled_data->tensor_arena)
616 {
617 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
618 if (flag == 0 && compiled_data->graph_exec_arena)
619 {
620 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
621 // Since we will reinit, if we previously set is_test, we need to set it again.
622 if (compiled_data->is_test)
623 {
624 const int parallel_count = ccv_max(model->parallel_count, 1);
625 ccv_nnc_graph_exec_update_t update = {
626 .parallel_count = parallel_count,
627 .graph = model->graph,
628 .graph_exec_arena = compiled_data->graph_exec_arena,
629 };
630 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
631 }
632 } else
633 // Free-up tensor arena & graph exec arena.
634 _ccv_cnnp_compiled_data_graph_free(compiled_data);
635 }
636 // There are other compiled graphs, for accum and apply gradients.
637 // However, the main conclusion is that these absorb operations shouldn't impact parameters.
638 // Thus, they won't impact the shape of gradients (only outgrad). Since we don't allocate
639 // outgrad ourselves, it is not a concern. For normal gradients, the shape cannot
640 // be changed, otherwise parameters' shapes would be meaningless. The same goes for internals.
641 // That is why we don't update these compiled graphs at all at this point.
642 // Free the model, we've already "absorbed" it.
643 ccv_cnnp_model_free(init);
644}
645
646void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
647{
648 assert(input_size == model->input_size || model->input_size == 0);
649 if (model->input_size == 0)
650 model->input_size = input_size;
651 if (!model->graph) // The graph is not compiled yet.
652 {
653 model->graph = ccv_nnc_symbolic_graph_new();
654 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
655 assert(model->compiled_data);
656 int i, flag = 0;
657 for (i = 0; !flag && i < input_size; i++)
658 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
659 // If inputs are from GPU, stream type is GPU.
660 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
661 model->compiled_data->minimize.minimizer = minimizer;
662 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
663 } else {
664 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model,
665 // and then absorb the "new model" into the old one.
666 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
667 ccv_cnnp_model_absorb(model, init, inputs, input_size);
668 // Reset minimizer.
669 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
670 }
671}
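A minimal compile sketch grounded in the function above (illustrative only: `model` is any constructed model with one input; CPU_TENSOR_NHWC is assumed from ccv_nnc_easy.h; passing CMD_NOOP() as the loss takes the no-fits branch at line 378):

 const ccv_nnc_tensor_param_t input = CPU_TENSOR_NHWC(32F, 1, 28, 28);
 // The first call builds the symbolic graph; a later call with new input
 // shapes takes the copy-and-absorb path above instead of recompiling.
 ccv_cnnp_model_compile(model, &input, 1, CMD_NOOP(), CMD_NOOP());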
672
673ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
674{
675 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
676 new_model->is_trainable = is_trainable;
677 return new_model;
678}
679
680void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
681{
682 assert(model->graph);
683 assert(output_size == model->output_size);
684 ccv_nnc_symbolic_graph_t* const graph = model->graph;
685 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
686 int i;
687 for (i = 0; i < output_size; i++)
688 {
689 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
690 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
691 }
692}
693
694void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
695{
696 if (workspace_size == model->workspace_size)
697 return;
698 model->workspace_size = workspace_size;
699 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
700 if (compiled_data && compiled_data->graph)
701 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
702}
703
704size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
705{
706 return model->workspace_size;
707}
708
709void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
710{
711 if (parallel == 0)
712 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
713 else
714 model->parallel_count = parallel;
715 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
716 if (compiled_data)
717 { assert(!compiled_data->graph); }
718}
719
720void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
721{
722 model->max_stream_count = max_stream_count;
723 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
724 if (compiled_data)
725 { assert(!compiled_data->graph); }
726}
727
728void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
729{
730 model->memory_compression = memory_compression;
731 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
732 if (compiled_data)
733 { assert(!compiled_data->graph); }
734}
735
736void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
737{
738 model->memory_reduction = memory_reduction;
739 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
740 if (compiled_data)
741 { assert(!compiled_data->graph); }
742}
743
744void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
745{
746 model->gradient_checkpointing = gradient_checkpointing;
747}
748
749int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
750{
751 return model->gradient_checkpointing;
752}
753
754typedef struct {
755 int parallel_count;
756 ccv_nnc_symbolic_graph_t* graph;
757 ccv_cnnp_compiled_data_t* compiled_data;
758 ccv_nnc_tensor_arena_t* tensor_arena;
759} ccv_nnc_tensor_init_states_t;
760
761static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
762{
763 int i;
764 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
765 for (i = 0; i < compiled_data->parameters->rnum; i++)
766 {
767 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
768 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
769 return 1;
770 }
771 for (i = 0; i < compiled_data->internals->rnum; i++)
772 {
773 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
774 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
775 return 1;
776 }
777 return 0;
778}
779
780static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
781{
782 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
783 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
784 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
785 if (!output_tensor)
786 return;
787 const int d = output_symbol.d;
788 assert(d < tensor_init_states->compiled_data->tensors_init.size);
789 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
790 if (init_v[d >> 5] & (1u << (d & 0x1f)))
791 return;
792 init_v[d >> 5] |= (1u << (d & 0x1f));
793 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
794 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
795 const int parallel_count = tensor_init_states->parallel_count;
796 int i;
797 for (i = 1; i < parallel_count; i++)
798 {
799 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
800 if (copy)
801 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
802 }
803}
804
805 // This method can only handle cases where we added new tensors and execs, never deleted them. This invariant holds because
806 // we set up everything (including calling the simplify method) in the ccv_cnnp_model_compile method, before this rewind setup.
807static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
808{
809 assert(model->graph);
810 assert(model->compiled_data);
811 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
812 assert(compiled_data->rewindables);
813 int i;
814 for (i = 0; i < compiled_data->rewindables->rnum; i++)
815 {
816 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
817 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
818 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
819 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
820 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
821 }
822 ccv_array_clear(compiled_data->rewindables);
823 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
824}
825
826static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
827{
828 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
829 .type = CCV_CNNP_REWIND_TENSOR,
830 .tensor = symbol
831 };
832 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
833 ccv_array_push(rewind_symbols, &rewind_symbol);
834}
835
836static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
837{
838 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
839 .type = CCV_CNNP_REWIND_TENSOR,
840 .tensor = symbol
841 };
842 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
843 ccv_array_push(rewind_symbols, &rewind_symbol);
844}
845
846static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
847{
848 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
849 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
850 .graph_exec = symbol
851 };
852 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
853 ccv_array_push(rewind_symbols, &rewind_symbol);
854}
855
856static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
857{
858 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
859 if (!CCV_NO_GRAPH_EXEC(update_exec))
860 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
861 int i;
862 for (i = 1; i < parallel_count; i++)
863 {
864 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
865 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
866  if (!CCV_NO_GRAPH_EXEC(copy))
867 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
868 }
869}
870
871static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
872{
873 assert(compiled_data);
874 assert(symbolic_graph);
875 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
876 int i;
877 for (i = 1; i < parallel_count; i++)
878 {
879 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
880 if (copy_symbol.graph)
881 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
882 }
883 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
884 if (graph_exec_arena)
885 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
886 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
887 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
888 if (gradient_graph_exec_arena)
889 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
890}
891
892static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
893{
894 int this_parameter_flag = 0;
895 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
896 return this_parameter_flag;
897 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
898 int j, k;
899 // For no-op, we can preserve previous saved_aux_size.
900 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
901 {
902  // If the old minimizer is a noop, the old_saved_aux_size should be whatever its previous
903  // saved_aux_size was; otherwise we would reinit the saved_aux repeatedly when switching between
904  // a noop and a real minimizer. We don't want that, because high-level frameworks use the noop
905  // to keep selected model parameters from updating.
906 int old_saved_aux_size;
907 if (old_minimizer.cmd == CCV_NNC_NOOP)
908 {
909 int input_size;
910 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
911 if (input_size < 2) // This is not legit.
912 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
913 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
914 old_saved_aux_size = input_size - 2;
915 } else
916 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
917 if (old_saved_aux_size != saved_aux_size)
918 {
919 this_parameter_flag = 1;
920 if (saved_aux_size > old_saved_aux_size)
921 {
922 // Allocate new tensor symbols.
923 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
924 for (j = old_saved_aux_size; j < saved_aux_size; j++)
925 {
926 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
927 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
928     const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
929 for (k = 1; k < parallel_count; k++)
930 {
931 ccv_nnc_tensor_param_t dev_info = info;
932 if (k != device_id)
933       CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
934 else
935       CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
936 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
937 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
938 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
939 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
940 }
941 }
942 } else {
943 for (j = saved_aux_size; j < old_saved_aux_size; j++)
944 {
945 for (k = 1; k < parallel_count; k++)
946 {
947 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
948 if (src_copy.d >= 0)
949 {
950 ccv_nnc_tensor_symbol_free(graph, src_copy);
951        ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
952 }
953 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
954 if (dest_copy.d >= 0)
955 {
956 ccv_nnc_tensor_symbol_free(graph, dest_copy);
957        ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
958 }
959 }
960 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
961 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
962      saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
963 }
964 }
965 }
966 }
967 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
968 if (this_parameter_flag)
969 {
970 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
971 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
972 const int* inputs = 0;
973 int input_size = 0;
974 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
975   assert(input_size >= 1);
976 update_inputs[0].d = inputs[0];
977 update_inputs[0].graph = graph;
978 update_inputs[1].d = inputs[1];
979 update_inputs[1].graph = graph;
980 update_outputs[0] = updated_parameters[parameter_indice];
981 for (j = 0; j < saved_aux_size; j++)
982 {
983 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
984 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
985 }
986 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
987 for (k = 1; k < parallel_count; k++)
988 {
989 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
990    assert(copy.d >= 0);
991 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
992    assert(input_size >= 1);
993 update_inputs[0].d = inputs[0];
994 update_inputs[0].graph = graph;
995 update_inputs[1].d = inputs[1];
996 update_inputs[1].graph = graph;
997 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
998 for (j = 0; j < saved_aux_size; j++)
999 {
1000 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
1001 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
1002 }
1003 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
1004 }
1005 }
1006 return this_parameter_flag;
1007}
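// A minimal sketch (not ccv source) of the update-node I/O convention the
// rewiring above relies on: inputs are [gradient, parameter, saved_aux
// sources...], outputs are [updated parameter, saved_aux destinations...].
// That is why input_size - 2 recovers an existing node's saved_aux count.
static int saved_aux_size_from_input_size(const int input_size)
{
	return input_size >= 2 ? input_size - 2 : 0; // inputs minus gradient and parameter
}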
1008
1009typedef struct {
1010 int parameter_size;
1011 ccv_nnc_cmd_t minimizer;
1012 ccv_cnnp_model_io_t parameters[1];
1013} ccv_cnnp_set_minimizer_for_parameter_t;
1014
1015static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
1016{
1017 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1018 assert(compiled_data);
1019 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1020 // We update all parameters; at this point, we have one minimizer.
1021 const int parameter_size = compiled_data->parameters->rnum;
1022 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
1023 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
1024 assert(symbolic_graph);
1025 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1026 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now");
1027 ccv_array_t* const parameters = compiled_data->minimize.parameters;
1028 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1029 int i, j, flag = 0;
1030 for (i = 0; i < parameters->rnum; i++)
1031 {
1032  ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
1033 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
1034 {
1035 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
1036   assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
1037 const int old_rnum = parameter_indices->rnum;
1038 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
1039 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
1040   assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
1041 if (param_ref >= 0)
1042 {
1043    assert(param_ref + old_rnum < parameter_indices->rnum);
1044    *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
1045 parameter_indices->rnum = old_rnum + 1;
1046 }
1047 }
1048 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1049  // We may have duplicated indices, but that is OK; we will just set them twice.
1050 for (j = 0; j < parameter_indices->rnum; j++)
1051 {
1052   const int d = *(int*)ccv_array_get(parameter_indices, j);
1053   assert(d <= parameter_size);
1054 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1055 flag = 1;
1056 }
1057 ccv_array_clear(parameter_indices);
1058 }
1059 ccv_array_free(parameter_indices);
1060 return flag;
1061}
1062
1063static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1064{
1065 if (new_saved_aux_size == old_saved_aux_size)
1066 return;
1067 assert(new_saved_aux_size > old_saved_aux_size);
1068 int i, j;
1069 for (i = parameter_size - 1; i >= 0; i--)
1070 {
1071 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1072   saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
1073 for (j = old_saved_aux_size - 1; j >= 0; j--)
1074 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1075 }
1076}
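// The backwards walk above deserves a concrete example. A minimal sketch (not
// ccv source) with plain ints: widening each parameter's row from old_size to
// new_size slots in place must proceed from the last row to the first, or the
// not-yet-moved rows would be overwritten.
#include <stdio.h>

int main(void)
{
	int a[6] = {1, 2, 3, 4, 0, 0}; // 2 rows x 2 slots, re-strided to 3 slots in place
	const int n = 2, old_size = 2, new_size = 3;
	int i, j;
	for (i = n - 1; i >= 0; i--)
	{
		for (j = new_size - 1; j >= old_size; j--)
			a[i * new_size + j] = -1; // fresh slots, analogous to NO_TENSOR_SYMBOL
		for (j = old_size - 1; j >= 0; j--)
			a[i * new_size + j] = a[i * old_size + j];
	}
	for (i = 0; i < 6; i++)
		printf("%d ", a[i]); // prints: 1 2 -1 3 4 -1
	return 0;
}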
1077
1078static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1079{
1080 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1081 assert(compiled_data);
1082 if (!compiled_data->rewindables)
1083 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1084 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1085 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1086 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1087}
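// The three hooks registered above implement a record-and-rewind journal. A
// minimal sketch (not ccv source) of the pattern: every handle created after
// recording starts is appended to a journal, and rewinding frees the journaled
// handles and clears the journal, restoring the pre-recording state.
#include <stdlib.h>

typedef struct {
	int type; // e.g. tensor symbol vs. graph exec symbol
	void* handle;
} journal_entry_t;

typedef struct {
	journal_entry_t* entries;
	int count, capacity;
} journal_t;

static void journal_push(journal_t* const journal, const int type, void* const handle)
{
	if (journal->count == journal->capacity)
	{
		journal->capacity = journal->capacity ? journal->capacity * 2 : 4;
		journal->entries = realloc(journal->entries, sizeof(journal_entry_t) * journal->capacity);
	}
	journal->entries[journal->count].type = type;
	journal->entries[journal->count].handle = handle;
	++journal->count;
}

static void journal_rewind(journal_t* const journal, void (*free_handle)(int type, void* handle))
{
	int i;
	for (i = 0; i < journal->count; i++)
		free_handle(journal->entries[i].type, journal->entries[i].handle);
	journal->count = 0; // like ccv_array_clear on rewindables
}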
1088
1089static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1090{
1091 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1092 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1093 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1094 const int evaluate_to_size = compiled_data->evaluate.to_size;
1095 assert(evaluate_to_size > 0);
1096 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1097 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now");
1098 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1099 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1100 int i, j;
1101 const int output_size = model->output_size;
1102 assert(!fits || fit_size == output_size * parallel_count);
1103 if (fits)
1104 for (i = 0; i < output_size; i++)
1105 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1106 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1107 const int parameter_size = compiled_data->parameters->rnum;
1108 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1109 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1110 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1111 int parameter_size_maybe_more = parameter_size;
1112 compiled_data->disable_outgrad = disable_outgrad;
1113 int outgrad_size;
1114 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1115 outgrad_size = 0;
1116 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1117 outgrad_size = model->input_size;
1118 else {
1119  assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1120 outgrad_size = 0;
1121 for (i = 0; i < model->input_size; i++)
1122 if (!(disable_outgrad & ((uint64_t)1 << i)))
1123 ++outgrad_size;
1124 }
1125 compiled_data->outgrad_size = outgrad_size;
1126 parameter_size_maybe_more += outgrad_size;
1127 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1128 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1129 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1130 compiled_data->backward.to_size = parameter_size_maybe_more;
1131 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1132 if (compiled_data->parameter_flags)
1133 {
1134  parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1135 for (i = 0; i < parameter_size; i++)
1136 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1137    parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1138 else
1139    parameters[i] = NO_TENSOR_SYMBOL;
1140 }
1141 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1142  ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1143 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1144  ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1145 else { // Compute minimize with gradients including selected inputs.
1146  assert(model->input_size > 0);
1147  assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1148  assert(outgrad_size > 0);
1149 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1150 j = 0;
1151 for (i = 0; i < model->input_size; i++)
1152 if (!(disable_outgrad & ((uint64_t)1 << i)))
1153 outgrads[j++] = model->inputs[i];
1154  ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1155 }
1156 if (compiled_data->parameter_flags)
1157  ccfree(parameters);
1158 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1159 if (compiled_data->minimize.parameters)
1160 _ccv_cnnp_apply_parameters_with_minimizer(model);
1161 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1162 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1163 for (i = 0; i < output_size; i++)
1164 {
1165 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1166 // Init this to 1 so we can backprop.
1167 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1168 }
1169 compiled_data->backward.to_size = 0;
1170 for (i = 0; i < parameter_size_maybe_more; i++)
1171 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1172 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1173 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1174 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1175 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1176 {
1177  if (compiled_data->outgrads[i].d < 0) // When we go through the inputs, we might find zero-length ones, and for these we cannot have any outgrads.
1178 continue;
1179 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1180 const int* tos;
1181 int to_size;
1182 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1183  if (to_size == 0) // If this is the end (no minimizers afterwards), we need to attach this as a destination; otherwise it is covered by update_nodes.
1184 {
1185 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1186 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1187 int flag = 0;
1188   const int outgrad_destination_start = ccv_max(0, destination_count - i);
1189 for (j = i - 1; !flag && j >= 0; j--)
1190 if (j + outgrad_destination_start < destination_count)
1191 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1192 if (!flag) // Only if we cannot find it, we add it.
1193 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1194 }
1195 }
1196 if (parallel_count > 1)
1197 {
1198 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1199 0, 0,
1200 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1201 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1202 0, 0, 0,
1203 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1204   SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1205 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1206 for (i = 0; i < evaluate_to_size; i++)
1207 for (j = 1; j < parallel_count; j++)
1208 {
1209 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1210 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1211 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1212 }
1213 const int backward_to_size = compiled_data->backward.to_size;
1214 for (i = 0; i < backward_to_size; i++)
1215 for (j = 1; j < parallel_count; j++)
1216 {
1217 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1218 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1219 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1220 }
1221 }
1222 // Only use memory compression if we are in gradient parameter mode.
1223 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1224 {
1225 if (model->memory_compression)
1226   ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1227 if (model->memory_reduction)
1228   ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1229 }
1230 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1231 compiled_data->gradient_mode = gradient_mode;
1232}
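// A minimal sketch (not ccv source) of the disable_outgrad convention used
// above: bit i of the mask excludes input i from gradient computation, so the
// number of input gradients actually requested is the count of clear bits
// among the first input_size bits.
#include <stdint.h>

static int outgrad_count(const uint64_t disable_outgrad, const int input_size)
{
	int i, n = 0;
	for (i = 0; i < input_size; i++)
		if (!(disable_outgrad & ((uint64_t)1 << i)))
			++n;
	return n; // n == 0 here corresponds to disabling every input's gradient
}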
1233
1234void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1235{
1236 assert(!compiled_data->tensors.parameters);
1237 const int parameter_size = compiled_data->parameters->rnum;
1238 const int parallel_count = _ccv_cnnp_model_effective_parallel_count(model);
1239 compiled_data->parallel_count = parallel_count;
1240 const int internal_size = compiled_data->internals->rnum;
1241 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1242 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1243 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1244 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1245}
1246
1247int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1248{
1249 int i, j;
1250 const int parameter_size = compiled_data->parameters->rnum;
1251 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1252 const int internal_size = compiled_data->internals->rnum;
1253 for (i = 0; i < parameter_size; i++)
1254 {
1255  // parameters have to be allocated all together.
1256 if (compiled_data->tensors.parameters[i])
1257 {
1258 for (j = 1; j < parallel_count; j++)
1259    { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1260 continue;
1261 }
1262 return 1;
1263 }
1264 for (i = 0; i < internal_size; i++)
1265 {
1266 if (!compiled_data->tensors.internals[i])
1267 return 1;
1268 for (j = 1; j < parallel_count; j++)
1269 if (!compiled_data->tensors.internals[i + j * internal_size])
1270 return 1;
1271 }
1272 return 0;
1273}
1274
1275void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1276{
1277 int i, j;
1278 const int parameter_size = compiled_data->parameters->rnum;
1279 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1280 compiled_data->parallel_count = parallel_count;
1281 const int internal_size = compiled_data->internals->rnum;
1282 for (i = 0; i < parameter_size; i++)
1283 {
1284  // parameters have to be allocated all together.
1285 if (compiled_data->tensors.parameters[i])
1286 {
1287 for (j = 1; j < parallel_count; j++)
1288    { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1289 continue;
1290 }
1291  const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1292 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1293  if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1294   CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1295  const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1296 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1297 for (j = 1; j < parallel_count; j++)
1298 {
1299 if (j != device_id)
1300    CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1301 else
1302    CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1303 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1304 }
1305 }
1306 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1307 for (i = 0; i < internal_size; i++)
1308 {
1309  const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1310 const int d = retained.d;
1311 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1312 continue;
1313 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1314  if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1315   CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1316  const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1317 if (!compiled_data->tensors.internals[i])
1318 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1319 for (j = 1; j < parallel_count; j++)
1320 {
1321 if (j != device_id)
1322 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1323 else
1324    CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1325 if (!compiled_data->tensors.internals[i + j * internal_size])
1326 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1327 }
1328 }
1329 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1330}
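// ccv_cnnp_model_tensors_init_1 above untags tensors_init.v on its way out. A
// minimal sketch (not ccv source) of the tagged-pointer trick behind
// CCV_NNC_INIT_V: bit 0 of the pointer flags "not fully allocated yet", and
// masking it off recovers the real uint32_t* (safe because allocations are at
// least 4-byte aligned, so bit 0 of a genuine pointer is always zero).
#include <stdint.h>

static uint32_t* untag_init_v(void* const v)
{
	return (uint32_t*)((uintptr_t)v & ~(uintptr_t)1); // what CCV_NNC_INIT_V does
}

static int is_partially_allocated(void* const v)
{
	return ((uintptr_t)v & (uintptr_t)1) != 0; // the check done before running init_1
}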
1331
1332static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1333{
1334 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1335 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1336}
1337
1338static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1339{
1340 assert(parallel_count > 0);
1341 int i, j;
1342 for (i = 0; i < tensor_size; i++)
1343 {
1344 if (!tensors[i])
1345 continue;
1346 const int d = tensor_symbols[i].d;
1347 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1348 continue;
1349 for (j = 1; j < parallel_count; j++)
1350 if (tensors[i + j * tensor_size])
1351 {
1352    ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1353    ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1354    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1355 }
1356 }
1357}
1358
1359static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1360{
1361 assert(parallel_count > 0);
1362 int i, j;
1363 for (i = 0; i < tensor_size; i++)
1364 {
1365 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1366 for (j = 1; j < parallel_count; j++)
1367 {
1368 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1369 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1370 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1371 { // We shouldn't allocate this, free it up.
1372 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1373 tensors[i + j * tensor_size] = 0;
1374 }
1375 }
1376 }
1377}
1378
1379static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1380{
1381 assert(parallel_count > 0);
1382 int i, j;
1383 for (i = 0; i < tensor_size; i++)
1384 {
1385 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1386 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1387 continue;
1388 if (graph)
1389 {
1390 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1391 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1392 tensor_symbol = alias_to;
1393 }
1394  ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1395 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1396 {
1397 const ccv_nnc_tensor_bind_t retained_bind = {
1398 .symbol = tensor_symbol,
1399 .tensor = tensor
1400 };
1401 ccv_array_push(tensor_binds, &retained_bind);
1402 }
1403 for (j = 1; j < parallel_count; j++)
1404 {
1405 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1406 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1407 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1408 {
1409 const ccv_nnc_tensor_bind_t bind = {
1410 .symbol = copy,
1411 .tensor = tensors[i + j * tensor_size]
1412 };
1413 ccv_array_push(tensor_binds, &bind);
1414 }
1415 }
1416 }
1417}
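// A minimal sketch (not ccv source) of the flat replica layout the binding
// loops above index into: one array holds tensor_size entries per device, and
// replica j of entry i lives at i + j * tensor_size.
static int replica_index(const int i, const int j, const int tensor_size)
{
	return i + j * tensor_size; // j == 0 is the primary device
}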
1418
1419static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1420{
1421 if (compiled_data->graph)
1422 ccv_nnc_graph_free(compiled_data->graph);
1423 compiled_data->graph = 0;
1424 compiled_data->is_test = 0;
1425 if (compiled_data->tensor_arena)
1426 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1427 compiled_data->tensor_arena = 0;
1428 if (compiled_data->graph_exec_arena)
1429 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1430 compiled_data->graph_exec_arena = 0;
1431 if (compiled_data->backward.from_ops)
1432  ccfree(compiled_data->backward.from_ops);
1433 compiled_data->backward.from_ops = 0;
1434 if (compiled_data->evaluate.schedule)
1435 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1436 compiled_data->evaluate.schedule = 0;
1437 if (compiled_data->backward.schedule)
1438 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1439 compiled_data->backward.schedule = 0;
1440}
1441
1442static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1443{
1444 if (compiled_data->gradients)
1445  ccfree(compiled_data->gradients);
1446 compiled_data->gradients = 0;
1447 if (compiled_data->updated_parameters)
1448  ccfree(compiled_data->updated_parameters);
1449 compiled_data->updated_parameters = 0;
1450 compiled_data->update_nodes = 0;
1451 compiled_data->saved_aux = 0;
1452}
1453
1454static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1455{
1456 if (compiled_data->backward.gradients)
1457  ccfree(compiled_data->backward.gradients);
1458 compiled_data->backward.gradients = 0;
1459 if (compiled_data->backward.accum)
1460 ccv_nnc_graph_free(compiled_data->backward.accum);
1461 compiled_data->backward.accum = 0;
1462 if (compiled_data->backward.tensor_arena)
1463 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1464 compiled_data->backward.tensor_arena = 0;
1465 if (compiled_data->backward.graph_exec_arena)
1466 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1467 compiled_data->backward.graph_exec_arena = 0;
1468}
1469
1470static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1471{
1472 if (compiled_data->apply_gradients.graph)
1473 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1474 compiled_data->apply_gradients.graph = 0;
1475 if (compiled_data->apply_gradients.tensor_arena)
1476 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1477 compiled_data->apply_gradients.tensor_arena = 0;
1478 if (compiled_data->apply_gradients.graph_exec_arena)
1479 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1480 compiled_data->apply_gradients.graph_exec_arena = 0;
1481}
1482
1483// Compile the graph to run ccv_cnnp_model_fit
1484static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1485{
1486 int i, j;
1487 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1488 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1489 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1490 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1491 assert(output_size == model->output_size * parallel_count);
1492 assert(!fits || output_size == fit_size);
1493 assert(output_size > 0);
1494 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1495 {
1496 _ccv_cnnp_model_set_rewindables(model);
1497 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1498 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1499 _ccv_cnnp_model_rewind_graph(model);
1500 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1501 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1502 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1503 }
1504 const int tensors_init = !!compiled_data->tensors_init.v;
1505 if (!tensors_init)
1506 _ccv_cnnp_model_tensors_init(model, compiled_data);
1507 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1508  // Check if it is not fully allocated; if it is not, run init_1.
1509 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1510 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1511 assert((input_size % parallel_count) == 0);
1512 assert((output_size % parallel_count) == 0);
1513 assert((fit_size % parallel_count) == 0);
1514 const int input_size_per_p = input_size / parallel_count;
1515 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1516 const int output_size_per_p = output_size / parallel_count;
1517 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1518 const int fit_size_per_p = fit_size / parallel_count;
1519 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1520 const int parameter_size = compiled_data->parameters->rnum;
1521 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1522 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1523 const int internal_size = compiled_data->internals->rnum;
1524 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1525 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1526 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1527 ccv_array_free(tensor_binds);
1528 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1529 if (tensors_init && parallel_count > 1)
1530  _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1531 // If tensor is not init'ed, we need to init states first.
1532 if (_ccv_cnnp_any_to_init(compiled_data))
1533 {
1534 ccv_nnc_tensor_init_states_t tensor_init_states = {
1535 .parallel_count = parallel_count,
1536 .graph = model->graph,
1537 .compiled_data = compiled_data,
1538 .tensor_arena = compiled_data->tensor_arena
1539 };
1540 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1541 }
1542 compiled_data->is_test = 0;
1543 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1544 // No need to set because it defaults to training mode.
1545 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1546 for (i = 0; i < saved_aux_size * parameter_size; i++)
1547 {
1548 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1549 continue;
1550 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1551  ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1552 for (j = 1; j < parallel_count; j++)
1553 {
1554 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1555 if (copy)
1556    ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1557 }
1558 }
1559 const int evaluate_to_size = compiled_data->evaluate.to_size;
1560 compiled_data->evaluate.to_op_size = 0;
1561 for (i = 0; i < evaluate_to_size; i++)
1562 {
1563 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1564 if (to.graph)
1565 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1566 }
1567 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1568 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1569}
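// _ccv_cnnp_model_fit_jit above is invoked lazily. A minimal sketch (not ccv
// source) of the compile-once-then-reuse pattern ccv_cnnp_model_fit follows
// below: rebuild the concrete graph only when there is none or the graph mode
// changed, otherwise just rebind tensors and run the cached graph.
typedef struct {
	void* graph; // stands in for compiled_data->graph
	int graph_mode;
} graph_cache_t;

static void run_fit_like(graph_cache_t* const cache, const int want_mode, void (*jit)(graph_cache_t*), void (*run)(graph_cache_t*))
{
	if (!cache->graph || cache->graph_mode != want_mode)
	{
		cache->graph_mode = want_mode;
		jit(cache); // frees stale arenas and compiles for want_mode
	}
	run(cache); // every subsequent call reuses the cached graph
}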
1570
1571ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1572{
1573 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1574 if (!compiled_data || !compiled_data->graph)
1575 return 0;
1576 return ccv_nnc_graph_default_stream(compiled_data->graph);
1577}
1578
1579uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1580{
1581 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1582 if (!compiled_data || !compiled_data->tensor_arena)
1583 return 0;
1584 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1585}
1586
1587static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1588{
1589 int i, j;
1590 for (i = 0; i < tensor_size; i++)
1591 {
1592 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1593 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1594 continue;
1595 if (graph)
1596 {
1597 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1598 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1599 tensor_symbol = alias_to;
1600 }
1601 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1602 for (j = 1; j < parallel_count; j++)
1603 {
1604 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1605 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1606 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1607 }
1608 }
1609}
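// Layout note (an inference from the indexing above, not a documented contract): the
// `tensors` argument is a device-major flat array of tensor_size * parallel_count
// entries, so the copy of tensor i on device j lives at tensors[i + tensor_size * j].
// For example, with tensor_size = 3 and parallel_count = 2, device 1's copy of
// tensor 2 sits at tensors[2 + 3 * 1] == tensors[5].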
1610
1611void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1612{
1613 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1614 assert(compiled_data);
1615 const int parallel_count = ccv_max(model->parallel_count, 1);
1616 assert(output_size == model->output_size * parallel_count);
1617 assert(input_size == model->input_size * parallel_count);
1618 assert(!fits || fit_size == output_size);
1619 assert(model->graph);
1620 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1621 {
1622 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1623 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1624 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1625 // Compile the symbolic graph down only when needed.
1626 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1627 } else {
1628 assert((input_size % parallel_count) == 0);
1629 assert((output_size % parallel_count) == 0);
1630 assert((fit_size % parallel_count) == 0);
1631 const int input_size_per_p = input_size / parallel_count;
1632 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1633 const int output_size_per_p = output_size / parallel_count;
1634 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1635 const int fit_size_per_p = fit_size / parallel_count;
1636 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1637 }
1638 if (compiled_data->is_test)
1639 {
1640 compiled_data->is_test = 0;
1641 ccv_nnc_graph_exec_update_t update = {
1642 .parallel_count = parallel_count,
1643 .graph = model->graph,
1644 .graph_exec_arena = compiled_data->graph_exec_arena,
1645 };
1646 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1647 }
1648 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1649}
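// Usage sketch for ccv_cnnp_model_fit (illustrative only; `model`, `x`, `y` and `out`
// are hypothetical tensors the caller must have created, and the model must already
// be compiled with a minimizer; TENSOR_LIST is assumed to be the convenience macro
// from ccv_nnc_easy.h that expands to a tensor array plus its count):
// ccv_cnnp_model_fit(model, TENSOR_LIST(x), TENSOR_LIST(y), TENSOR_LIST(out), 0, 0);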
1650
1651 // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = false (MULTISTAGE_MODE_NO_GRAD).
1652static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1653{
1654 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1655 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1656 const int parallel_count = ccv_max(model->parallel_count, 1);
1657 assert(output_size == model->output_size * parallel_count);
1658 assert(output_size > 0);
1659 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here; rather,
1660 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1661 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1662 {
1663 const int evaluate_to_size = compiled_data->evaluate.to_size;
1664 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1665 _ccv_cnnp_model_set_rewindables(model);
1666 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1667 0, 0,
1668 0, 0, 0,
1669 0, 0, 0,
1670 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1671 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1672 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1673 int i, j;
1674 for (i = 0; i < evaluate_to_size; i++)
1675 for (j = 1; j < parallel_count; j++)
1676 {
1677 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1678 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1679 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1680 }
1681 }
1682 const int tensors_init = !!compiled_data->tensors_init.v;
1683 if (!tensors_init)
1684 _ccv_cnnp_model_tensors_init(model, compiled_data);
1685 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1686 // Check whether it is fully allocated; if not, run init_1.
1687 ccv_cnnp_model_tensors_init_1(model, compiled_data);
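// A note on the bit trick above (a reading of this file's convention, not a public
// API): bit 0 of compiled_data->tensors_init.v tags the init bitmap as only partially
// allocated; CCV_NNC_INIT_V(v) masks that bit off before the bitmap is dereferenced,
// i.e. uint32_t* bitmap = (uint32_t*)((uintptr_t)v & ~(uintptr_t)1).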
1688 const int tensor_parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1689 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1690 assert((input_size % parallel_count) == 0);
1691 assert((output_size % parallel_count) == 0);
1692 const int input_size_per_p = input_size / parallel_count;
1693 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1694 const int output_size_per_p = output_size / parallel_count;
1695 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1696 const int parameter_size = compiled_data->parameters->rnum;
1697 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, tensor_parallel_count, tensor_binds);
1698 const int internal_size = compiled_data->internals->rnum;
1699 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, tensor_parallel_count);
1700 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, tensor_parallel_count, tensor_binds);
1701 // If we generated gradients for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1702 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1703 ccv_array_free(tensor_binds);
1704 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1705 // If tensor is not init'ed, we need to init states first.
1706 if (tensors_init && tensor_parallel_count > 1)
1707 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, tensor_parallel_count);
1708 if (_ccv_cnnp_any_to_init(compiled_data))
1709 {
1710 ccv_nnc_tensor_init_states_t tensor_init_states = {
1711 .parallel_count = tensor_parallel_count,
1712 .graph = model->graph,
1713 .compiled_data = compiled_data,
1714 .tensor_arena = compiled_data->tensor_arena
1715 };
1716 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1717 }
1718 compiled_data->is_test = 1;
1719 ccv_nnc_graph_exec_update_t update = {
1720 .parallel_count = parallel_count,
1721 .graph = model->graph,
1722 .graph_exec_arena = compiled_data->graph_exec_arena,
1723 };
1724 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1725 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1726 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1727}
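// In contrast to _ccv_cnnp_model_multistage_jit_0 below, this no-grad path compiles
// only the sources -> evaluate.tos slice of the symbolic graph and never touches
// gradient tensors; the rewindables recorded above are what allow the symbolic graph
// to be rewound later if a gradient-enabled recompile becomes necessary.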
1728
1729static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1730{
1731 assert(!compiled_data->tensors.gradients);
1732 const int parameter_size = compiled_data->parameters->rnum;
1733 const int parallel_count = ccv_max(model->parallel_count, 1);
1734 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1735 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1736 int i, j;
1737 for (i = 0; i < parameter_size; i++)
1738 {
1739 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1740 {
1741 compiled_data->tensors.gradients[i] = 0;
1742 compiled_data->tensors.accum_gradients[i] = 0;
1743 for (j = 1; j < parallel_count; j++)
1744 {
1745 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1746 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1747 }
1748 continue;
1749 }
1750 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1751 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1752 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1753 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1754 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1755 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1756 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1757 for (j = 1; j < parallel_count; j++)
1758 {
1759 if (j != device_id)
1760 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1761 else
1762 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1763 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1764 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1765 }
1766 }
1767}
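// Device placement note (inferred from the loop above): copy j of a parameter is
// placed on device j, except when j equals the parameter's home device, in which case
// it is placed on device 0; the home tensor plus its copies therefore cover devices
// 0..parallel_count-1 with no duplicates.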
1768
1769static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1770{
1771 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1772 return 1;
1773 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1774 return 0;
1775 int i;
1776 for (i = 0; i < input_size; i++)
1777 if (!(disable_outgrad & ((uint64_t)1 << i)))
1778 return 0;
1779 return 1;
1780}
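// Worked example (illustrative): with input_size == 3, disable_outgrad == 0x7 has a
// bit set for every input, so the loop falls through and this returns 1; with
// disable_outgrad == 0x5, bit 1 is clear, so input 1 still needs an outgoing gradient
// and this returns 0.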
1781
1782// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1783// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1784static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1785{
1786 int i, j;
1787 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1788 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1789 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode);
1790 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1791 const int parallel_count = ccv_max(model->parallel_count, 1);
1792 assert(output_size == model->output_size * parallel_count);
1793 assert(output_size > 0);
1794 // There shouldn't be a loss function if we evaluate with multistage jit.
1795 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1796 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1797 {
1798 _ccv_cnnp_model_set_rewindables(model);
1799 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1800 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1801 _ccv_cnnp_model_rewind_graph(model);
1802 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1803 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1804 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1805 }
1806 const int tensors_init = !!compiled_data->tensors_init.v;
1807 if (!tensors_init)
1808 _ccv_cnnp_model_tensors_init(model, compiled_data);
1809 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1810 // Check whether it is fully allocated; if not, run init_1.
1811 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1812 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1813 assert((input_size % parallel_count) == 0);
1814 assert((output_size % parallel_count) == 0);
1815 const int input_size_per_p = input_size / parallel_count;
1816 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1817 const int output_size_per_p = output_size / parallel_count;
1818 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1819 const int parameter_size = compiled_data->parameters->rnum;
1820 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1821 const int internal_size = compiled_data->internals->rnum;
1822 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1823 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1824 if (!compiled_data->tensors.gradients)
1825 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1826 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1827 if (compiled_data->backward.to_size > 0)
1828 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1829 else
1830 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1831 ccv_array_free(tensor_binds);
1832 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1833 if (tensors_init && parallel_count > 1)
1834 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1835 // If tensor is not init'ed, we need to init states first.
1836 if (_ccv_cnnp_any_to_init(compiled_data))
1837 {
1838 ccv_nnc_tensor_init_states_t tensor_init_states = {
1839 .parallel_count = parallel_count,
1840 .graph = model->graph,
1841 .compiled_data = compiled_data,
1842 .tensor_arena = compiled_data->tensor_arena
1843 };
1844 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1845 }
1846 compiled_data->is_test = is_test;
1847 ccv_nnc_graph_exec_update_t update = {
1848 .parallel_count = parallel_count,
1849 .graph = model->graph,
1850 .graph_exec_arena = compiled_data->graph_exec_arena,
1851 };
1852 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1853 const int evaluate_to_size = compiled_data->evaluate.to_size;
1854 compiled_data->evaluate.to_op_size = 0;
1855 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1856 for (i = 0; i < evaluate_to_size; i++)
1857 {
1858 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1859 if (to_op.graph)
1860 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1861 const int* tos;
1862 int to_size;
1863 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1864 for (j = 0; j < to_size; j++)
1865 {
1866 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1867 .d = tos[j],
1868 .graph = model->graph
1869 });
1870 if (to_op.graph)
1871 ccv_array_add_unique_int(backward_from, to_op.d);
1872 }
1873 }
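// At this point backward_from holds the immediate successors of every evaluate node
// that survived compilation; these become the entry points when the backward half of
// the graph is run separately from the forward half.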
1874 assert(backward_from->rnum > 0);
1875 compiled_data->backward.from_op_size = backward_from->rnum;
1876 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1877 for (i = 0; i < backward_from->rnum; i++)
1878 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1879 .d = *(int*)ccv_array_get(backward_from, i),
1880 .graph = compiled_data->graph,
1881 };
1882 // If there are any set nodes (to set some tensors to 0) inserted through the backward pass, they won't be executed if we just run sources -> evaluate.to_ops and backward.from_ops -> destinations. We need this logic to find these nodes and explicitly add them to backward.from_ops.
1883 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0);
1884 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1885 uint32_t* const visited = cccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1886 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0);
1887 const int source_size = compiled_data->graph->sources->rnum;
1888 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0);
1889 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1890 visited[(idx >> 5)] |= (1u << (idx & 31));
1891 } ccv_nnc_graph_visit_endfor
1892 ccv_nnc_graph_visit_free(visit);
1893 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0);
1894 const int destination_size = compiled_data->graph->destinations->rnum;
1895 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0);
1896 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1897 visited[(idx >> 5)] |= (1u << (idx & 31));
1898 } ccv_nnc_graph_visit_endfor
1899 ccv_nnc_graph_visit_free(visit);
1900 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0);
1901 // Find any missing nodes to be added as sources. Right now, these are only set nodes.
1902 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1903 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1904 {
1905 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD);
1906 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-case the set function that zeroes out a tensor, not the one that seeds gradients with 1.
1907 ccv_array_add_unique_int(backward_from, idx);
1908 }
1909 } ccv_nnc_graph_visit_endfor
1910 ccv_nnc_graph_visit_free(visit);
1911 ccfree(visited);
1912 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1913 {
1914 compiled_data->backward.from_op_size = backward_from->rnum;
1915 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1916 for (i = 0; i < backward_from->rnum; i++)
1917 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1918 .d = *(int*)ccv_array_get(backward_from, i),
1919 .graph = compiled_data->graph,
1920 };
1921 }
1922 ccv_array_free(backward_from);
1923 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1924 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1925}
1926
1927void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1928{
1929 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1930 assert(compiled_data);
1931 const int parallel_count = ccv_max(model->parallel_count, 1);
1932 assert(output_size == model->output_size * parallel_count);
1933 assert(input_size == model->input_size * parallel_count);
1934 assert(model->graph);
1935 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1936 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1937 if (!compiled_data->graph || mode_mismatch)
1938 {
1939 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1940 if (mode_mismatch) // If the mode mismatches, we need to redo the backward pass as well (no need to redo apply_gradients; it doesn't depend on target_gradient_mode or disable_outgrad).
1941 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1942 if (params.requires_grad)
1943 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1944 else
1945 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1946 } else {
1947 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1948 assert((input_size % parallel_count) == 0);
1949 const int input_size_per_p = input_size / parallel_count;
1950 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1951 assert((output_size % parallel_count) == 0);
1952 const int output_size_per_p = output_size / parallel_count;
1953 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1954 }
1955 if (compiled_data->is_test != params.is_test)
1956 {
1957 compiled_data->is_test = params.is_test;
1958 ccv_nnc_graph_exec_update_t update = {
1959 .parallel_count = parallel_count,
1960 .graph = model->graph,
1961 .graph_exec_arena = compiled_data->graph_exec_arena,
1962 };
1963 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1964 }
1965}
1966
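ccv_cnnp_model_dry_run() above JITs the graph (if needed) and binds the given tensors into the arena, but executes nothing. A minimal usage sketch, assuming a model with one input and one output and the helpers from ccv_nnc_easy.h (CPU_TENSOR_NHWC, TENSOR_LIST); the shapes and the CCV_CNNP_DISABLE_OUTGRAD_ALL flag here are illustrative assumptions, not taken from this file:

    ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 8), 0);
    ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
    const ccv_cnnp_evaluate_param_t params = {
        .requires_grad = 1, // Selects MULTISTAGE_MODE so backward can run later.
        .is_test = 0,
        .disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL, // No gradients w.r.t. inputs.
    };
    // Binds x / y into the tensor arena and JITs if needed; nothing executes.
    ccv_cnnp_model_dry_run(model, params, TENSOR_LIST(x), TENSOR_LIST(y));
    ccv_nnc_tensor_free(x);
    ccv_nnc_tensor_free(y);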
1967void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1968{
1969 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1970 assert(compiled_data);
1971 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1972 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1973 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1974 else {
1975 if (!compiled_data->evaluate.schedule)
1976 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1977 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1978 }
1979}
1980
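ccv_cnnp_model_evaluate() is the same dry run followed by an actual graph launch; note that the static schedule for the evaluate sub-graph (up to evaluate.to_ops) is built once and then cached. A sketch of the typical call, reusing the hypothetical x/y/params from the previous note; passing 0 for the tensor tape and stream context runs synchronously on the default stream:

    ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);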
1981// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1982// Particularly, this method compiles the accumulator graph.
1983static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1984{
1985 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1986 assert(compiled_data);
1987 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1988 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1989 const int parallel_count = ccv_max(model->parallel_count, 1);
1990 const int parameter_size = compiled_data->parameters->rnum;
1991 int i, j;
1992 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1993 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1994 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1995 for (i = 0; i < parameter_size; i++)
1996 for (j = 0; j < parallel_count; j++)
1997 if (compiled_data->tensors.gradients[i + j * parameter_size])
1998 {
1999 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
2000 // Now the old gradient tensor becomes the accumulated gradient; set up a new gradient tensor so we can collect fresh gradients into it.
2001 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
2002 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2003 ccv_nnc_tensor_symbol_t inputs[2];
2004 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2005 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2006 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2007 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD(), inputs, 2, &output, 1, 0);
2008 } else {
2009 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
2010 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
2011 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
2012 }
2013 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
2014 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
2015 {
2016 ccv_nnc_symbolic_graph_free(accum);
2017 // Create empty graph.
2018 compiled_data->backward.accum = ccv_nnc_graph_new();
2019 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
2020 return;
2021 }
2022 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2023 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2024 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
2025 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2026 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum), SYMBOLIC_GRAPH_DESTINATIONS(accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
2027 ccv_nnc_symbolic_graph_free(accum);
2028 ccv_array_free(tensor_binds);
2029 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
2030}
2031
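The accumulator graph built above is just one EWSUM node per parameter, with the output symbol bound to the same concrete tensor as the first input, so the compiled graph performs accum_gradients += gradients in place. A distilled, self-contained sketch of that binding trick (the shape and names are illustrative; CPU_TENSOR_NHWC, TENSOR_SYMBOL_LIST, TENSOR_BIND_MAP, KV and TRAVERSE_FULL are assumed from ccv_nnc_easy.h):

    ccv_nnc_symbolic_graph_t* const g = ccv_nnc_symbolic_graph_new();
    const ccv_nnc_tensor_param_t info = CPU_TENSOR_NHWC(32F, 4);
    const ccv_nnc_tensor_symbol_t a = ccv_nnc_tensor_symbol_new(g, info, "accum");
    const ccv_nnc_tensor_symbol_t b = ccv_nnc_tensor_symbol_new(g, info, "grad");
    const ccv_nnc_tensor_symbol_t u = ccv_nnc_tensor_symbol_new(g, info, "updated");
    ccv_nnc_graph_exec_symbol_new(g, CMD_EWSUM_FORWARD(), TENSOR_SYMBOL_LIST(a, b), TENSOR_SYMBOL_LIST(u), "sum");
    ccv_nnc_graph_exec_symbol_autogen(g, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
    ccv_nnc_tensor_t* const accum = ccv_nnc_tensor_new(0, info, 0);
    ccv_nnc_tensor_t* const grad = ccv_nnc_tensor_new(0, info, 0);
    ccv_nnc_graph_t* run_graph;
    ccv_nnc_tensor_arena_t* arena;
    ccv_nnc_graph_exec_arena_t* exec_arena;
    // Binding both a and u to the same tensor is what makes EWSUM an in-place +=.
    ccv_nnc_symbolic_graph_compile(g, ccv_nnc_default_compile_params,
        TENSOR_BIND_MAP(KV(a, accum), KV(b, grad), KV(u, accum)), 0, 0,
        SYMBOLIC_GRAPH_SOURCES(g), SYMBOLIC_GRAPH_DESTINATIONS(g),
        &run_graph, &arena, &exec_arena);
    ccv_nnc_graph_run(run_graph, 0, TRAVERSE_FULL, 0, 0);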
2032void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
2033{
2034 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2035 assert(compiled_data);
2036 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2037 const int parallel_count = ccv_max(model->parallel_count, 1);
2038 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count);
2039 if (outgrad_size > 0)
2040 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count); }
2041 assert(model->graph);
2042 assert(compiled_data->graph);
2043 const int parameter_size = compiled_data->parameters->rnum;
2044 // If we need to accumulate the gradients now, do jit on accumulator.
2045 if (compiled_data->backward.count > 0)
2046 {
2047 if (!compiled_data->backward.accum)
2048 _ccv_cnnp_model_multistage_jit_1(model);
2049 else if (compiled_data->backward.count == 1) {
2050 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2051 int i;
2052 for (i = 0; i < parameter_size * parallel_count; i++)
2053 {
2054 ccv_nnc_tensor_t* tensor;
2055 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor);
2056 }
2057 if (compiled_data->backward.tensor_arena)
2058 {
2059 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2060 // Rebind in case we messed up the bindings (we swapped accum_gradients and gradients above).
2061 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2062 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2063 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2064 }
2065 }
2066 }
2067 const int ingrad_size_per_p = model->output_size;
2068 const int outgrad_size_per_p = compiled_data->outgrad_size;
2069 int i, j;
2070 for (i = 0; i < ingrad_size_per_p; i++)
2071 {
2072 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
2073 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2074 {
2075 // Set it to 1 if it is not specified.
2076 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2077 if (ingrad_tensor)
2078 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2079 for (j = 1; j < parallel_count; j++)
2080 {
2081 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2082 if (ingrad_tensor)
2083 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2084 }
2085 } else {
2086 // Make sure the length matches, in case it is an alias.
2087 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)));
2088 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2089 for (j = 1; j < parallel_count; j++)
2090 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2091 }
2092 }
2093 if (outgrad_size > 0)
2094 {
2095 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad");
2096 for (i = 0; i < outgrad_size_per_p; i++)
2097 if (outgrads[i])
2098 {
2099 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2100 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2101 for (j = 1; j < parallel_count; j++)
2102 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2103 }
2104 } else {
2105 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||
2106 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS);
2107 }
2108 // We need to rebind here because ccv_cnnp_evaluate clears bindings, which resets all bindings for the gradients.
2109 // For parameters and internals this is fine: clearing restores the original bindings, which are exactly these
2110 // parameters and internals. The same cannot be said for gradients, due to the accum_gradients swapping.
2111 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2112 if (!compiled_data->backward.schedule)
2113 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2114 // Run the backward pass.
2115 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2116 // If we need to run the accumulation round, do that now.
2117 if (compiled_data->backward.count > 0)
2118 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2119 // Update the count, this determines whether we need to accumulate or not.
2120 ++compiled_data->backward.count;
2121}
2122
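When ingrads is not supplied to ccv_cnnp_model_backward(), the code above seeds each loss gradient with the constant 1 via CMD_SET_FORWARD(1), i.e. it effectively differentiates the sum of the outputs. A sketch of the equivalent explicit seeding (the shape is an assumption; the seed tensor must match the model output):

    ccv_nnc_tensor_t* const seed = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
    seed->data.f32[0] = 1; // d(output)/d(output)
    ccv_cnnp_model_backward(model, TENSOR_LIST(seed), 0, 0, 0, 0);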
2123// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2124// Particularly, this method compiles the parameter update graph.
2125static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2126{
2127 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2128 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2129 const int parallel_count = ccv_max(model->parallel_count, 1);
2130 const int parameter_size = compiled_data->parameters->rnum;
2131 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2132 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2133 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2134 // Bind accumulated gradients.
2135 if (compiled_data->backward.count > 1)
2136 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2137 else
2138 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2139 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2140 int i, j;
2141 for (i = 0; i < compiled_data->backward.to_size; i++)
2142 {
2143 const int* tos;
2144 int to_size;
2145 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2146 for (j = 0; j < to_size; j++)
2147 {
2148 // Check if this already shows up in the backward graph; if it does, it won't be in the apply
2149 // gradients graph.
2150 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2151 .d = tos[j],
2152 .graph = model->graph,
2153 });
2154 if (!exec.graph)
2155 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2156 }
2157 }
2158 const int from_size = apply_gradients_from->rnum;
2159 if (from_size == 0)
2160 {
2161 ccv_array_free(apply_gradients_from);
2162 ccv_array_free(tensor_binds);
2163 return;
2164 }
2165 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2166 for (i = 0; i < from_size; i++)
2167 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2168 .d = *(int*)ccv_array_get(apply_gradients_from, i),
2169 .graph = model->graph
2170 };
2171 ccv_array_free(apply_gradients_from);
2172 // It can only end with updates to the parameters.
2173 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2174 for (i = 0; i < parameter_size; i++)
2175 {
2176 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2177 continue;
2178 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2179 for (j = 1; j < parallel_count; j++)
2180 {
2181 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2182 ccv_array_push(tos, &copy);
2183 }
2184 }
2185 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2186 ccv_array_free(tos);
2187 ccv_array_free(tensor_binds);
2188 ccfree(froms);
2189 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2190 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2191 {
2192 // Skip on no tensor.
2193 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2194 continue;
2195 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2196 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2197 for (j = 1; j < parallel_count; j++)
2198 {
2199 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2200 if (copy)
2201 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2202 }
2203 }
2204 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2205}
2206
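The CMD_SET_FORWARD(0) calls above reset the optimizer's auxiliary tensors (saved_aux, e.g. momentum slots) to zero after the apply-gradients graph is compiled. A standalone illustration of what that command does (the tensor shape is an assumption):

    ccv_nnc_tensor_t* const aux = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
    ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(aux), 0);
    // aux->data.f32[0..3] are now all 0, like a freshly reset momentum slot.
    ccv_nnc_tensor_free(aux);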
2207void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2208{
2209 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2210 assert(compiled_data);
2211 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2212 const int parallel_count = ccv_max(model->parallel_count, 1);
2213 assert(model->graph);
2214 assert(compiled_data->graph);
2215 // Skip if there is no backward pass.
2216 if (compiled_data->backward.count <= 0)
2217 return;
2218 // Skip if there are no parameters.
2219 if (compiled_data->parameters->rnum == 0)
2220 {
2221 compiled_data->backward.count = 0;
2222 return;
2223 }
2224 if (!compiled_data->apply_gradients.graph)
2225 _ccv_cnnp_model_multistage_jit_2(model);
2226 else {
2227 const int parameter_size = compiled_data->parameters->rnum;
2228 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2229 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2230 if (compiled_data->backward.count > 1)
2231 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2232 else
2233 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2234 }
2235 if (compiled_data->apply_gradients.graph)
2236 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2237 // Reset backward count to 0.
2238 compiled_data->backward.count = 0;
2239}
2240
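Taken together, backward.count drives a simple protocol: the first backward after apply_gradients writes gradients directly, every further backward swaps in accum_gradients and runs the EWSUM accumulator, and apply_gradients consumes whichever set is current and resets the count. A sketch of gradient accumulation over two micro-batches (x1/y1, x2/y2 and params are the same kind of hypothetical tensors as in the earlier notes):

    ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x1), TENSOR_LIST(y1), 0, 0);
    ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); // count: 0 -> 1
    ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x2), TENSOR_LIST(y2), 0, 0);
    ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); // count: 1 -> 2, accumulator runs
    ccv_cnnp_model_apply_gradients(model, 0); // applies accumulated gradients, count -> 0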
2241void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2242{
2243 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2244 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2245 assert(parameter->param_sel != 0);
2246 const int tensors_init = !!compiled_data->tensors_init.v;
2247 int this_tensor_init = tensors_init;
2248 if (!tensors_init)
2249 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2250 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2251 // Check whether it is fully allocated; if it is not, this tensor still needs init_1.
2252 this_tensor_init = 0;
2253 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2254 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2255 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2256 if (param_ref < 0)
2257 { assert(parameter_indices->rnum == 1); }
2258 else
2259 { assert(param_ref < parameter_indices->rnum); }
2260 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2261 ccv_array_free(parameter_indices);
2262 const int parameter_size = compiled_data->parameters->rnum;
2263 assert(d >= 0);
2264 assert(d < parameter_size);
2265 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
2266 int i;
2267 if (!this_tensor_init)
2268 {
2269 if (compiled_data->tensors.parameters[d])
2270 {
2271 for (i = 1; i < parallel_count; i++)
2272 { assert(compiled_data->tensors.parameters[d + i * parameter_size]); }
2273 this_tensor_init = 1;
2274 } else {
2275 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d);
2276 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2277 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
2278 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
2279 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
2280 compiled_data->tensors.parameters[d] = ccv_nnc_tensor_new(0, info, 0);
2281 for (i = 1; i < parallel_count; i++)
2282 {
2283 if (i != device_id)
2284 CCV_TENSOR_SET_DEVICE_ID(info.type, i);
2285 else
2286 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
2287 compiled_data->tensors.parameters[d + i * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2288 }
2289 }
2290 }
2291 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2292 assert(dest);
2293 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor), TENSOR_LIST(dest), 0);
2294 for (i = 1; i < parallel_count; i++)
2295 {
2296 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size]);
2297 if (copy_tensor)
2298 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dest), TENSOR_LIST(copy_tensor), 0);
2299 }
2300 // Mark this symbol as init'ed.
2301 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d))->d;
2302 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2303 init_v[s >> 5] |= (1u << (s & 0x1f));
2304 // If we just allocated this tensor, now it is time to check if we need to mark it as fully allocated.
2305 if (!this_tensor_init)
2306 {
2307 if (ccv_cnnp_model_tensors_any_to_alloc(model, compiled_data))
2308 compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)compiled_data->tensors_init.v | (uintptr_t)1);
2309 else // Remove the flag.
2310 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2311 }
2312}
2313
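The bookkeeping at the end of ccv_cnnp_model_set_parameter() packs two facts into tensors_init.v: bit 0 of the pointer itself marks "not all parameters are allocated yet", and the pointed-to words form a bitmap with one bit per tensor symbol. A distilled sketch of the same tagging scheme (the function and variable names here are hypothetical):

    #include <stdint.h>
    static uint32_t* mark_symbol_inited(uint32_t* const tagged_v, const int d)
    {
        // Strip the low-bit tag to get the real bitmap pointer (what CCV_NNC_INIT_V does).
        uint32_t* const v = (uint32_t*)((uintptr_t)tagged_v & ~(uintptr_t)1);
        v[d >> 5] |= (1u << (d & 0x1f)); // Word d/32, bit d%32.
        return v;
    }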
2314void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2315{
2316 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2317 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2318 assert(parameter->param_sel != 0);
2319 assert(compiled_data->tensors.parameters);
2320 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2321 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2322 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2323 if (param_ref < 0)
2324 { assert(parameter_indices->rnum == 1); }
2325 else
2326 { assert(param_ref < parameter_indices->rnum); }
2327 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2328 ccv_array_free(parameter_indices);
2329 const int parameter_size = compiled_data->parameters->rnum;
2330 assert(d >= 0);
2331 assert(d < parameter_size);
2332 // We don't need to consider parallel_count, every parameter on each device is identical.
2333 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2334 assert(src);
2335 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(tensor), 0);
2336}
2337
2338ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2339{
2340 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2341 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2342 assert(parameter->param_sel != 0);
2343 assert(compiled_data->tensors.parameters);
;
2344 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2345 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2346 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2347 if (param_ref < 0)
2348 { assert(parameter_indices->rnum == 1); }
2349 else
2350 { assert(param_ref < parameter_indices->rnum); }
2351 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2352 ccv_array_free(parameter_indices);
2353 const int parameter_size = compiled_data->parameters->rnum;
2354 assert(d >= 0);
2355 assert(d < parameter_size);
2356 // We don't need to consider parallel_count, every parameter on each device is identical.
2357 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2358 assert(tensor);
2359 return tensor->info;
2360}
2361
2362const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2363{
2364 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2365 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2366 assert(parameter->param_sel != 0);
2367 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2368 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2369 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2370 if (param_ref < 0)
2371 { assert(parameter_indices->rnum == 1); }
2372 else
2373 { assert(param_ref < parameter_indices->rnum); }
2374 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2375 ccv_array_free(parameter_indices);
2376 const int parameter_size = compiled_data->parameters->rnum;
2377 assert(d >= 0);
2378 assert(d < parameter_size);
2379 return *(char**)ccv_array_get(compiled_data->ids.parameters, d);
2381
2382int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2383{
2384 assert(model->compiled_data);
2385 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2386 return compiled_data->parameters->rnum;
2387}
2388
2389uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2390{
2391 assert(model->compiled_data);
2392 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2393 const int parameter_size = compiled_data->parameters->rnum;
2394 int i;
2395 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2396 uint64_t size = 0;
2397 const int tensors_init = !!compiled_data->tensors_init.v;
2398 uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v) : 0;
2399 for (i = 0; i < parameter_size; i++)
2400 {
2401 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
2402 if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] & (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i])
2403 {
2404 ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info;
2405 size += ccv_nnc_tensor_data_size(params);
2406 continue;
2407 }
2408 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2409 .graph = graph,
2410 .d = d
2411 });
2412 size += ccv_nnc_tensor_data_size(params);
2413 }
2414 return size;
2415}
2416
2417int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type)
2418{
2419 assert(model->compiled_data);
2420 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2421 if (count != compiled_data->parameters->rnum)
2422 return 0;
2423 if (CCV_TENSOR_GET_DEVICE(type) == CCV_COMPUTE_DEVICE_ANY)
2424 CCV_TENSOR_SET_DEVICE_ID(type, 0);
2425 int i;
2426 // We don't need to consider parallel_count, every parameter on each device is identical.
2427 for (i = 0; i < count; i++)
2428 {
2429 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2430 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2431 {
2432 tensors[i] = 0;
2433 continue;
2434 }
2435 tensor = CCV_NNC_TENSOR(tensor);
2436 if (tensor->info.type == type)
2437 tensors[i] = tensor;
2438 else {
2439 ccv_nnc_tensor_param_t info = tensor->info;
2440 info.type = type;
2441 tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet.
2442 }
2443 }
2444 for (i = 0; i < count; i++)
2445 {
2446 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2447 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2448 continue;
2449 tensor = CCV_NNC_TENSOR(tensor);
2450 // Now initiate the transfer. We should do this on a stream.
2451 if (tensor->info.type != type)
2452 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor), TENSOR_LIST(tensors[i]), 0);
2453 }
2454 // Copy names and remove parameters.
2455 for (i = 0; i < count; i++)
2456 {
2457 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i];
2458 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2459 {
2460 names[i] = 0;
2461 continue;
2462 }
2463 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2464 const size_t name_len = ccv_min(strnlen(name, 1023), 1023);
2465 names[i] = ccmalloc(name_len + 1);
2466 names[i][name_len] = 0;
2467 memcpy(names[i], name, name_len);
2468 if (tensor->info.type == type)
2469 compiled_data->tensors.parameters[i] = 0; // Only clear the slot when the tensor was actually moved (not copied).
2470 }
2471 return 1;
2472}
2473
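In ccv_cnnp_model_parameters_move() above, bit 0 of each entry in tensors.parameters doubles as a "borrowed, not owned" tag, which is why every access strips it with CCV_NNC_TENSOR() first. A distilled sketch of the check (the helper name is hypothetical):

    #include <stdint.h>
    static int tensor_is_borrowed(const ccv_nnc_tensor_t* const t)
    {
        return ((uintptr_t)t & (uintptr_t)1) != 0; // Tagged entries must not be freed or moved.
    }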
2474 KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)
typedef struct kh_ccv_cnnp_parameter_id_s {
	khint_t n_buckets, size, n_occupied, upper_bound;
	khint32_t *flags;
	kh_cstr_t *keys;
	int *vals;
} kh_ccv_cnnp_parameter_id_t;
static inline __attribute__ ((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id(void) {
	return (kh_ccv_cnnp_parameter_id_t*)calloc(1, sizeof(kh_ccv_cnnp_parameter_id_t));
}
static inline __attribute__ ((__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h) {
	if (h) { free((void *)h->keys); free(h->flags); free((void *)h->vals); free(h); }
}
static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h) {
	if (h && h->flags) {
		memset(h->flags, 0xaa, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4) * sizeof(khint32_t));
		h->size = h->n_occupied = 0;
	}
}
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) {
	if (h->n_buckets) {
		khint_t k, i, last, mask, step = 0;
		mask = h->n_buckets - 1;
		k = __ac_X31_hash_string(key); i = k & mask; last = i;
		while (!((h->flags[i>>4]>>((i&0xfU)<<1))&2) && (((h->flags[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp(h->keys[i], key) == 0))) {
			i = (i + (++step)) & mask;
			if (i == last) return h->n_buckets;
		}
		return ((h->flags[i>>4]>>((i&0xfU)<<1))&3)? h->n_buckets : i;
	} else return 0;
}
static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) {
	khint32_t *new_flags = 0;
	khint_t j = 1;
	{
		(--(new_n_buckets), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)>>16, ++(new_n_buckets));
		if (new_n_buckets < 4) new_n_buckets = 4;
		if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;
		else {
			new_flags = (khint32_t*)malloc(((new_n_buckets) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t));
			if (!new_flags) return -1;
			memset(new_flags, 0xaa, ((new_n_buckets) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t));
			if (h->n_buckets < new_n_buckets) {
				kh_cstr_t *new_keys = (kh_cstr_t*)realloc((void *)h->keys, new_n_buckets * sizeof(kh_cstr_t));
				if (!new_keys) { free(new_flags); return -1; }
				h->keys = new_keys;
				if (1) {
					int *new_vals = (int*)realloc((void *)h->vals, new_n_buckets * sizeof(int));
					if (!new_vals) { free(new_flags); return -1; }
					h->vals = new_vals;
				}
			}
		}
	}
	if (j) {
		for (j = 0; j != h->n_buckets; ++j) {
			if (((h->flags[j>>4]>>((j&0xfU)<<1))&3) == 0) {
				kh_cstr_t key = h->keys[j];
				int val;
				khint_t new_mask;
				new_mask = new_n_buckets - 1;
				if (1) val = h->vals[j];
				(h->flags[j>>4]|=1ul<<((j&0xfU)<<1));
				while (1) {
					khint_t k, i, step = 0;
					k = __ac_X31_hash_string(key);
					i = k & new_mask;
					while (!((new_flags[i>>4]>>((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask;
					(new_flags[i>>4]&=~(2ul<<((i&0xfU)<<1)));
					if (i < h->n_buckets && ((h->flags[i>>4]>>((i&0xfU)<<1))&3) == 0) {
						{ kh_cstr_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
						if (1) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
						(h->flags[i>>4]|=1ul<<((i&0xfU)<<1));
					} else {
						h->keys[i] = key;
						if (1) h->vals[i] = val;
						break;
					}
				}
			}
		}
		if (h->n_buckets > new_n_buckets) {
			h->keys = (kh_cstr_t*)realloc((void *)h->keys, new_n_buckets * sizeof(kh_cstr_t));
			if (1) h->vals = (int*)realloc((void *)h->vals, new_n_buckets * sizeof(int));
		}
		free(h->flags);
		h->flags = new_flags;
		h->n_buckets = new_n_buckets;
		h->n_occupied = h->size;
		h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5);
	}
	return 0;
}
static inline __attribute__ ((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key, int *ret) {
	khint_t x;
	if (h->n_occupied >= h->upper_bound) {
		if (h->n_buckets > (h->size<<1)) {
			if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets - 1) < 0) { *ret = -1; return h->n_buckets; }
		} else if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) < 0) { *ret = -1; return h->n_buckets; }
	}
	{
		khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
		x = site = h->n_buckets;
		k = __ac_X31_hash_string(key);
		i = k & mask;
		if (((h->flags[i>>4]>>((i&0xfU)<<1))&2)) x = i;
		else {
			last = i;
			while (!((h->flags[i>>4]>>((i&0xfU)<<1))&2) && (((h->flags[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp(h->keys[i], key) == 0))) {
				if (((h->flags[i>>4]>>((i&0xfU)<<1))&1)) site = i;
				i = (i + (++step)) & mask;
				if (i == last) { x = site; break; }
			}
			if (x == h->n_buckets) {
				if (((h->flags[i>>4]>>((i&0xfU)<<1))&2) && site != h->n_buckets) x = site;
				else x = i;
			}
		}
	}
	if (((h->flags[x>>4]>>((x&0xfU)<<1))&2)) {
		h->keys[x] = key;
		(h->flags[x>>4]&=~(3ul<<((x&0xfU)<<1)));
		++h->size; ++h->n_occupied;
		*ret = 1;
	} else if (((h->flags[x>>4]>>((x&0xfU)<<1))&1)) {
		h->keys[x] = key;
		(h->flags[x>>4]&=~(3ul<<((x&0xfU)<<1)));
		++h->size;
		*ret = 2;
	} else *ret = 0;
	return x;
}
static inline __attribute__ ((__unused__)) void kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t x) {
	if (x != h->n_buckets && !((h->flags[x>>4]>>((x&0xfU)<<1))&3)) {
		(h->flags[x>>4]|=1ul<<((x&0xfU)<<1));
		--h->size;
	}
}
27
Taking true branch
28
Taking false branch
29
Calling 'kh_resize_ccv_cnnp_parameter_id'
30
Taking true branch
31
Assuming the condition is false
32
Taking false branch
33
'?' condition is true
34
Assuming 'new_flags' is non-null
35
Taking false branch
36
'?' condition is true
37
Taking true branch
38
Storing uninitialized value
39
Assuming 'new_keys' is non-null
40
Taking false branch
41
Taking true branch
42
Assuming 'new_vals' is non-null
43
Taking false branch
44
Taking true branch
45
Loop condition is false. Execution continues on line 2474
46
Taking false branch
47
Returning from 'kh_resize_ccv_cnnp_parameter_id'
48
Taking false branch
49
Assuming the condition is true
50
Taking true branch
51
Taking true branch
57
Taking true branch
58
Assuming the condition is true
59
Assuming the condition is true
60
The value 1 is assigned to 'i'
61
Taking false branch
62
Assuming the condition is true
63
Assuming the condition is false
64
1st function call argument is an uninitialized value
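Editor's note on the diagnosis: steps 27-64 above trace kh_put into kh_resize_ccv_cnnp_parameter_id, where realloc grows h->keys without initializing the newly added slots (step 38, "Storing uninitialized value"); the later probe in kh_get_ccv_cnnp_parameter_id then appears to pass one of those slots to strcmp as its first argument (step 64, line 2474). In khash's design the flags bitmap guarantees a key slot is written before it is ever compared, an invariant the analyzer does not track across the two arrays, so this reads as a false positive. A distilled, self-contained sketch of the flagged pattern (hypothetical names; not the khash code itself):

#include <stdlib.h>
#include <string.h>

/* Whether keys[i] is ever read depends on a separate occupancy bitmap,
 * which the analyzer does not correlate with the keys array. */
static int probe(char** keys, const unsigned char* occupied, int n, const char* key)
{
	int i;
	for (i = 0; i < n; i++)
		if (occupied[i] && strcmp(keys[i], key) == 0) /* keys[i] is only set when occupied[i] is. */
			return i;
	return -1;
}

int main(void)
{
	char** keys = (char**)realloc(0, 4 * sizeof(char*)); /* New slots hold indeterminate pointers. */
	if (!keys)
		return 0;
	unsigned char occupied[4] = {0, 0, 0, 0};
	keys[1] = "dense-0-weight", occupied[1] = 1;
	return probe(keys, occupied, 4, "dense-0-bias") < 0 ? 0 : 1;
}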
2475
2476void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
2477{
2478 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2478, __extension__ __PRETTY_FUNCTION__
); }))
;
2479 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2480 int i;
2481 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2482 if (count != compiled_data->parameters->rnum)
2483 {
2484 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2485 // Build the map from name to index.
2486 for (i = 0; i < count; i++)
2487 {
2488 int ret;
2489 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[i], &ret);
2490 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2490
, __extension__ __PRETTY_FUNCTION__); }))
;
2491 kh_val(id_map, k)((id_map)->vals[k]) = i;
2492 }
2493 }
2494 const int parameter_size = compiled_data->parameters->rnum;
2495 int* copy_back = 0;
2496 const int tensors_init = !!compiled_data->tensors_init.v;
2497 if (!tensors_init)
2498 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2499 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2500 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2501 for (i = 0; i < parameter_size; i++)
2502 {
2503 int j = i;
2504 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2505 if (i >= count || strncmp(name, names[i], 1023) != 0)
2506 {
2507 // Build the map.
2508 if (id_map == 0)
2509 {
2510 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2511 for (j = 0; j < count; j++)
2512 {
2513 int ret;
2514 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[j], &ret);
2515 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2515
, __extension__ __PRETTY_FUNCTION__); }))
;
2516 kh_val(id_map, k)((id_map)->vals[k]) = j;
2517 }
2518 }
2519 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name)kh_get_ccv_cnnp_parameter_id(id_map, name);
2520 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2521 continue;
2522 j = kh_val(id_map, k)((id_map)->vals[k]);
2523 }
2524 if (compiled_data->tensors.parameters[i]) // It cannot be a shared parameter when we load into it.
2525 { assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))((void) sizeof ((!((uintptr_t)compiled_data->tensors.parameters
[i] & (uintptr_t)1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t
)compiled_data->tensors.parameters[i] & (uintptr_t)1))
; else __assert_fail ("!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)"
, "ccv_cnnp_model.c", 2525, __extension__ __PRETTY_FUNCTION__
); }))
; }
2526 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
2527 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2528 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2529 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2530 const int d = parameter.d;
2531 if (info.type == tensors[j]->info.type && invalidates) // Can move.
2532 {
2533 // Deallocate it if needed.
2534 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2535 if (compiled_data->tensors.parameters[i])
2536 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2537 compiled_data->tensors.parameters[i] = tensors[j];
2538 tensors[j] = 0;
2539 } else {
2540 if (!compiled_data->tensors.parameters[i])
2541 { // Not allocated; allocate it first.
2542 // Create a new one, making sure it carries the right parameters.
2543 const int type = info.type;
2544 info = tensors[j]->info;
2545 info.type = type; // Restore the original type.
2546 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
2547 }
2548 if (!copy_back)
2549 copy_back = (int*)cccalloccalloc(parameter_size, sizeof(int));
2550 copy_back[i] = j + 1;
2551 }
2552 init_v[d >> 5] |= (1u << (d & 0x1f));
2553 // Create this tensor for other data parallel allocations.
2554 info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
2555 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2556 for (j = 1; j < parallel_count; j++)
2557 if (!compiled_data->tensors.parameters[i + j * parameter_size])
2558 {
2559 if (j != device_id)
2560 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
2561 else
2562 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2563 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2564 }
2565 // No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
2566 }
2567 if (id_map)
2568 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2569 // Now do the transfer.
2570 if (copy_back)
2571 {
2572 for (i = 0; i < parameter_size; i++)
2573 {
2574 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[i]) & ~(uintptr_t)1))
;
2575 if (copy_back[i] == 0)
2576 continue;
2577 const int j = copy_back[i] - 1;
2578 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j])(ccv_nnc_tensor_t* []){tensors[j]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2579 }
2580 ccfreefree(copy_back);
2581 }
2582}
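A minimal call sketch for the function above, with assumptions flagged: the parameter ids "fc-0-weight"/"fc-0-bias" and the shapes are hypothetical, and CPU_TENSOR_NHWC is the ccv_nnc easy macro for CPU tensor parameters. With invalidates set, a tensor whose type already matches is moved into the model and its slot in tensors is zeroed; mismatched ones are copied and remain the caller's to free:

/* Hypothetical snippet; assumes `model` is compiled with these two parameters. */
char* names[] = { "fc-0-weight", "fc-0-bias" };
ccv_nnc_tensor_t* tensors[2] = {
	ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 128, 10), 0),
	ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0),
};
ccv_cnnp_model_set_parameters_from_key_values(model, names, tensors, 2, 1);
int j;
for (j = 0; j < 2; j++)
	if (tensors[j]) /* Not moved; it was copied, so we still own it. */
		ccv_nnc_tensor_free(tensors[j]);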
2583
2584ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2585{
2586 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2587 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2587, __extension__ __PRETTY_FUNCTION__); }))
;
2588 const int parameter_size = compiled_data->parameters->rnum;
2589 int i;
2590 for (i = 0; i < parameter_size; i++)
2591 {
2592 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2593 if (first(model, name, context))
2594 return ccv_cnnp_model_parameters(model, -1, i);
2595 }
2596 return 0;
2597}
2598
2599ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2600{
2601 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2602 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2602, __extension__ __PRETTY_FUNCTION__); }))
;
2603 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2604 const int parameter_size = compiled_data->parameters->rnum;
2605 int i;
2606 for (i = 0; i < parameter_size; i++)
2607 {
2608 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2609 if (filter(model, name, context))
2610 {
2611 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2612 ccv_array_push(parameters, &parameter);
2613 }
2614 }
2615 return parameters;
2616
2617}
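A sketch of a filter callback, assuming the (model, name, context) signature implied by the filter(model, name, context) call above; the "-bias" naming convention is hypothetical:

#include <string.h>

/* Hypothetical filter: collect every parameter whose id contains "-bias". */
static int _only_bias(const ccv_cnnp_model_t* const model, const char* const name, void* const context)
{
	return strstr(name, "-bias") != 0;
}

ccv_array_t* const biases = ccv_cnnp_model_parameters_filter(model, _only_bias, 0);
/* Each element is a ccv_cnnp_model_io_t usable with the parameter methods in this file. */
ccv_array_free(biases);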
2618
2619CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2620{
2621 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2622 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2622, __extension__ __PRETTY_FUNCTION__); }))
;
2623 const int tensors_init = !!compiled_data->tensors_init.v;
2624 if (!tensors_init) // If nothing initialized, we return parameter 0.
2625 return ccv_cnnp_model_parameters(model, -1, 0);
2626 const int parameter_size = compiled_data->parameters->rnum;
2627 int i;
2628 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2629 for (i = 0; i < parameter_size; i++)
2630 {
2631 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2632 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2633 return ccv_cnnp_model_parameters(model, -1, i);
2634 }
2635 return 0;
2636}
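The init_v bookkeeping used here (and throughout this file) stores one bit per tensor symbol: d >> 5 selects the 32-bit word and d & 0x1f the bit within it. A self-contained sketch of the same arithmetic, with an arbitrary symbol index:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t init_v[4] = {0}; /* One bit per tensor symbol; room for 128 symbols. */
	const int d = 70;
	init_v[d >> 5] |= (1u << (d & 0x1f)); /* Mark symbol 70 as init'ed: word 2, bit 6. */
	printf("word %d, bit %d, set: %d\n", d >> 5, d & 0x1f, !!(init_v[d >> 5] & (1u << (d & 0x1f))));
	return 0;
}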
2637
2638static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2639{
2640 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2641 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2641, __extension__
__PRETTY_FUNCTION__); }))
;
2642 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2643 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2644 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2645 return to_parameter_indices;
2646}
2647
2648static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2649{
2650 // If the model is not compiled yet, compile it now.
2651 if (!model->graph)
2652 {
2653 model->graph = ccv_nnc_symbolic_graph_new();
2654 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2654, __extension__ __PRETTY_FUNCTION__
); }))
;
2655 const int input_size = from_model->input_size;
2656 ccv_nnc_tensor_param_t input_params[input_size];
2657 int i;
2658 for (i = 0; i < input_size; i++)
2659 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2660 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2661 model->parallel_count = from_model->parallel_count;
2662 model->memory_compression = from_model->memory_compression;
2663 model->memory_reduction = from_model->memory_reduction;
2664 model->gradient_checkpointing = from_model->gradient_checkpointing;
2665 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2666 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2667 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2668 }
2669 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2670 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2670, __extension__ __PRETTY_FUNCTION__
); }))
;
2671 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2672 if (!to_tensors_init)
2673 {
2674 if (only_init_0)
2675 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2676 else
2677 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2678 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2679 // Check whether it is fully allocated; if it is not, run init_1.
2680 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2681 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2681, __extension__ __PRETTY_FUNCTION__
); }))
;
2682 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2683 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2684 if (*from_param_ref < 0 && *param_ref >= 0)
2685 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2685, __extension__ __PRETTY_FUNCTION__
); }))
; }
2686 else if (*from_param_ref >= 0)
2687 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2687, __extension__ __PRETTY_FUNCTION__
); }))
; }
2688 if (*param_ref < 0 && *from_param_ref >= 0)
2689 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2689, __extension__ __PRETTY_FUNCTION__); }))
; }
2690 else if (*param_ref >= 0)
2691 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2691, __extension__ __PRETTY_FUNCTION__
); }))
; }
2692}
2693
2694void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2695{
2696 ccv_array_t* to_parameter_indices;
2697 int to_param_ref;
2698 ccv_array_t* from_parameter_indices;
2699 int from_param_ref;
2700 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2701 // Should have exactly the same number of parameters.
2702 if (to_param_ref < 0 && from_param_ref < 0)
2703 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2703, __extension__ __PRETTY_FUNCTION__
); }))
; }
2704 // To models.
2705 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2706 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2706, __extension__ __PRETTY_FUNCTION__
); }))
;
2707 // From models.
2708 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2709 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2710 const int to_parameter_size = to_compiled_data->parameters->rnum;
2711 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2712 int i, j;
2713 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2714 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2715 for (i = 0; i < rnum; i++)
2716 {
2717 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2718 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2718, __extension__ __PRETTY_FUNCTION__); }))
;
2719 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2719, __extension__ __PRETTY_FUNCTION__
); }))
;
2720 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2721 // If the original is not init'ed, we cannot copy from it.
2722 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2723 continue;
2724 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2725 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2725, __extension__ __PRETTY_FUNCTION__); }))
;
2726 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2726, __extension__ __PRETTY_FUNCTION__
); }))
;
2727 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2728 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2728, __extension__
__PRETTY_FUNCTION__); }))
;
2729 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2730 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2730, __extension__
__PRETTY_FUNCTION__); }))
;
2731 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2732 for (j = 1; j < parallel_count; j++)
2733 {
2734 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2735 if (copy_tensor)
2736 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2737 }
2738 // Mark this symbol as init'ed.
2739 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2740 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2741 }
2742 ccv_array_free(to_parameter_indices);
2743 ccv_array_free(from_parameter_indices);
2744}
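A hypothetical one-liner usage: mirror every parameter of src_model into model. Passing -1 for both selector and index as the all-parameters selector is an assumption extrapolated from the per-index ccv_cnnp_model_parameters(model, -1, i) calls above:

ccv_cnnp_model_set_parameters(model, ccv_cnnp_model_parameters(model, -1, -1), src_model, ccv_cnnp_model_parameters(src_model, -1, -1));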
2745
2746void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2747{
2748 ccv_array_t* to_parameter_indices;
2749 int to_param_ref;
2750 ccv_array_t* from_parameter_indices;
2751 int from_param_ref;
2752 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2753 // Should have exactly the same number of parameters.
2754 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2755 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2755, __extension__ __PRETTY_FUNCTION__
); }))
; }
2756 // To models.
2757 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2758 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2758, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2759 // From models.
2760 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2761 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2762 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2762, __extension__ __PRETTY_FUNCTION__
); }))
; // Must have the same parallel count to share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2763 const int from_parameter_size = from_compiled_data->parameters->rnum;
2764 const int to_parameter_size = to_compiled_data->parameters->rnum;
2765 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2766 int i, j;
2767 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2768 char* updated_name = 0;
2769 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2770 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2771 for (i = 0; i < rnum; i++)
2772 {
2773 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2774 // Need to figure out how to use the renamer here.
2775 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2776 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2776, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2777 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2777, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2778 if (renamer
18.1
'renamer' is non-null
)
2779 {
2780 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2781 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2782 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2783 updated_name = (char*)ccmallocmalloc(1024);
2784 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2785 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2786 memcpy(updated_name, src_name, src_name_len);
2787 updated_name[src_name_len] = 0;
2788 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2789 continue; // Skip this.
2790 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2791 {
2792 // Nothing changed.
2793 } else {
2794 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2795 {
2796 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2797 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
54
Assuming 'j' is >= 'from_parameter_size'
55
Loop condition is false. Execution continues on line 2805
2798 {
2799 int ret;
2800 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
52
Returning from 'kh_put_ccv_cnnp_parameter_id'
2801 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2801
, __extension__ __PRETTY_FUNCTION__); }))
;
53
Taking true branch
2802 kh_val(id_map, k)((id_map)->vals[k]) = j;
2803 }
2804 }
2805 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
56
Calling 'kh_get_ccv_cnnp_parameter_id'
2806 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2807 continue;
2808 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2809 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2809, __extension__ __PRETTY_FUNCTION__); }))
;
2810 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2810, __extension__
__PRETTY_FUNCTION__); }))
;
2811 }
2812 }
2813 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2813, __extension__ __PRETTY_FUNCTION__); }))
;
2814 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2814, __extension__
__PRETTY_FUNCTION__); }))
;
2815 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2816 // If the original is not init'ed, we cannot share from it.
2817 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2818 continue;
2819 for (j = 0; j < parallel_count; j++)
2820 {
2821 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2822 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2822, __extension__
__PRETTY_FUNCTION__); }))
;
2823 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2824 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2825 ccv_nnc_tensor_free(dest);
2826 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2827 }
2828 // Mark this symbol as init'ed.
2829 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2830 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2831 }
2832 ccv_array_free(to_parameter_indices);
2833 ccv_array_free(from_parameter_indices);
2834 if (id_map)
2835 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2836 if (updated_name)
2837 ccfreefree(updated_name);
2838 // Mark it as incomplete so we will call init_1.
2839 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2840 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2841 else // Remove the flag.
2842 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2843}
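A sketch of a renamer callback, following the calling convention visible above: the callback receives the destination parameter id, a buffer preloaded with the candidate source id (empty when the index was out of range), and the buffer capacity; returning non-zero skips the parameter, and the name left in updated_name selects the source parameter to share. The "t-"/"s-" prefixes are hypothetical:

#include <stdio.h>
#include <string.h>

/* Hypothetical renamer: map a destination id "t-..." onto a source id "s-...". */
static int _map_prefix(void* const context, const char* const dest_name, char* const updated_name, const size_t size)
{
	if (strncmp(dest_name, "t-", 2) != 0)
		return -1; /* Not one of ours, skip it. */
	snprintf(updated_name, size, "s-%s", dest_name + 2);
	return 0;
}

/* The -1/-1 all-parameters selector is an assumption consistent with its per-index use above. */
ccv_cnnp_model_share_parameters(model, ccv_cnnp_model_parameters(model, -1, -1), from_model, ccv_cnnp_model_parameters(from_model, -1, -1), _map_prefix, 0);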
2844
2845ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2846{
2847 if (!compiled_data->stream_map)
2848 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2849 int ret = 0;
2850 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2851 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2851, __extension__ __PRETTY_FUNCTION__); }))
;
2852 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2853 // If ret == 0, the key already exists and we can return directly; otherwise, create and return.
2854 if (ret != 0)
2855 {
2856 stream = ccv_nnc_stream_context_new(type);
2857 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2858 }
2859 return stream;
2860}
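The ret != 0 test above is khash's get-or-create idiom: kh_put returns 0 when the key already exists (the stored value is valid) and non-zero when a fresh bucket was claimed (the value must be written before use). A generic sketch of the same idiom, assuming the usual single-header khash.h:

#include "khash.h"
KHASH_MAP_INIT_INT(ptr_map, void*)

static void* get_or_create(khash_t(ptr_map)* const map, const int key, void* (*const create)(int))
{
	int ret = 0;
	const khiter_t k = kh_put(ptr_map, map, key, &ret);
	/* ret == 0: key present, value valid. ret != 0: fresh bucket, kh_val() is
	 * garbage until written, which is exactly why the code above assigns first. */
	if (ret != 0)
		kh_val(map, k) = create(key);
	return kh_val(map, k);
}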
2861
2862void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2863{
2864 ccv_array_t* to_parameter_indices;
2865 int to_param_ref;
2866 ccv_array_t* from_parameter_indices;
2867 int from_param_ref;
2868 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2869 // Should have exactly the same number of parameters.
2870 if (to_param_ref < 0 && from_param_ref < 0)
2871 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2871, __extension__ __PRETTY_FUNCTION__
); }))
; }
2872 // To models.
2873 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2874 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2874, __extension__ __PRETTY_FUNCTION__
); }))
;
2875 // From models.
2876 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2877 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2878 const int to_parameter_size = to_compiled_data->parameters->rnum;
2879 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2880 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2880, __extension__ __PRETTY_FUNCTION__
); }))
;
2881 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2881, __extension__ __PRETTY_FUNCTION__
); }))
;
2882 int i, j;
2883 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2884 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2885 for (i = 0; i < aux_in_size; i++)
2886 inputs[i + 2] = aux_ins[i];
2887 for (i = 0; i < aux_out_size; i++)
2888 outputs[i + 1] = aux_outs[i];
2889 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2890 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2891 for (i = 0; i < rnum; i++)
2892 {
2893 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2894 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2894, __extension__ __PRETTY_FUNCTION__); }))
;
2895 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2895, __extension__ __PRETTY_FUNCTION__
); }))
;
2896 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2897 // If the original is not init'ed, we cannot copy from it.
2898 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2899 continue;
2900 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2901 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2901, __extension__ __PRETTY_FUNCTION__); }))
;
2902 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2902, __extension__ __PRETTY_FUNCTION__
); }))
;
2903 if (parallel_count > 1)
2904 {
2905 ccv_nnc_stream_context_t* streams[parallel_count];
2906 ccv_nnc_stream_signal_t* signal;
2907 if (stream_context)
2908 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2909 for (j = 0; j < parallel_count; j++)
2910 {
2911 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2912 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2913 if (!dest || !src)
2914 {
2915 streams[j] = 0;
2916 continue;
2917 }
2918 // At the moment, we can only handle tensors on the same device.
2919 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2919, __extension__ __PRETTY_FUNCTION__
); }))
;
2920 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2920, __extension__ __PRETTY_FUNCTION__
); }))
;
2921 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2922 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2923 int type = stream_type;
2924 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2925 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2926 // Wait on the signal before proceeding.
2927 if (stream_context)
2928 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2929 inputs[0] = outputs[0] = dest;
2930 inputs[1] = src;
2931 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2932 if (stream_context)
2933 {
2934 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2935 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2936 }
2937 streams[j] = stream_0;
2938 }
2939 // If this should be blocking, block here.
2940 if (!stream_context)
2941 for (j = 0; j < parallel_count; j++)
2942 if (streams[j])
2943 ccv_nnc_stream_context_wait(streams[j]);
2944 } else {
2945 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2946 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2946, __extension__
__PRETTY_FUNCTION__); }))
;
2947 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2948 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2948, __extension__
__PRETTY_FUNCTION__); }))
;
2949 inputs[0] = outputs[0] = dest;
2950 inputs[1] = src;
2951 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2952 }
2953 // Mark this symbol as init'ed.
2954 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2955 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2956 }
2957 ccv_array_free(to_parameter_indices);
2958 ccv_array_free(from_parameter_indices);
2959}
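A hypothetical use of the function above: maintain an exponential moving average of model's parameters inside ema_model. Since the loop wires inputs[0] = outputs[0] = dest and inputs[1] = src, CMD_ADD_FORWARD(p, q) (the ccv_nnc easy macro, assumed to compute p * input0 + q * input1) yields dest = 0.9 * dest + 0.1 * src per parameter:

/* Hypothetical: ema_model tracks model; no aux tensors, blocking (null stream). */
ccv_cnnp_model_parameters_zip_map(ema_model, ccv_cnnp_model_parameters(ema_model, -1, -1), CMD_ADD_FORWARD(0.9, 0.1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, model, ccv_cnnp_model_parameters(model, -1, -1));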
2960
2961void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2962{
2963 int to_param_ref;
2964 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2965 // To models.
2966 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2967 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2967, __extension__ __PRETTY_FUNCTION__
); }))
;
2968 // Tensor has to be inited already.
2969 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2969, __extension__ __PRETTY_FUNCTION__
); }))
;
2970 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2970, __extension__ __PRETTY_FUNCTION__
); }))
;
2971 // From models.
2972 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2973 const int to_parameter_size = to_compiled_data->parameters->rnum;
2974 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2975 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2975, __extension__ __PRETTY_FUNCTION__
); }))
;
2976 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2976, __extension__ __PRETTY_FUNCTION__
); }))
;
2977 int i, j;
2978 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2979 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2980 for (i = 0; i < aux_in_size; i++)
2981 inputs[i + 1] = aux_ins[i];
2982 for (i = 0; i < aux_out_size; i++)
2983 outputs[i + 1] = aux_outs[i];
2984 for (i = 0; i < rnum; i++)
2985 {
2986 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2987 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2987, __extension__ __PRETTY_FUNCTION__); }))
;
2988 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2988, __extension__ __PRETTY_FUNCTION__
); }))
;
2989 if (parallel_count > 1)
2990 {
2991 ccv_nnc_stream_context_t* streams[parallel_count];
2992 ccv_nnc_stream_signal_t* signal;
2993 if (stream_context)
2994 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2995 for (j = 0; j < parallel_count; j++)
2996 {
2997 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2998 if (!dest)
2999 {
3000 streams[j] = 0;
3001 continue;
3002 }
3003 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3004 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3005 int type = stream_type;
3006 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3007 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3008 // Wait on the signal before proceeding.
3009 if (stream_context)
3010 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3011 inputs[0] = outputs[0] = dest;
3012 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3013 if (stream_context)
3014 {
3015 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3016 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3017 }
3018 streams[j] = stream_0;
3019 }
3020 // If this should be blocking, block here.
3021 if (!stream_context)
3022 for (j = 0; j < parallel_count; j++)
3023 if (streams[j])
3024 ccv_nnc_stream_context_wait(streams[j]);
3025 } else {
3026 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
3027 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3027, __extension__
__PRETTY_FUNCTION__); }))
;
3028 inputs[0] = outputs[0] = dest;
3029 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3030 }
3031 // No need to mark this symbol as init'ed, it is already.
3032 }
3033 ccv_array_free(to_parameter_indices);
3034}
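A hypothetical use of the function above: apply an out-of-band weight decay by scaling every parameter in place. CMD_SCALAR_MUL_FORWARD(a) is assumed from the ccv_nnc easy macros; the destination is wired as both inputs[0] and outputs[0] above, so no auxiliary tensors are needed:

/* Hypothetical: dest = 0.999 * dest for every parameter, blocking (null stream). */
ccv_cnnp_model_parameters_map(model, ccv_cnnp_model_parameters(model, -1, -1), CMD_SCALAR_MUL_FORWARD(0.999), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);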
3035
3036void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
3037{
3038 int to_param_ref;
3039 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3040 // To models.
3041 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
3042 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 3042, __extension__ __PRETTY_FUNCTION__
); }))
;
3043 // Tensor has to be inited already.
3044 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 3044, __extension__ __PRETTY_FUNCTION__
); }))
;
3045 ccv_nnc_tensor_t** tensor_gradients;
3046 if (to_compiled_data->backward.count > 1)
3047 tensor_gradients = to_compiled_data->tensors.accum_gradients;
3048 else
3049 tensor_gradients = to_compiled_data->tensors.gradients;
3050 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 3050, __extension__ __PRETTY_FUNCTION__
); }))
;
3051 // From models.
3052 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3053 const int to_parameter_size = to_compiled_data->parameters->rnum;
3054 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3055 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 3055, __extension__ __PRETTY_FUNCTION__
); }))
;
3056 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 3056, __extension__ __PRETTY_FUNCTION__
); }))
;
3057 int i, j;
3058 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
3059 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
3060 for (i = 0; i < aux_in_size; i++)
3061 inputs[i + 1] = aux_ins[i];
3062 for (i = 0; i < aux_out_size; i++)
3063 outputs[i + 1] = aux_outs[i];
3064 for (i = 0; i < rnum; i++)
3065 {
3066 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3067 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3067, __extension__ __PRETTY_FUNCTION__); }))
;
3068 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3068, __extension__ __PRETTY_FUNCTION__
); }))
;
3069 if (parallel_count > 1)
3070 {
3071 ccv_nnc_stream_context_t* streams[parallel_count];
3072 ccv_nnc_stream_signal_t* signal;
3073 if (stream_context)
3074 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
3075 for (j = 0; j < parallel_count; j++)
3076 {
3077 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
3078 if (!dest)
3079 {
3080 streams[j] = 0;
3081 continue;
3082 }
3083 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3084 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3085 int type = stream_type;
3086 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3087 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3088 // Wait on the signal before proceeding.
3089 if (stream_context)
3090 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3091 inputs[0] = outputs[0] = dest;
3092 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3093 if (stream_context)
3094 {
3095 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3096 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3097 }
3098 streams[j] = stream_0;
3099 }
3100 // If this should be blocking, blocking it.
3101 if (!stream_context)
3102 for (j = 0; j < parallel_count; j++)
3103 if (streams[j])
3104 ccv_nnc_stream_context_wait(streams[j]);
3105 } else {
3106 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
3107 if (!dest)
3108 continue;
3109 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3109, __extension__
__PRETTY_FUNCTION__); }))
;
3110 inputs[0] = outputs[0] = dest;
3111 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3112 }
3113 // No need to mark this symbol as init'ed, it is already.
3114 }
3115 ccv_array_free(to_parameter_indices);
3116}
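
/* Note: the loop above fans the command out across one stream per data-parallel
   replica. With a caller-provided stream_context it is fully asynchronous: each
   per-device stream first waits on a signal emitted from the caller's stream,
   then signals back so the caller's stream observes completion. Without a
   stream_context it blocks, waiting on every stream it actually used before
   returning. */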

void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
{
	// Only the CUDA backend has this feature.
#ifdef HAVE_CUDA
	int to_param_ref;
	ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
	// To models.
	ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
	assert(compiled_data);
	// Tensors have to be initialized already.
	assert(!!compiled_data->tensors_init.v);
	assert(compiled_data->tensors.parameters);
	// From models.
	const int parallel_count = ccv_max(model->parallel_count, 1);
	const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
	int i;
	for (i = 0; i < rnum; i++)
	{
		const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
		assert(dest_d >= 0);
		assert(dest_d < compiled_data->parameters->rnum);
		if (parallel_count > 1)
		{
			assert(0 && "Cannot support this when data parallel is in effect.");
		} else {
			ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d]);
			assert(src);
			ccv_nnc_tensor_param_t params = src->info;
			if (CCV_TENSOR_GET_MEMORY(params.type) != CCV_TENSOR_GPU_MEMORY)
				continue;
			const size_t size = ccv_nnc_tensor_data_size(params);
			if (size <= 0)
				continue;
			const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1);
			const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
			ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
			tensor->dataof = 0;
			tensor->alias_ref = 0;
			tensor->sig = 0;
			tensor->refcount = 1;
			tensor->info = params;
			if (tfb)
			{
				tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
				// This corresponds to mat->step.
				tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
			} else // This won't be recognized by ccv_dense_matrix_t.
				tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
			// Remove this flag so it can be deallocated as usual.
			tensor->type &= ~CCV_NO_DATA_ALLOC;
			assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
			void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
			if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
			{
				tensor->data.u8 = (uint8_t*)ptr;
				tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU and would prefer an explicit prefetch call.
			} else {
				// Allocation failed.
				ccfree(tensor);
				continue;
			}
			// TODO: Cannot run this on the stream context yet, due to allocations and deallocations.
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0);
			cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size);
			compiled_data->tensors.parameters[dest_d] = tensor;
			// Now we can free the old one.
			if (should_free)
				ccv_nnc_tensor_free(src);
		}
		// No need to mark this symbol as init'ed; it already is.
	}
	ccv_array_free(to_parameter_indices);
#endif
}
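
/* Usage sketch -- a minimal call, assuming a CUDA build and a model whose
   parameter tensors are already initialized (otherwise the asserts above fire);
   ccv_cnnp_model_parameters() with the ALL_PARAMETERS selectors is the usual
   way in this API to address every parameter:

     ccv_cnnp_model_parameters_to_unified_memory(model,
       ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS), 0);

   Passing 0 for the stream context matches the TODO above: the transfer and
   the read-mostly advise hint run on the default path, not on a stream. */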

ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
{
	ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
	assert(compiled_data);
	return compiled_data->minimize.minimizer;
}

void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
{
	ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
	assert(compiled_data);
	const int parameter_size = compiled_data->parameters->rnum;
	if (parameter_size == 0)
		return;
	if (reset)
		{ assert(set_parameters == 0 && set_parameter_size == 0); }
	const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
	const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
	if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
		compiled_data->minimize.max_saved_aux_size = saved_aux_size;
	const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
	// We update all parameters; at this point, we have one minimizer.
	if (set_parameters == 0 || set_parameter_size == 0)
		compiled_data->minimize.minimizer = minimizer;
	int i;
	if (set_parameters && set_parameter_size)
	{
		// We need to record which minimizer goes with these parameters.
		if (!compiled_data->minimize.parameters)
			compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
		ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
		set_minimizer_for_parameter->minimizer = minimizer;
		set_minimizer_for_parameter->parameter_size = set_parameter_size;
		memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
		ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
	}
	// If reset is true, clear the per-parameter minimizers array.
	if (reset && compiled_data->minimize.parameters)
	{
		for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
			ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
		ccv_array_clear(compiled_data->minimize.parameters);
	}
	if (!compiled_data->update_nodes)
		return;
	ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
	assert(symbolic_graph);
	if (saved_aux_size > old_max_saved_aux_size)
	{
		assert(compiled_data->updated_parameters);
		// Reallocate first, move them around later.
		compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
		compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
		compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
		// We need to do this from back to front because saved_aux_size > old_saved_aux_size, so the regions could overlap.
		_ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
	}
	int flag = 0;
	const int parallel_count = ccv_max(model->parallel_count, 1);
	if (set_parameters && set_parameter_size)
	{
		ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
		for (i = 0; i < set_parameter_size; i++)
		{
			const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
			assert(set_parameters[i]->param_sel != 0);
			const int old_rnum = parameter_indices->rnum;
			ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
			const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
			assert(set_parameters[i]->param_ref != 0);
			if (param_ref >= 0)
			{
				assert(param_ref + old_rnum < parameter_indices->rnum);
				*(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
				parameter_indices->rnum = old_rnum + 1;
			}
		}
		// We may have duplicated indices, but that is OK; we will just set those twice.
		for (i = 0; i < parameter_indices->rnum; i++)
		{
			const int d = *(int*)ccv_array_get(parameter_indices, i);
			if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
				flag = 1;
		}
		ccv_array_free(parameter_indices);
	} else {
		for (i = 0; i < parameter_size; i++)
			if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
				flag = 1;
		if (compiled_data->minimize.parameters)
			if (_ccv_cnnp_apply_parameters_with_minimizer(model))
				flag = 1;
	}
	if (flag)
	{
		// If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up the apply-gradients graph.
		if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
			_ccv_cnnp_compiled_data_graph_free(compiled_data);
		_ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
	}
}
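
/* Usage sketch -- set one minimizer for the whole model; CMD_SGD_FORWARD is the
   stock SGD constructor in this API, and the hyperparameters below are
   placeholders, not recommendations:

     ccv_cnnp_model_set_minimizer(model, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), 0, 0, 0);

   With set_parameters == 0 the minimizer applies to every parameter; calling
   again with reset == 1 (and no parameters) also clears any per-parameter
   minimizers recorded earlier. */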

void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
{
	ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
	assert(compiled_data);
	compiled_data->compile_params = compile_params;
}

void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
{
	if (model->graph && out_size > 0)
		ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
	if (model->compiled_data && model->compiled_data->graph && out_size > 1)
		ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
	if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
		ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
	if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
		ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
}
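
/* Usage sketch -- dump the symbolic graph only; outs[1..3] are consulted only
   once the concrete, backward-accumulation, and apply-gradients graphs exist:

     FILE* out = fopen("model.dot", "w+");
     ccv_cnnp_model_dot(model, CCV_NNC_LONG_DOT_GRAPH, &out, 1);
     fclose(out);
*/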

void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
{
	if (model->graph)
		ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
}

static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
{
	int i;
	const int parameter_size = compiled_data->parameters->rnum;
	ccv_array_free(compiled_data->parameters);
	if (compiled_data->parameter_flags)
		ccfree(compiled_data->parameter_flags);
	const int internal_size = compiled_data->internals->rnum;
	ccv_array_free(compiled_data->internals);
	assert(compiled_data->ids.parameters->rnum == parameter_size);
	assert(compiled_data->ids.internals->rnum == internal_size);
	for (i = 0; i < parameter_size; i++)
		ccfree(*(char**)ccv_array_get(compiled_data->ids.parameters, i));
	ccv_array_free(compiled_data->ids.parameters);
	for (i = 0; i < internal_size; i++)
		ccfree(*(char**)ccv_array_get(compiled_data->ids.internals, i));
	ccv_array_free(compiled_data->ids.internals);
	const int parallel_count = compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_root_parallel_count(model);
	if (compiled_data->tensors.parameters)
	{
		for (i = 0; i < parameter_size * parallel_count; i++)
			// Free it only if it is not marked as not belonging to us.
			if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
				if (compiled_data->tensors.parameters[i])
					ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
		for (i = 0; i < internal_size * parallel_count; i++)
			if (compiled_data->tensors.internals[i])
				ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
		ccfree(compiled_data->tensors.parameters);
	}
	if (compiled_data->tensors.gradients)
	{
		for (i = 0; i < parameter_size * parallel_count; i++)
		{
			if (compiled_data->tensors.gradients[i])
				ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
			if (compiled_data->tensors.accum_gradients[i])
				ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
		}
		ccfree(compiled_data->tensors.gradients);
	}
	if (compiled_data->minimize.parameters)
	{
		for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
			ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
		ccv_array_free(compiled_data->minimize.parameters);
	}
	if (compiled_data->rewindables)
		ccv_array_free(compiled_data->rewindables);
	if (compiled_data->tensors_init.v)
		ccfree(CCV_NNC_INIT_V(compiled_data->tensors_init.v));
	if (compiled_data->evaluate.tos)
		ccfree(compiled_data->evaluate.tos);
	compiled_data->evaluate.tos = 0;
	if (compiled_data->stream_map)
	{
		khiter_t k;
		for (k = kh_begin(compiled_data->stream_map); k != kh_end(compiled_data->stream_map); ++k)
		{
			if (!kh_exist(compiled_data->stream_map, k))
				continue;
			ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k);
			ccv_nnc_stream_context_free(stream);
		}
		kh_destroy(stream_map, compiled_data->stream_map);
	}
	_ccv_cnnp_compiled_data_graph_free(compiled_data);
	_ccv_cnnp_compiled_data_gradient_free(compiled_data);
	_ccv_cnnp_compiled_data_backward_free(compiled_data);
	_ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
	if (compiled_data->gradient_checkpoints)
	{
		for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
		{
			ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i);
			assert(checkpoint->inputs);
			ccfree(checkpoint->inputs);
			ccv_array_free(checkpoint->tensor_symbols);
		}
		ccv_array_free(compiled_data->gradient_checkpoints);
	}
	ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
	ccfree(compiled_data);
}
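
/* Note: the (uintptr_t)1 tag tested above is the same convention used in
   ccv_cnnp_model_parameters_to_unified_memory(): the low pointer bit on a
   tensors.parameters[] entry marks a tensor the model does not own, so teardown
   skips ccv_nnc_tensor_free() for it, and CCV_NNC_TENSOR() masks the bit off
   before the pointer is dereferenced. */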

void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
{
	ccv_cnnp_model_deinit(model);
	if (model->isa->dealloc)
		model->isa->dealloc(model);
	if (model->io)
	{
		int i;
		for (i = 0; i < model->io->rnum; i++)
		{
			ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
			if (model_io->outgoings)
				ccv_array_free(model_io->outgoings);
			if (model_io->incomings)
				ccv_array_free(model_io->incomings);
			if (model_io->dependencies)
				ccv_array_free(model_io->dependencies);
			ccfree(model_io);
		}
		ccv_array_free(model->io);
	}
	if (model->parameter_indices)
		ccv_array_free(model->parameter_indices);
	if (model->inputs)
		ccfree(model->inputs);
	if (model->graph)
		ccv_nnc_symbolic_graph_free(model->graph);
	if (model->compiled_data)
		_ccv_cnnp_compiled_data_free(model, model->compiled_data);
	if (model->name)
		ccfree(model->name);
	ccfree(model);
}
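
/* Note: the teardown order is deliberate -- ccv_cnnp_model_deinit() and the
   subclass dealloc hook run first, while the io array, graph, and compiled
   data they may still reference are freed afterwards. */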

void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
{
	ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
	if (!compiled_data)
		return;
	if (compiled_data->graph)
		ccv_nnc_graph_cancel(compiled_data->graph);
	if (compiled_data->apply_gradients.graph)
		ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
}
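
/* Note: cancellation is best-effort. It only flags the concrete evaluation and
   apply-gradients graphs for cancellation, and it is a no-op on a model that
   has not been compiled yet (compiled_data == 0). */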

void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
{
	model->exec_flags = flags;
}

int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
{
	return model->exec_flags;
}
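
/* Note: the setter stores flags verbatim in model->exec_flags and the getter
   reads it back; the specific exec flag bits this field carries are defined
   elsewhere in the nnc headers (see ccv_nnc.h). */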