Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2522, column 8
Array access (via field 'vals') results in a null pointer dereference
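
Note: the flagged line (2522) is outside this excerpt. As a minimal sketch of the diagnosed pattern (hypothetical names, not the actual code at line 2522): khash-style tables, such as the ones created with kh_init() in this file, keep a separate 'vals' array, and indexing that array through a table pointer that may be null is exactly what this checker reports.

 typedef struct { int n_buckets; int* keys; int* vals; } map_t;

 int lookup_val(const map_t* const h, const int it)
 {
 	return h->vals[it]; /* null pointer dereference when h (or h->vals) is null */
 }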

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2026-04-29-094735-1282740-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7#include "_ccv_nnc_symbolic_graph.h"
8#ifdef HAVE_CUDA
9#include "gpu/ccv_nnc_compat.h"
10#endif
11
12// MARK - Level-5 API
13
14ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
15{
16 if (!model->io)
17 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
18 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
19 model_io->param_ref = 0;
20 model_io->param_sel = 0;
21 model_io->visit = 0;
22 model_io->model = model;
23 model_io->dependencies = 0;
24 model_io->dependents = 0;
25 model_io->outgoings = 0;
26 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
27 ccv_array_push(model->io, &model_io);
28 if (input_size > 0)
29 {
30 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
31 ccv_array_resize(model_io->incomings, input_size);
32 int i;
33  memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
34 for (i = 0; i < input_size; i++)
35 {
36 if (!inputs[i]->outgoings)
37 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
38 ccv_array_push(inputs[i]->outgoings, &model_io);
39 }
40 } else {
41 model_io->incomings = 0;
42 }
43 return model_io;
44}
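// Usage sketch (illustrative, not from this file): ccv_cnnp_model_apply records the
// dataflow edges above (incomings / outgoings), so chaining applications builds the
// model DAG. This assumes the public helpers ccv_cnnp_input() and MODEL_IO_LIST():
//   const ccv_cnnp_model_io_t x = ccv_cnnp_input();
//   const ccv_cnnp_model_io_t y = ccv_cnnp_model_apply(dense, MODEL_IO_LIST(x));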
45
46void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
47{
48 assert(dependency_size > 0);
49 if (!model_io->dependencies)
50 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
51 int i, j;
52 for (i = 0; i < dependency_size; i++)
53 {
54 int flag = 0;
55  // Check whether it already exists.
56 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
57   if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
58 flag = 1;
59 if (flag)
60 continue;
61 ccv_array_push(model_io->dependencies, dependencies + i);
62 ++dependencies[i]->dependents;
63 }
64}
65
66int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
67{
68 return model->output_size;
69}
70
71int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
72{
73 // If the model is compiled, it defaults to 1 unless explicitly set otherwise.
74 if (model->compiled_data)
75 return model->is_trainable >= 0 ? model->is_trainable : 1;
76 return model->is_trainable;
77}
78
79ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
80{
81 if (!model->io)
82 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
83 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
84 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
85 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
86 model_io->visit = 0;
87 model_io->model = model;
88 model_io->outputs = 0;
89 model_io->dependencies = 0;
90 model_io->dependents = 0;
91 model_io->incomings = 0;
92 model_io->outgoings = 0;
93 ccv_array_push(model->io, &model_io);
94 return model_io;
95}
96
97void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
98{
99 model->notify_hook.func = func;
100 model->notify_hook.context = context;
101}
102
103void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
104{
105 if (model->notify_hook.func)
106 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
107 if (model->isa->notify)
108 model->isa->notify(model, tag, payload);
109}
110
111static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
112{
113 int i, j;
114 for (i = 0; i < graph_exec_symbol_size; i++)
115 {
116 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
117  // Check whether this graph exec symbol has any duplicate.
118 for (j = i + 1; j < graph_exec_symbol_size;)
119 {
120 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
121   // If there is an identical graph exec symbol, remove it.
122 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
123 {
124 if (j + 1 < graph_exec_symbol_size)
125 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
126 --graph_exec_symbol_size;
127 continue;
128 }
129 ++j;
130 }
131 }
132 return graph_exec_symbol_size;
133}
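// Note on the dedup above: a duplicate is removed by overwriting it with the current
// last element and shrinking the count, so relative order may change; for example,
// [A, A, B, C] compacts to [A, C, B] and the function returns the new size, 3.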
134
135void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
136{
137 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
138 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
139 int i;
140 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
141 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
142 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
143 {
144  const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
145 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
146 {
147 // Only add to parameter_indices if it is trainable.
148 if (add_to_array_context->add_parameter_indices)
149 ccv_array_add_unique_int(model->parameter_indices, i);
150 // Found it, return, don't add it.
151 return;
152 }
153 }
154 // Only add to parameter_indices if it is trainable.
155 if (add_to_array_context->add_parameter_indices)
156 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
157 // This is a new one, no need to add_unique_int, it is unique.
158 ccv_array_push(add_to_array_context->symbols, &symbol);
159 if (add_to_array_context->trainables)
160 ccv_array_push(add_to_array_context->trainables, &is_trainable);
161 char id[2048];
162 id[0] = add_to_array_context->prefix;
163 id[1] = '-';
164 int total_len = 2;
165 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
166 {
167  const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
168 int len;
169 if (name->name && name->name[0] != '\0')
170 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
171 else
172 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
173 total_len += len;
174 if (total_len >= 2047)
175 break;
176 }
177 if (total_len < 2047)
178 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
179 assert(total_len < 2048);
180 char *heap_id = (char*)ccmalloc(total_len + 1);
181 memcpy(heap_id, id, total_len + 1);
182 ccv_array_push(add_to_array_context->ids, &heap_id);
183 ++add_to_array_context->sequence->it;
184}
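// The id built above is '<prefix>-' followed by one '<name>-<sequence>-' (or '<sequence>-')
// segment per enclosing model, and ends with the iteration counter, so a parameter id may
// look like "t-dense-0-3" (illustrative name; 't' is the prefix used for parameters below).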
185
186static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
187{
188 compiled_data->f = compiled_data->fits + output_size;
189 compiled_data->xpu_alloc.mp_hdr = -1;
190 compiled_data->xpu_alloc.freed = kh_init(dy_str);
191 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
192 compiled_data->gradient_checkpoints = gradient_checkpoints;
193}
194
195static int _ccv_cnnp_model_root_parallel_count(const ccv_cnnp_model_t* const model)
196{
197 return ccv_max(model->parallel_count, 1);
198}
199
200static int _ccv_cnnp_model_effective_parallel_count(const ccv_cnnp_model_t* const model)
201{
202 int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
203 if (model->graph && model->graph->data_parallel.count > parallel_count)
204 parallel_count = model->graph->data_parallel.count;
205 return parallel_count;
206}
207
208static int _ccv_cnnp_compiled_data_parallel_count(const ccv_cnnp_model_t* const model, const ccv_cnnp_compiled_data_t* const compiled_data)
209{
210 return compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_effective_parallel_count(model);
211}
212
213ccv_nnc_tensor_symbol_t ccv_cnnp_model_get_symbol(ccv_cnnp_model_t* const self, const ccv_nnc_tensor_symbol_t symbol)
214{
215 assert(self->data);
216 ccv_cnnp_model_build_data_t* const build_data = (ccv_cnnp_model_build_data_t*)self->data;
217 if (build_data->parallel_count <= 1 || build_data->parallel_rank == 0)
218 return symbol;
219 const int rank = build_data->parallel_rank;
220 assert(rank > 0);
221 assert(rank < build_data->parallel_count);
222 ccv_nnc_symbolic_graph_t* const graph = (ccv_nnc_symbolic_graph_t*)symbol.graph;
223 ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, symbol, rank);
224 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
225 return copy;
226 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, symbol);
227 if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
228  CCV_TENSOR_SET_DEVICE_ID(params.type, rank);
229 copy = ccv_nnc_tensor_symbol_new(graph, params, 0);
230 ccv_nnc_tensor_symbol_set_copy(graph, symbol, rank, copy);
231 return copy;
232}
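// In short: for parallel rank > 0 this returns the per-rank copy of the symbol, lazily
// creating it (placed on the rank's GPU device when the tensor lives in GPU memory) and
// registering it through ccv_nnc_tensor_symbol_set_copy so later lookups find it.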
233
234typedef struct {
235 void* old_graph_exec_symbol_new_hook_context;
236 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
237 ccv_nnc_symbolic_graph_t* graph;
238 ccv_cnnp_model_build_data_t* build_data;
239} ccv_cnnp_model_set_exec_flags_context_t;
240
241static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
242{
243 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
244 if (flags_context->build_data->exec_flags)
245 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
246 if (flags_context->old_graph_exec_symbol_new_hook)
247 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
248}
249
250static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
251{
252 assert(model->graph);
253 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
254 int i;
255 for (i = 0; i < input_size; i++)
256 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
257 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
258 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
259 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
260 ccv_cnnp_model_sequence_t model_sequence = {
261  .bank = kh_init(ccv_cnnp_model_name_bank)
262 };
263 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
264 .add_parameter_indices = 1,
265 .prefix = 't',
266 .sequence = &model_sequence,
267 .symbols = parameters,
268 .ids = parameter_ids,
269 .trainables = parameter_trainables,
270 };
271 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
272 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
273 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
274 .add_parameter_indices = 0,
275 .prefix = 'r',
276 .sequence = &model_sequence,
277 .symbols = internals,
278 .ids = internal_ids,
279 .trainables = 0,
280 };
281 ccv_cnnp_model_build_data_t build_data = {
282 .exec_flags = 0,
283 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
284 .parallel_count = 1,
285 .parallel_rank = 0,
286 .model_sequence = &model_sequence,
287 .add_to_array = ccv_cnnp_model_add_to_array,
288 .parameters = parameters,
289 .context = {
290 .add_to_parameter = &add_to_parameter_context,
291 .add_to_output = &add_to_output_context,
292 },
293 .gradient_checkpoints = 0,
294 };
295 model->data = &build_data;
296 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
297 .graph = model->graph,
298 .build_data = &build_data,
299 .old_graph_exec_symbol_new_hook = 0,
300 .old_graph_exec_symbol_new_hook_context = 0
301 };
302 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
303 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
304 // Reset back to previous hook.
305 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
306 for (i = 0; i < model->output_size; i++)
307 {
308 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
309 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
310 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
311 continue;
312  // If the output is an alias, insert a data transform regardless, for correctness (we cannot bind an alias). See the ccv_nnc_tensor_bind_symbol method:
313  // we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind an alias tensor itself (this is expected,
314  // because we cannot handle the case where the alias covers part of the original tensor but is bound differently).
315 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
316 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
317  ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
318 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
319 }
320 model->data = 0;
321 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
322 if (model_sequence.sequences)
323 ccv_array_free(model_sequence.sequences);
324 // Check if there are parameters that are not trainable. If there are, we will allocate a uint64 bitmap to record that.
325 int not_trainables = 0;
326 // Assert no parameter is alias.
327 for (i = 0; i < parameters->rnum; i++)
328 {
329  const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
330  const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
331  assert(alias_to.graph == 0); // Cannot find the one it aliases to.
332  if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
333   not_trainables = 1;
334 }
335 assert(parameters->rnum == parameter_trainables->rnum);
336 uint64_t* parameter_flags = 0;
337 if (not_trainables)
338 {
339  parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
340 for (i = 0; i < parameter_trainables->rnum; i++)
341   if (*(int*)ccv_array_get(parameter_trainables, i))
342 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
343 }
344 ccv_array_free(parameter_trainables);
345 // Assert no internal is alias.
346 for (i = 0; i < internals->rnum; i++)
347 {
348  const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
349  const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
350  assert(alias_to.graph == 0); // Cannot find the one it aliases to.
351 }
352 const int output_size = model->output_size;
353 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
354 const int parameters_rnum = parameters->rnum;
355 if (input_size > 0)
356 {
357 ccv_array_resize(parameters, parameters_rnum + input_size);
358  memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
359 }
360 ccv_nnc_symbolic_graph_simplify(model->graph,
361  SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
362   CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
363   CCV_NNC_SIMPLIFY_OPS_FUSION,
364   CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
365  ccv_array_get(parameters, 0), parameters_rnum + input_size,
366  model->outputs, output_size,
367  SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
368 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
369 // Size it down.
370 parameters->rnum = parameters_rnum;
371 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
372 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
373 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
374 assert(evaluate_to_size > 0);
375 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
376 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
377 compiled_data->loss = loss;
378 if (loss.cmd == CCV_NNC_NOOP)
379 {
380 // If no loss function provided, there is no fits.
381 for (i = 0; i < output_size; i++)
382 {
383   compiled_data->fits[i] = NO_TENSOR_SYMBOL;
384 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
385 if (alias_to.d < 0)
386 compiled_data->f[i] = model->outputs[i];
387 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
388    int ofs[CCV_NNC_MAX_DIM_ALLOC];
389    int inc[CCV_NNC_MAX_DIM_ALLOC];
390 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
391 int j;
392    for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
393    { assert(ofs[j] == 0); } // There is no ofs.
394 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
395 }
396 }
397 } else {
398 for (i = 0; i < output_size; i++)
399 {
400 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
401 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
402 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
403   ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
404 }
405 }
406 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
407 ccv_nnc_symbolic_graph_simplify(model->graph,
408  SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
409  0, 0, // No need to provide binds at this point.
410  compiled_data->f, model->output_size,
411  SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
412 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
413 // If inputs are from GPU, stream type is GPU.
414 compiled_data->parameters = parameters;
415 compiled_data->parameter_flags = parameter_flags;
416 compiled_data->internals = internals;
417 compiled_data->ids.parameters = parameter_ids;
418 compiled_data->ids.internals = internal_ids;
419 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
420}
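// Compile pipeline recap (from the function above): build the graph via ccv_cnnp_model_build,
// insert format transforms so no output is an alias, autogen execs, simplify (common
// subexpression elimination, data transfer opt, ops fusion, graph pruning), then either take
// the outputs as f directly when loss.cmd == CCV_NNC_NOOP, or append one loss node per output
// with a fresh fit symbol, and finish with an ops-fusion-only simplify over f.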
421
422static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
423{
424 ccv_array_t* const stack = (ccv_array_t*)context;
425 ccv_array_push(stack, &symbol.d);
426}
427
428static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
429{
430 const ccv_nnc_tensor_symbol_t src_symbol = {
431 .d = src_index,
432 .graph = src_graph
433 };
434 const ccv_nnc_tensor_symbol_t dest_symbol = {
435 .d = dest_index,
436 .graph = dest_graph
437 };
438 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
439 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
440 int ofs[CCV_NNC_MAX_DIM_ALLOC];
441 int inc[CCV_NNC_MAX_DIM_ALLOC];
442 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
443 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
444}
445
446static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
447{
448 const ccv_nnc_tensor_symbol_t src_symbol = {
449 .d = src_index,
450 .graph = src_graph
451 };
452 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
453 const ccv_nnc_tensor_symbol_t dest_symbol = {
454 .d = dest_index,
455 .graph = dest_graph
456 };
457 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
458 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
459}
460
461static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
462static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
463
464typedef struct {
465 int parallel_count;
466 ccv_nnc_symbolic_graph_t* graph;
467 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
468} ccv_nnc_graph_exec_update_t;
469
470static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
471{
472 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
473 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
474 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
475 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
476 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
477 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
478 const int parallel_count = graph_exec_update->parallel_count;
479 int i;
480 for (i = 1; i < parallel_count; i++)
481 {
482 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
483  if (!CCV_NO_GRAPH_EXEC(copy))
484 {
485 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
486 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
487 }
488 }
489}
490
491void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
492{
493 assert(model->graph);
494 assert(model->compiled_data);
495 assert(!init->graph);
496 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
497 init->graph = ccv_nnc_symbolic_graph_new();
498 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
499 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
500 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
501 init->parallel_count = model->parallel_count;
502 init->memory_compression = model->memory_compression;
503 init->memory_reduction = model->memory_reduction;
504 init->gradient_checkpointing = model->gradient_checkpointing;
505 init->compiled_data->stream_type = model->compiled_data->stream_type;
506 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
507 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
508 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
509 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
510 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
511 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
512 int i, j;
513 // Verify that parameters, internals, and saved_aux in both graphs have the same dimensionality.
514 for (i = 0; i < compiled_data->parameters->rnum; i++)
515 {
516  const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
517  assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
518 }
519 for (i = 0; i < compiled_data->internals->rnum; i++)
520 {
521  const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
522  assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
523 }
524 // Update inputs.
525 assert(model->input_size == init->input_size);
526 for (i = 0; i < model->input_size; i++)
527 if (model->inputs[i].d >= 0)
528 {
529   assert(init->inputs[i].d >= 0);
530 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
531 }
532 // Update outputs.
533 assert(model->output_size == init->output_size);
534 for (i = 0; i < model->output_size; i++)
535 {
536 if (model->outputs[i].d >= 0)
537 {
538   assert(init->outputs[i].d >= 0);
539 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
540 }
541 if (model->outputs[i].d != model->compiled_data->f[i].d)
542 {
543   assert(init->outputs[i].d != init->compiled_data->f[i].d);
544 if (model->compiled_data->f[i].d >= 0)
545 {
546    assert(init->compiled_data->f[i].d >= 0);
547 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
548 }
549 }
550 }
551 // Go through the graph to set tensors on matching symbols.
552 for (i = 0; i < stack->rnum; i++)
553 {
554  const int d = *(int*)ccv_array_get(stack, i);
555  // If it exceeds the range, skip.
556 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
557 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
558 continue;
559 const ccv_nnc_graph_exec_symbol_t src_symbol = {
560 .d = d,
561 .graph = init->graph
562 };
563 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
564 .d = d,
565 .graph = model->graph
566 };
567 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
568 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
569  // If the command doesn't match, skip.
570 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
571 continue;
572 // Now get all the inputs and outputs, if matches, set them.
573 const int* src_inputs;
574 int src_input_size;
575 const int* src_outputs;
576 int src_output_size;
577 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
578 const int* dest_inputs;
579 int dest_input_size;
580 const int* dest_outputs;
581 int dest_output_size;
582 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
583  // We may have mismatched input / output sizes because this is the minimizer and it has
584  // a different saved_aux (for example, when we shrunk it with CMD_NOOP).
585 if (src_input_size != dest_input_size)
586 continue;
587 if (src_output_size != dest_output_size)
588 continue;
589 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
590  // There may be mismatches between the source tensor symbols and destination tensor symbols. The reason is that
591  // we may pass in the minimizer later; therefore, we may allocate tensors for the minimizer later in the original
592  // graph, whereas the newly created graph is streamlined (the minimizer exists from the beginning). That
593  // makes the order of tensor symbol creation different, and therefore which tensor is which can be wrong as
594  // well. However, setting a new minimizer won't change the exec symbol ordering, because we never create new
595  // exec symbols after the gradient init step. Setting a new minimizer just updates those exec symbols' settings;
596  // it does not create a new exec symbol.
597 for (j = 0; j < src_input_size; j++)
598 if (src_inputs[j] >= 0)
599 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
600 for (j = 0; j < src_output_size; j++)
601 if (src_outputs[j] >= 0)
602 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
603 }
604 ccv_array_free(stack);
605 // After this, we get all tensors in the model graph resolved through tensor_auto.
606 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
607 // Verify the symbols we get match.
608 const int parameter_size = compiled_data->parameters->rnum;
609 for (i = 0; i < parameter_size; i++)
610  { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
611 const int internal_size = compiled_data->internals->rnum;
612 for (i = 0; i < internal_size; i++)
613  { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
614 // Go through compiled data.
615 if (compiled_data->tensor_arena)
616 {
617 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
618 if (flag == 0 && compiled_data->graph_exec_arena)
619 {
620 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
621 // Since we will reinit, if we previously set is_test, we need to set it again.
622 if (compiled_data->is_test)
623 {
624     const int parallel_count = ccv_max(model->parallel_count, 1);
625 ccv_nnc_graph_exec_update_t update = {
626 .parallel_count = parallel_count,
627 .graph = model->graph,
628 .graph_exec_arena = compiled_data->graph_exec_arena,
629 };
630 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
631 }
632 } else
633   // Free up the tensor arena & graph exec arena.
634 _ccv_cnnp_compiled_data_graph_free(compiled_data);
635 }
636 // There are other compiled graphs, for accum and apply gradients.
637 // However, the main conclusion is that these absorb operations shouldn't impact parameters.
638 // Thus, they won't impact the shape of gradients (only outgrad). Since we don't allocate
639 // outgrad ourselves, it is not a concern. For normal gradients, the shape cannot change,
640 // otherwise the parameters' shape would be meaningless. The same goes for internals.
641 // That is why we don't update these compiled graphs at all at this point.
642 // Free the model, we've already "absorbed" it.
643 ccv_cnnp_model_free(init);
644}
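// In short: absorb recompiles a copy ("init") against the new input shapes, copies the
// resulting tensor parameters back onto the matching symbols of the existing graph,
// re-resolves shapes with tensor_auto, and reinits the tensor arena in place when possible,
// so existing parameter storage is kept.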
645
646void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
647{
648 assert(input_size == model->input_size || model->input_size == 0);
649 if (model->input_size == 0)
650 model->input_size = input_size;
651 if (!model->graph) // The graph is not compiled yet.
652 {
653 model->graph = ccv_nnc_symbolic_graph_new();
654 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
655  assert(model->compiled_data);
656 int i, flag = 0;
657 for (i = 0; !flag && i < input_size; i++)
658   flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
659 // If inputs are from GPU, stream type is GPU.
660 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
661 model->compiled_data->minimize.minimizer = minimizer;
662 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
663 } else {
664 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
665  // And then absorb the "new model" into the old one.
666 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
667 ccv_cnnp_model_absorb(model, init, inputs, input_size);
668 // Reset minimizer.
669 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
670 }
671}
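// Usage sketch (hedged; the tensor parameters are placeholders): a first call compiles the
// graph, and a later call with new input shapes goes through the absorb path above.
//   ccv_nnc_tensor_param_t input_params = { 0 }; // fill in type / format / dim
//   ccv_cnnp_model_compile(model, &input_params, 1, minimizer, loss);
// Per _ccv_cnnp_model_compile, passing a loss of ccv_nnc_cmd(CCV_NNC_NOOP, 0, ccv_nnc_cmd_auto, 0)
// means no fits are created and the model outputs are differentiated against directly.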
672
673ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
674{
675 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
676 new_model->is_trainable = is_trainable;
677 return new_model;
678}
679
680void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
681{
682 assert(model->graph);
683 assert(output_size == model->output_size);
684 ccv_nnc_symbolic_graph_t* const graph = model->graph;
685 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
686 int i;
687 for (i = 0; i < output_size; i++)
688 {
689  assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
690 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
691 }
692}
693
694void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
695{
696 if (workspace_size == model->workspace_size)
697 return;
698 model->workspace_size = workspace_size;
699 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
700 if (compiled_data && compiled_data->graph)
701  ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
702}
703
704size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
705{
706 return model->workspace_size;
707}
708
709void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
710{
711 if (parallel == 0)
712 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
713 else
714 model->parallel_count = parallel;
715 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
716 if (compiled_data)
717 { assert(!compiled_data->graph); }
718}
719
720void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
721{
722 model->max_stream_count = max_stream_count;
723 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
724 if (compiled_data)
725 { assert(!compiled_data->graph); }
726}
727
728void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
729{
730 model->memory_compression = memory_compression;
731 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
732 if (compiled_data)
733 { assert(!compiled_data->graph); }
734}
735
736void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
737{
738 model->memory_reduction = memory_reduction;
739 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
740 if (compiled_data)
741 { assert(!compiled_data->graph); }
742}
743
744void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
745{
746 model->gradient_checkpointing = gradient_checkpointing;
747}
748
749int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
750{
751 return model->gradient_checkpointing;
752}
753
754typedef struct {
755 int parallel_count;
756 ccv_nnc_symbolic_graph_t* graph;
757 ccv_cnnp_compiled_data_t* compiled_data;
758 ccv_nnc_tensor_arena_t* tensor_arena;
759} ccv_nnc_tensor_init_states_t;
760
761static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
762{
763 int i;
764 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
765 for (i = 0; i < compiled_data->parameters->rnum; i++)
766 {
767  const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
768 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
769 return 1;
770 }
771 for (i = 0; i < compiled_data->internals->rnum; i++)
772 {
773  const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
774 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
775 return 1;
776 }
777 return 0;
778}
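// tensors_init.v doubles as a bitmap: bit d (word d >> 5, bit d & 0x1f) marks tensor symbol d
// as initialized, and the function above reports whether any parameter or internal still has
// its bit clear (i.e. still needs its init command run).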
779
780static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
781{
782 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
783 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
784 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
785 if (!output_tensor)
786 return;
787 const int d = output_symbol.d;
788 assert(d < tensor_init_states->compiled_data->tensors_init.size);
789 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
790 if (init_v[d >> 5] & (1u << (d & 0x1f)))
791 return;
792 init_v[d >> 5] |= (1u << (d & 0x1f));
793 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
794 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
795 const int parallel_count = tensor_init_states->parallel_count;
796 int i;
797 for (i = 1; i < parallel_count; i++)
798 {
799 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
800 if (copy)
801   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
802 }
803}
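// After initializing the rank-0 tensor, the loop above broadcasts the value to each parallel
// copy with a DATA_TRANSFER command, so every data-parallel replica starts from the same state.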
804
805 // This method can only handle cases where we added new tensors and execs, never deleted them. This invariant holds because
806 // we set up everything (including calling the simplify method) in the ccv_cnnp_model_compile method, before this rewind setup.
807static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
808{
809 assert(model->graph);
810 assert(model->compiled_data);
811 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
812 assert(compiled_data->rewindables);
813 int i;
814 for (i = 0; i < compiled_data->rewindables->rnum; i++)
815 {
816  const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
817 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
818 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
819 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
820 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
821 }
822 ccv_array_clear(compiled_data->rewindables);
823 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
824}
825
826static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
827{
828 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
829 .type = CCV_CNNP_REWIND_TENSOR,
830 .tensor = symbol
831 };
832 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
833 ccv_array_push(rewind_symbols, &rewind_symbol);
834}
835
836 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
837{
838 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
839 .type = CCV_CNNP_REWIND_TENSOR,
840 .tensor = symbol
841 };
842 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
843 ccv_array_push(rewind_symbols, &rewind_symbol);
844}
845
846static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
847{
848 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
849 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
850 .graph_exec = symbol
851 };
852 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
853 ccv_array_push(rewind_symbols, &rewind_symbol);
854}
855
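Note: the three hooks above implement an undo log: every tensor symbol, alias, and graph exec symbol created after the hooks are installed is appended to compiled_data->rewindables, and _ccv_cnnp_model_rewind_graph (line 807) replays that log to free them — safe precisely because of the append-only invariant stated at line 805. A simplified standalone sketch of the pattern (stand-in types, not the real ccv_nnc API):

#include <stdio.h>

typedef enum { REWIND_TENSOR, REWIND_GRAPH_EXEC } rewind_type_t;
typedef struct { rewind_type_t type; int d; } rewind_symbol_t;

static rewind_symbol_t rewindables[256];
static int rewind_count = 0;

/* Creation hook: record each new symbol so it can be undone later. */
static void on_symbol_new(const rewind_type_t type, const int d)
{
    rewindables[rewind_count++] = (rewind_symbol_t){ .type = type, .d = d };
}

/* Rewind: free everything recorded since the hooks were installed. */
static void rewind_graph(void)
{
    int i;
    for (i = 0; i < rewind_count; i++)
        printf("freeing %s symbol %d\n", rewindables[i].type == REWIND_TENSOR ? "tensor" : "exec", rewindables[i].d);
    rewind_count = 0;
}

int main(void)
{
    on_symbol_new(REWIND_TENSOR, 7);     /* would fire from a tensor symbol new hook */
    on_symbol_new(REWIND_GRAPH_EXEC, 3); /* would fire from a graph exec new hook */
    rewind_graph();
    return 0;
}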
856static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
857{
858 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
859 if (!CCV_NO_GRAPH_EXEC(update_exec))
860 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
861 int i;
862 for (i = 1; i < parallel_count; i++)
863 {
864 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
865 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
866  if (!CCV_NO_GRAPH_EXEC(copy))
867 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
868 }
869}
870
871static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
872{
873 assert(compiled_data);
874 assert(symbolic_graph);
875 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
876 int i;
877 for (i = 1; i < parallel_count; i++)
878 {
879 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
880 if (copy_symbol.graph)
881 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
882 }
883 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
884 if (graph_exec_arena)
885 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
886 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
887 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
888 if (gradient_graph_exec_arena)
889 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
890}
891
892static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
893{
894 int this_parameter_flag = 0;
895 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
896 return this_parameter_flag;
897 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
898 int j, k;
899 // For no-op, we can preserve previous saved_aux_size.
900 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
901 {
902  // If the old minimizer is a noop, the old_saved_aux_size should be whatever its previous
903  // saved_aux_size was; otherwise we would reinit the saved_aux repeatedly when switching between
904  // noop and a real minimizer. We don't want that, because high-level frameworks switch to noop
905  // deliberately to make sure some model parameters don't update when we don't want them to.
906 int old_saved_aux_size;
907 if (old_minimizer.cmd == CCV_NNC_NOOP)
908 {
909 int input_size;
910 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
911 if (input_size < 2) // This is not legit.
912 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
913 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
914 old_saved_aux_size = input_size - 2;
915 } else
916 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
917 if (old_saved_aux_size != saved_aux_size)
918 {
919 this_parameter_flag = 1;
920 if (saved_aux_size > old_saved_aux_size)
921 {
922 // Allocate new tensor symbols.
923 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
924 for (j = old_saved_aux_size; j < saved_aux_size; j++)
925 {
926 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
927 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
928     const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
929 for (k = 1; k < parallel_count; k++)
930 {
931 ccv_nnc_tensor_param_t dev_info = info;
932 if (k != device_id)
933       CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
934      else
935       CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
936 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
937 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
938 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
939 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
940 }
941 }
942 } else {
943 for (j = saved_aux_size; j < old_saved_aux_size; j++)
944 {
945 for (k = 1; k < parallel_count; k++)
946 {
947 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
948 if (src_copy.d >= 0)
949 {
950 ccv_nnc_tensor_symbol_free(graph, src_copy);
951       ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
952 }
953 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
954 if (dest_copy.d >= 0)
955 {
956 ccv_nnc_tensor_symbol_free(graph, dest_copy);
957       ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
958 }
959 }
960 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
961 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
962      saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
963 }
964 }
965 }
966 }
967 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
968 if (this_parameter_flag)
969 {
970 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
971 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
972 const int* inputs = 0;
973 int input_size = 0;
974 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
975   assert(input_size >= 1);
976 update_inputs[0].d = inputs[0];
977 update_inputs[0].graph = graph;
978 update_inputs[1].d = inputs[1];
979 update_inputs[1].graph = graph;
980 update_outputs[0] = updated_parameters[parameter_indice];
981 for (j = 0; j < saved_aux_size; j++)
982 {
983 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
984 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
985 }
986 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
987 for (k = 1; k < parallel_count; k++)
988 {
989 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
990    assert(copy.d >= 0);
991    ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
992    assert(input_size >= 1);
993 update_inputs[0].d = inputs[0];
994 update_inputs[0].graph = graph;
995 update_inputs[1].d = inputs[1];
996 update_inputs[1].graph = graph;
997 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
998 for (j = 0; j < saved_aux_size; j++)
999 {
1000 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
1001 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
1002 }
1003 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
1004 }
1005 }
1006 return this_parameter_flag;
1007}
1008
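Note: throughout the function above, saved_aux is addressed as saved_aux[parameter_indice * max_saved_aux_size + j]: a flattened [parameter][aux] matrix whose row stride is the maximum aux count over all minimizers, so a parameter whose minimizer needs fewer slots simply leaves the tail of its row at NO_TENSOR_SYMBOL. A small standalone sketch of that layout (hypothetical int values standing in for the symbol type):

#include <stdlib.h>
#include <stdio.h>

#define NO_SYMBOL (-1)

int main(void)
{
    const int parameter_size = 3;
    const int max_saved_aux_size = 2; /* e.g. a minimizer needing 2 aux tensors */
    int* const saved_aux = (int*)malloc(sizeof(int) * parameter_size * max_saved_aux_size);
    const int saved_aux_size = 1; /* this minimizer only uses 1 aux slot */
    int i, j;
    for (i = 0; i < parameter_size; i++)
        for (j = 0; j < max_saved_aux_size; j++)
            /* used slots get a symbol, the rest of the row stays empty */
            saved_aux[i * max_saved_aux_size + j] = (j < saved_aux_size) ? i * 10 + j : NO_SYMBOL;
    for (i = 0; i < parameter_size; i++)
        printf("parameter %d: aux0 = %d, aux1 = %d\n", i,
            saved_aux[i * max_saved_aux_size + 0],
            saved_aux[i * max_saved_aux_size + 1]);
    free(saved_aux);
    return 0;
}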
1009typedef struct {
1010 int parameter_size;
1011 ccv_nnc_cmd_t minimizer;
1012 ccv_cnnp_model_io_t parameters[1];
1013} ccv_cnnp_set_minimizer_for_parameter_t;
1014
1015static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
1016{
1017 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1018 assert(compiled_data);
1019 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1020 // We update all parameters; at this point, we have one minimizer.
1021 const int parameter_size = compiled_data->parameters->rnum;
1022 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
1023 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
1024 assert(symbolic_graph);
1025 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1026 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now");
1027 ccv_array_t* const parameters = compiled_data->minimize.parameters;
1028 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1029 int i, j, flag = 0;
1030 for (i = 0; i < parameters->rnum; i++)
1031 {
1032  ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
1033 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
1034 {
1035 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
1036   assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
1037   const int old_rnum = parameter_indices->rnum;
1038   ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
1039   const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
1040   assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
1041 if (param_ref >= 0)
1042 {
1043    assert(param_ref + old_rnum < parameter_indices->rnum);
1044    *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
1045 parameter_indices->rnum = old_rnum + 1;
1046 }
1047 }
1048 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1049  // We may have duplicated indices, but that is OK; we will just set those twice.
1050  for (j = 0; j < parameter_indices->rnum; j++)
1051  {
1052   const int d = *(int*)ccv_array_get(parameter_indices, j);
1053   assert(d <= parameter_size);
1054 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1055 flag = 1;
1056 }
1057 ccv_array_clear(parameter_indices);
1058 }
1059 ccv_array_free(parameter_indices);
1060 return flag;
1061}
1062
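Note: param_sel and param_ref above use a deliberate 1-based encoding: 0 is never valid (hence the asserts), a positive value n selects index n - 1, and a negative value passes through to mean "all parameters". A minimal sketch of the convention (the ALL_PARAMETERS constant here is a hypothetical stand-in):

#include <assert.h>
#include <stdio.h>

#define ALL_PARAMETERS (-1)

static int decode_selector(const int sel)
{
    assert(sel != 0); /* the same invariant the asserts above enforce */
    return sel > 0 ? sel - 1 : sel; /* negative passes through as "all" */
}

int main(void)
{
    printf("%d\n", decode_selector(3));              /* 2: the third parameter */
    printf("%d\n", decode_selector(ALL_PARAMETERS)); /* -1: all parameters */
    return 0;
}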
1063static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1064{
1065 if (new_saved_aux_size == old_saved_aux_size)
1066 return;
1067 assert(new_saved_aux_size > old_saved_aux_size);
1068 int i, j;
1069 for (i = parameter_size - 1; i >= 0; i--)
1070 {
1071 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1072   saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
1073 for (j = old_saved_aux_size - 1; j >= 0; j--)
1074 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1075 }
1076}
1077
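Note: _ccv_cnnp_scatter_saved_aux re-strides the packed [parameter][old_saved_aux_size] matrix to [parameter][new_saved_aux_size] in place; iterating rows and columns backwards guarantees no source element is overwritten before it is moved. A standalone sketch with ints in place of tensor symbol maps:

#include <stdio.h>

#define NO_SYMBOL (-1)

/* In-place re-stride from [n][old_stride] to [n][new_stride], new > old. */
static void scatter(int* const a, const int n, const int old_stride, const int new_stride)
{
    int i, j;
    for (i = n - 1; i >= 0; i--)
    {
        for (j = new_stride - 1; j >= old_stride; j--)
            a[i * new_stride + j] = NO_SYMBOL; /* fresh slots start empty */
        for (j = old_stride - 1; j >= 0; j--)
            a[i * new_stride + j] = a[i * old_stride + j];
    }
}

int main(void)
{
    int a[6] = { 1, 2, 3, 0, 0, 0 }; /* 3 rows x stride 1, spare tail space */
    scatter(a, 3, 1, 2);
    int i;
    for (i = 0; i < 6; i++)
        printf("%d ", a[i]); /* prints: 1 -1 2 -1 3 -1 */
    printf("\n");
    return 0;
}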
1078static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1079{
1080 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1081 assert(compiled_data);
1082 if (!compiled_data->rewindables)
1083 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1084 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1085 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1086 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1087}
1088
1089static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1090{
1091 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1092 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1093 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1094 const int evaluate_to_size = compiled_data->evaluate.to_size;
1095 assert(evaluate_to_size > 0);
1096 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1097 assert(_ccv_cnnp_model_effective_parallel_count(model) == parallel_count && "local replicated stateful models only support forward / no-grad evaluation for now");
1098 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1099 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1100 int i, j;
1101 const int output_size = model->output_size;
1102 assert(!fits || fit_size == output_size * parallel_count);
1103 if (fits)
1104 for (i = 0; i < output_size; i++)
1105 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1106 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1107 const int parameter_size = compiled_data->parameters->rnum;
1108 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1109 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1110 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1111 int parameter_size_maybe_more = parameter_size;
1112 compiled_data->disable_outgrad = disable_outgrad;
1113 int outgrad_size;
1114 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1115 outgrad_size = 0;
1116 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1117 outgrad_size = model->input_size;
1118 else {
1119  assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1120 outgrad_size = 0;
1121 for (i = 0; i < model->input_size; i++)
1122 if (!(disable_outgrad & ((uint64_t)1 << i)))
1123 ++outgrad_size;
1124 }
1125 compiled_data->outgrad_size = outgrad_size;
1126 parameter_size_maybe_more += outgrad_size;
1127 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1128 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1129 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1130 compiled_data->backward.to_size = parameter_size_maybe_more;
1131 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1132 if (compiled_data->parameter_flags)
1133 {
1134  parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1135 for (i = 0; i < parameter_size; i++)
1136 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1137    parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1138   else
1139    parameters[i] = NO_TENSOR_SYMBOL;
1140 }
1141 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1142  ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1143 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1144  ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1145 else { // Compute minimize with gradients including selected inputs.
1146  assert(model->input_size > 0);
1147  assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1148  assert(outgrad_size > 0);
1149 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1150 j = 0;
1151 for (i = 0; i < model->input_size; i++)
1152 if (!(disable_outgrad & ((uint64_t)1 << i)))
1153 outgrads[j++] = model->inputs[i];
1154  ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1155 }
1156 if (compiled_data->parameter_flags)
1157  ccfree(parameters);
1158 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1159 if (compiled_data->minimize.parameters)
1160 _ccv_cnnp_apply_parameters_with_minimizer(model);
1161 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1162 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1163 for (i = 0; i < output_size; i++)
1164 {
1165 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1166 // Init this to 1 so we can backprop.
1167 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1168 }
1169 compiled_data->backward.to_size = 0;
1170 for (i = 0; i < parameter_size_maybe_more; i++)
1171 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1172 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1173 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1174 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1175 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1176 {
1177 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1178 continue;
1179 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1180 const int* tos;
1181 int to_size;
1182 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1183 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1184 {
1185 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1186 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1187 int flag = 0;
1188    const int outgrad_destination_start = ccv_max(0, destination_count - i);
1189 for (j = i - 1; !flag && j >= 0; j--)
1190 if (j + outgrad_destination_start < destination_count)
1191 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1192 if (!flag) // Only if we cannot find it, we add it.
1193 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1194 }
1195 }
1196 if (parallel_count > 1)
1197 {
1198 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1199 0, 0,
1200 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1201 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1202 0, 0, 0,
1203 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1204   SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1205 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1206 for (i = 0; i < evaluate_to_size; i++)
1207 for (j = 1; j < parallel_count; j++)
1208 {
1209 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1210 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1211 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1212 }
1213 const int backward_to_size = compiled_data->backward.to_size;
1214 for (i = 0; i < backward_to_size; i++)
1215 for (j = 1; j < parallel_count; j++)
1216 {
1217 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1218 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1219 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1220 }
1221 }
1222 // Only use memory compression if we are in gradient parameter mode.
1223 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1224 {
1225 if (model->memory_compression)
1226   ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1227  if (model->memory_reduction)
1228   ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1229 }
1230 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1231 compiled_data->gradient_mode = gradient_mode;
1232}
1233
1234void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1235{
1236 assert(!compiled_data->tensors.parameters);
1237 const int parameter_size = compiled_data->parameters->rnum;
1238 const int parallel_count = _ccv_cnnp_model_effective_parallel_count(model);
1239 compiled_data->parallel_count = parallel_count;
1240 const int internal_size = compiled_data->internals->rnum;
1241 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1242 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1243 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1244 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1245}
1246
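Note: lines 1243-1244 carve a single cccalloc into two logical arrays: the internals pointer is just an offset into the parameters allocation, so one free releases both. A tiny standalone sketch of the same carving (hypothetical sizes):

#include <stdlib.h>
#include <stdio.h>

int main(void)
{
    const int parameter_size = 4, internal_size = 2, parallel_count = 2;
    /* One allocation sized for both blocks... */
    void** const parameters = (void**)calloc(
        (parameter_size + internal_size) * parallel_count, sizeof(void*));
    /* ...with the second block starting where the first one ends. */
    void** const internals = parameters + parameter_size * parallel_count;
    (void)internals;
    printf("parameters: %d slots, internals: %d slots, one free() for both\n",
        parameter_size * parallel_count, internal_size * parallel_count);
    free(parameters); /* a single free releases both blocks */
    return 0;
}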
1247int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1248{
1249 int i, j;
1250 const int parameter_size = compiled_data->parameters->rnum;
1251 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1252 const int internal_size = compiled_data->internals->rnum;
1253 for (i = 0; i < parameter_size; i++)
1254 {
1255  // parameters have to be allocated all together.
1256 if (compiled_data->tensors.parameters[i])
1257 {
1258 for (j = 1; j < parallel_count; j++)
1259    { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1260 continue;
1261 }
1262 return 1;
1263 }
1264 for (i = 0; i < internal_size; i++)
1265 {
1266 if (!compiled_data->tensors.internals[i])
1267 return 1;
1268 for (j = 1; j < parallel_count; j++)
1269 if (!compiled_data->tensors.internals[i + j * internal_size])
1270 return 1;
1271 }
1272 return 0;
1273}
1274
1275void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1276{
1277 int i, j;
1278 const int parameter_size = compiled_data->parameters->rnum;
1279 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1280 compiled_data->parallel_count = parallel_count;
1281 const int internal_size = compiled_data->internals->rnum;
1282 for (i = 0; i < parameter_size; i++)
1283 {
1284  // parameters have to be allocated all together.
1285 if (compiled_data->tensors.parameters[i])
1286 {
1287 for (j = 1; j < parallel_count; j++)
1288    { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1289 continue;
1290 }
1291  const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1292 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1293  if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1294   CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1295  const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1296 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1297 for (j = 1; j < parallel_count; j++)
1298 {
1299 if (j != device_id)
1300    CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1301   else
1302    CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1303 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1304 }
1305 }
1306 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1307 for (i = 0; i < internal_size; i++)
1308 {
1309  const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1310 const int d = retained.d;
1311 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1312 continue;
1313 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1314  if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1315   CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1316  const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1317 if (!compiled_data->tensors.internals[i])
1318 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1319 for (j = 1; j < parallel_count; j++)
1320 {
1321 if (j != device_id)
1322    CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1323   else
1324    CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1325 if (!compiled_data->tensors.internals[i + j * internal_size])
1326 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1327 }
1328 }
1329 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1330}
1331
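Note: tensors_init.v is a low-bit tagged pointer: allocations are at least 4-byte aligned, so bit 0 is free to mean "not fully allocated yet" (checked in _ccv_cnnp_model_fit_jit around line 1507), and CCV_NNC_INIT_V masks it off before any dereference; line 1329 clears the tag once init_1 finishes. A standalone sketch of the tagging scheme (TAG/UNTAG/TAGGED are hypothetical macro names, not the ccv API):

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#define TAG(p)    ((uint32_t*)((uintptr_t)(p) | (uintptr_t)1))
#define UNTAG(p)  ((uint32_t*)((uintptr_t)(p) & ~(uintptr_t)1))
#define TAGGED(p) ((int)((uintptr_t)(p) & (uintptr_t)1))

int main(void)
{
    uint32_t* v = (uint32_t*)calloc(4, sizeof(uint32_t));
    v = TAG(v);                       /* mark: some tensors still unallocated */
    printf("needs init_1: %d\n", TAGGED(v));
    UNTAG(v)[0] |= 1u;                /* always dereference the untagged form */
    v = UNTAG(v);                     /* "Remove 1 if any", as at line 1329 */
    printf("needs init_1: %d\n", TAGGED(v));
    free(v);
    return 0;
}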
1332static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1333{
1334 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1335 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1336}
1337
1338static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1339{
1340 assert(parallel_count > 0);
1341 int i, j;
1342 for (i = 0; i < tensor_size; i++)
1343 {
1344 if (!tensors[i])
1345 continue;
1346 const int d = tensor_symbols[i].d;
1347 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1348 continue;
1349 for (j = 1; j < parallel_count; j++)
1350 if (tensors[i + j * tensor_size])
1351 {
1352    ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1353    ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1354    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1355 }
1356 }
1357}
1358
1359static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1360{
1361 assert(parallel_count > 0);
;
1362 int i, j;
1363 for (i = 0; i < tensor_size; i++)
1364 {
1365 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1366 for (j = 1; j < parallel_count; j++)
1367 {
1368 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1369 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1370 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1371   { // We shouldn't have allocated this; free it up.
1372 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1373 tensors[i + j * tensor_size] = 0;
1374 }
1375 }
1376 }
1377}
1378
1379static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1380{
1381 assert(parallel_count > 0);
;
1382 int i, j;
1383 for (i = 0; i < tensor_size; i++)
1384 {
1385 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1386 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1387 continue;
1388 if (graph)
1389 {
1390 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1391 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1392 tensor_symbol = alias_to;
1393 }
1394  ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1395 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1396 {
1397 const ccv_nnc_tensor_bind_t retained_bind = {
1398 .symbol = tensor_symbol,
1399 .tensor = tensor
1400 };
1401 ccv_array_push(tensor_binds, &retained_bind);
1402 }
1403 for (j = 1; j < parallel_count; j++)
1404 {
1405 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1406 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1407 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1408 {
1409 const ccv_nnc_tensor_bind_t bind = {
1410 .symbol = copy,
1411 .tensor = tensors[i + j * tensor_size]
1412 };
1413 ccv_array_push(tensor_binds, &bind);
1414 }
1415 }
1416 }
1417}
1418
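Note: _ccv_cnnp_model_bind_tensors never binds an alias symbol directly; if ccv_nnc_tensor_symbol_alias_to reports a backing symbol, the bind is redirected to it so the view and its storage stay consistent (the same resolution appears in _ccv_cnnp_bind_tensors_to_arena below). A simplified sketch of the redirection with stand-in types:

#include <stdio.h>

/* Stand-in for a tensor symbol table: alias_of is -1 for a root symbol,
   otherwise the index of the symbol whose storage this one views. */
typedef struct { int d; int alias_of; } symbol_t;

static int resolve_bind_target(const symbol_t* const symbols, const int d)
{
    if (symbols[d].alias_of >= 0)
        return symbols[d].alias_of; /* bind the backing tensor instead */
    return d;
}

int main(void)
{
    const symbol_t symbols[3] = { { 0, -1 }, { 1, -1 }, { 2, 0 } };
    /* symbol 2 is an alias of symbol 0, so the bind lands on 0 */
    printf("bind target for symbol 2: %d\n", resolve_bind_target(symbols, 2));
    return 0;
}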
1419static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1420{
1421 if (compiled_data->graph)
1422 ccv_nnc_graph_free(compiled_data->graph);
1423 compiled_data->graph = 0;
1424 compiled_data->is_test = 0;
1425 if (compiled_data->tensor_arena)
1426 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1427 compiled_data->tensor_arena = 0;
1428 if (compiled_data->graph_exec_arena)
1429 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1430 compiled_data->graph_exec_arena = 0;
1431 if (compiled_data->backward.from_ops)
1432  ccfree(compiled_data->backward.from_ops);
1433 compiled_data->backward.from_ops = 0;
1434 if (compiled_data->evaluate.schedule)
1435 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1436 compiled_data->evaluate.schedule = 0;
1437 if (compiled_data->backward.schedule)
1438 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1439 compiled_data->backward.schedule = 0;
1440}
1441
1442static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1443{
1444 if (compiled_data->gradients)
1445  ccfree(compiled_data->gradients);
1446 compiled_data->gradients = 0;
1447 if (compiled_data->updated_parameters)
1448  ccfree(compiled_data->updated_parameters);
1449 compiled_data->updated_parameters = 0;
1450 compiled_data->update_nodes = 0;
1451 compiled_data->saved_aux = 0;
1452}
1453
1454static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1455{
1456 if (compiled_data->backward.gradients)
1457  ccfree(compiled_data->backward.gradients);
1458 compiled_data->backward.gradients = 0;
1459 if (compiled_data->backward.accum)
1460 ccv_nnc_graph_free(compiled_data->backward.accum);
1461 compiled_data->backward.accum = 0;
1462 if (compiled_data->backward.tensor_arena)
1463 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1464 compiled_data->backward.tensor_arena = 0;
1465 if (compiled_data->backward.graph_exec_arena)
1466 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1467 compiled_data->backward.graph_exec_arena = 0;
1468}
1469
1470static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1471{
1472 if (compiled_data->apply_gradients.graph)
1473 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1474 compiled_data->apply_gradients.graph = 0;
1475 if (compiled_data->apply_gradients.tensor_arena)
1476 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1477 compiled_data->apply_gradients.tensor_arena = 0;
1478 if (compiled_data->apply_gradients.graph_exec_arena)
1479 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1480 compiled_data->apply_gradients.graph_exec_arena = 0;
1481}
1482
1483// Compile the graph to run ccv_cnnp_model_fit
1484static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1485{
1486 int i, j;
1487 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1488 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1489 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1490 const int parallel_count = _ccv_cnnp_model_root_parallel_count(model);
1491 assert(output_size == model->output_size * parallel_count);
1492 assert(!fits || output_size == fit_size);
1493 assert(output_size > 0);
1494 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1495 {
1496 _ccv_cnnp_model_set_rewindables(model);
1497 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1498 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1499 _ccv_cnnp_model_rewind_graph(model);
1500 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1501 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1502 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1503 }
1504 const int tensors_init = !!compiled_data->tensors_init.v;
1505 if (!tensors_init)
1506 _ccv_cnnp_model_tensors_init(model, compiled_data);
1507 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1508  // Check whether it is fully allocated; if it is not, run init_1.
1509 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1510 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1511 assert((input_size % parallel_count) == 0);
1512 assert((output_size % parallel_count) == 0);
1513 assert((fit_size % parallel_count) == 0);
1514 const int input_size_per_p = input_size / parallel_count;
1515 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1516 const int output_size_per_p = output_size / parallel_count;
1517 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1518 const int fit_size_per_p = fit_size / parallel_count;
1519 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1520 const int parameter_size = compiled_data->parameters->rnum;
1521 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1522 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1523 const int internal_size = compiled_data->internals->rnum;
1524 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1525 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1526 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1527 ccv_array_free(tensor_binds);
1528 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1529 if (tensors_init && parallel_count > 1)
1530  _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1531 // If tensor is not init'ed, we need to init states first.
1532 if (_ccv_cnnp_any_to_init(compiled_data))
1533 {
1534 ccv_nnc_tensor_init_states_t tensor_init_states = {
1535 .parallel_count = parallel_count,
1536 .graph = model->graph,
1537 .compiled_data = compiled_data,
1538 .tensor_arena = compiled_data->tensor_arena
1539 };
1540 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1541 }
1542 compiled_data->is_test = 0;
1543 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1544 // No need to set because it defaults to training mode.
1545 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1546 for (i = 0; i < saved_aux_size * parameter_size; i++)
1547 {
1548 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1549 continue;
1550 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1551  ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1552 for (j = 1; j < parallel_count; j++)
1553 {
1554 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1555 if (copy)
1556    ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1557 }
1558 }
1559 const int evaluate_to_size = compiled_data->evaluate.to_size;
1560 compiled_data->evaluate.to_op_size = 0;
1561 for (i = 0; i < evaluate_to_size; i++)
1562 {
1563 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1564 if (to.graph)
1565 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1566 }
1567 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1568 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1569}
1570
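Note: the % parallel_count asserts and the _per_p sizes in _ccv_cnnp_model_fit_jit encode a replica-major layout: the flat inputs array holds parallel_count replicas of input_size / parallel_count tensors each, and replica j's copy of input i sits at index i + j * input_size_per_p. A tiny standalone sketch of the index math:

#include <stdio.h>

int main(void)
{
    const int input_size = 6, parallel_count = 2;
    const int input_size_per_p = input_size / parallel_count; /* asserts guarantee divisibility */
    int i, j;
    for (j = 0; j < parallel_count; j++)
        for (i = 0; i < input_size_per_p; i++)
            printf("replica %d, input %d -> flat index %d\n",
                j, i, i + j * input_size_per_p);
    return 0;
}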
1571ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1572{
1573 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1574 if (!compiled_data || !compiled_data->graph)
1575 return 0;
1576 return ccv_nnc_graph_default_stream(compiled_data->graph);
1577}
1578
1579uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1580{
1581 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1582 if (!compiled_data || !compiled_data->tensor_arena)
1583 return 0;
1584 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1585}
1586
1587static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1588{
1589 int i, j;
1590 for (i = 0; i < tensor_size; i++)
1591 {
1592 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1593 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1594 continue;
1595 if (graph)
1596 {
1597 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1598 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1599 tensor_symbol = alias_to;
1600 }
1601 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1602 for (j = 1; j < parallel_count; j++)
1603 {
1604 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1605 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1606 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1607 }
1608 }
1609}
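
Note the flat layout this binder expects: the full set of tensors for device 0 comes first, then the full set for device 1, and so on, so copy j of tensor i lives at index i + tensor_size * j. A small sketch of that addressing, with hypothetical sizes:

// Hypothetical: 3 tensors replicated across 2 devices, flattened as
// [t0@d0, t1@d0, t2@d0, t0@d1, t1@d1, t2@d1].
enum { tensor_size = 3, parallel_count = 2 };
ccv_nnc_tensor_t* tensors[tensor_size * parallel_count];
int i, j;
for (j = 0; j < parallel_count; j++)
	for (i = 0; i < tensor_size; i++)
		tensors[i + tensor_size * j] = 0; // slot for copy j of tensor i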
1610
1611void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1612{
1613 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1614 assert(compiled_data);
1615 const int parallel_count = ccv_max(model->parallel_count, 1);
1616 assert(output_size == model->output_size * parallel_count);
1617 assert(input_size == model->input_size * parallel_count);
1618 assert(!fits || fit_size == output_size);
1619 assert(model->graph);
1620 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1621 {
1622 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1623 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1624 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1625 // Compile the symbolic graph down only when needed.
1626 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1627 } else {
1628 assert((input_size % parallel_count) == 0);
1629 assert((output_size % parallel_count) == 0);
1630 assert((fit_size % parallel_count) == 0);
1631 const int input_size_per_p = input_size / parallel_count;
1632 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1633 const int output_size_per_p = output_size / parallel_count;
1634 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1635 const int fit_size_per_p = fit_size / parallel_count;
1636 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1637 }
1638 if (compiled_data->is_test)
1639 {
1640 compiled_data->is_test = 0;
1641 ccv_nnc_graph_exec_update_t update = {
1642 .parallel_count = parallel_count,
1643 .graph = model->graph,
1644 .graph_exec_arena = compiled_data->graph_exec_arena,
1645 };
1646 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1647 }
1648 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1649}
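
A hedged call-site sketch for ccv_cnnp_model_fit in the simplest configuration (parallel_count == 1, one input, one output); model, x, y and out are assumed to be a compiled model and tensors of matching shapes, not names from this file:

// One training step: binds x/y/out into the arena (JIT-compiling the fit
// graph on the first call), then runs forward, backward and the optimizer.
ccv_nnc_tensor_t* inputs[] = { x };   // input_size == model->input_size
ccv_nnc_tensor_t* fits[] = { y };     // fit_size == output_size
ccv_nnc_tensor_t* outputs[] = { out };
ccv_cnnp_model_fit(model, inputs, 1, fits, 1, outputs, 1, 0, 0);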
1650
1651 // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = false (MULTISTAGE_MODE_NO_GRAD).
1652static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1653{
1654 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1655 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1656 const int parallel_count = ccv_max(model->parallel_count, 1);
1657 assert(output_size == model->output_size * parallel_count);
1658 assert(output_size > 0);
1659 // If the gradient is not initialized, continue to set up the parallel process. We don't init gradients here; rather,
1660 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1661 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1662 {
1663 const int evaluate_to_size = compiled_data->evaluate.to_size;
1664 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1665 _ccv_cnnp_model_set_rewindables(model);
1666 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1667 0, 0,
1668 0, 0, 0,
1669 0, 0, 0,
1670 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1671 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1672 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1673 int i, j;
1674 for (i = 0; i < evaluate_to_size; i++)
1675 for (j = 1; j < parallel_count; j++)
1676 {
1677 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1678 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1679 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1680 }
1681 }
1682 const int tensors_init = !!compiled_data->tensors_init.v;
1683 if (!tensors_init)
1684 _ccv_cnnp_model_tensors_init(model, compiled_data);
1685 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1686 // Check whether it is fully allocated; if not, run init_1.
1687 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1688 const int tensor_parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
1689 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1690 assert((input_size % parallel_count) == 0);
1691 assert((output_size % parallel_count) == 0);
1692 const int input_size_per_p = input_size / parallel_count;
1693 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1694 const int output_size_per_p = output_size / parallel_count;
1695 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1696 const int parameter_size = compiled_data->parameters->rnum;
1697 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, tensor_parallel_count, tensor_binds);
1698 const int internal_size = compiled_data->internals->rnum;
1699 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, tensor_parallel_count);
1700 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, tensor_parallel_count, tensor_binds);
1701 // If we generated gradients for the graph, only compile part of it because the rest is irrelevant for evaluation.
1702 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1703 ccv_array_free(tensor_binds);
1704 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1705 // If tensors are not init'ed yet, we need to init their states first.
1706 if (tensors_init && tensor_parallel_count > 1)
1707 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, tensor_parallel_count);
1708 if (_ccv_cnnp_any_to_init(compiled_data))
1709 {
1710 ccv_nnc_tensor_init_states_t tensor_init_states = {
1711 .parallel_count = tensor_parallel_count,
1712 .graph = model->graph,
1713 .compiled_data = compiled_data,
1714 .tensor_arena = compiled_data->tensor_arena
1715 };
1716 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1717 }
1718 compiled_data->is_test = 1;
1719 ccv_nnc_graph_exec_update_t update = {
1720 .parallel_count = parallel_count,
1721 .graph = model->graph,
1722 .graph_exec_arena = compiled_data->graph_exec_arena,
1723 };
1724 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1725 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1726 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1727}
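
tensors_init.v above doubles as a tagged pointer: bit 0 marks the bookkeeping as only partially allocated, and CCV_NNC_INIT_V strips that bit before the bitmap is dereferenced. A stand-alone sketch of the same trick (the names here are illustrative, not the library's):

#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	uint32_t* const bitmap = (uint32_t*)calloc(4, sizeof(uint32_t));
	// uint32_t allocations are at least 4-byte aligned, so bit 0 is free
	// to carry a "not fully allocated" flag.
	uint32_t* const tagged = (uint32_t*)((uintptr_t)bitmap | (uintptr_t)1);
	const int partial = !!((uintptr_t)tagged & (uintptr_t)1); // test the flag
	uint32_t* const v = (uint32_t*)((uintptr_t)tagged & ~(uintptr_t)1); // strip it
	v[0] |= 1u; // safe again: v is the untagged pointer
	free(v);
	return partial ? 0 : 1;
}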
1728
1729static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1730{
1731 assert(!compiled_data->tensors.gradients);
1732 const int parameter_size = compiled_data->parameters->rnum;
1733 const int parallel_count = ccv_max(model->parallel_count, 1);
1734 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1735 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1736 int i, j;
1737 for (i = 0; i < parameter_size; i++)
1738 {
1739 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1740 {
1741 compiled_data->tensors.gradients[i] = 0;
1742 compiled_data->tensors.accum_gradients[i] = 0;
1743 for (j = 1; j < parallel_count; j++)
1744 {
1745 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1746 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1747 }
1748 continue;
1749 }
1750 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1751 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1752 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1753 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1754 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1755 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1756 compiled_data->tensors.accum_gradients[i] = 0; // Delay the accumulated gradient allocation until we need it.
1757 for (j = 1; j < parallel_count; j++)
1758 {
1759 if (j != device_id)
1760 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1761 else
1762 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1763 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1764 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1765 }
1766 }
1767}
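
The inner j loop rotates device ids so every data-parallel replica of a gradient lands on a distinct device: replica j goes to device j, except that the replica whose index equals device_id swaps with device 0 (replica 0 already occupies device_id). A tiny sketch of the resulting assignment, with a hypothetical helper:

// Hypothetical: parallel_count == 4, primary gradient on device_id == 2.
// replica j:   0  1  2  3
// device used: 2  1  0  3
static int assigned_device(const int j, const int device_id)
{
	if (j == 0)
		return device_id; // the primary allocation keeps its device
	return j != device_id ? j : 0; // otherwise swap device_id's slot with 0
}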
1768
1769static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1770{
1771 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1772 return 1;
1773 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1774 return 0;
1775 int i;
1776 for (i = 0; i < input_size; i++)
1777 if (!(disable_outgrad & ((uint64_t)1 << i)))
1778 return 0;
1779 return 1;
1780}
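
disable_outgrad is thus a per-input bitmask with two reserved sentinel values (CCV_CNNP_DISABLE_OUTGRAD_ALL and CCV_CNNP_DISABLE_OUTGRAD_NONE); only when neither sentinel matches does the helper scan the individual bits. A sketch of constructing such a mask (the values are illustrative):

// Disable outgoing gradients for inputs 0 and 2 of a 3-input model.
const uint64_t disable_outgrad = ((uint64_t)1 << 0) | ((uint64_t)1 << 2);
// _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, 3) returns 0 here:
// bit 1 is clear, so at least one input still wants its gradient.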
1781
1782// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1783// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1784static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1785{
1786 int i, j;
1787 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1788 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1789 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode);
1790 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1791 const int parallel_count = ccv_max(model->parallel_count, 1);
1792 assert(output_size == model->output_size * parallel_count);
1793 assert(output_size > 0);
1794 // There shouldn't be a loss function if we evaluate with multistage jit.
1795 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1796 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1797 {
1798 _ccv_cnnp_model_set_rewindables(model);
1799 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1800 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1801 _ccv_cnnp_model_rewind_graph(model);
1802 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1803 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1804 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1805 }
1806 const int tensors_init = !!compiled_data->tensors_init.v;
1807 if (!tensors_init)
1808 _ccv_cnnp_model_tensors_init(model, compiled_data);
1809 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1810 // Check whether it is fully allocated; if not, run init_1.
1811 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1812 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1813 assert((input_size % parallel_count) == 0);
1814 assert((output_size % parallel_count) == 0);
1815 const int input_size_per_p = input_size / parallel_count;
1816 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1817 const int output_size_per_p = output_size / parallel_count;
1818 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1819 const int parameter_size = compiled_data->parameters->rnum;
1820 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1821 const int internal_size = compiled_data->internals->rnum;
1822 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1823 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1824 if (!compiled_data->tensors.gradients)
1825 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1826 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1827 if (compiled_data->backward.to_size > 0)
1828 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1829 else
1830 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1831 ccv_array_free(tensor_binds);
1832 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1833 if (tensors_init && parallel_count > 1)
1834 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1835 // If tensors are not init'ed yet, we need to init their states first.
1836 if (_ccv_cnnp_any_to_init(compiled_data))
1837 {
1838 ccv_nnc_tensor_init_states_t tensor_init_states = {
1839 .parallel_count = parallel_count,
1840 .graph = model->graph,
1841 .compiled_data = compiled_data,
1842 .tensor_arena = compiled_data->tensor_arena
1843 };
1844 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1845 }
1846 compiled_data->is_test = is_test;
1847 ccv_nnc_graph_exec_update_t update = {
1848 .parallel_count = parallel_count,
1849 .graph = model->graph,
1850 .graph_exec_arena = compiled_data->graph_exec_arena,
1851 };
1852 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1853 const int evaluate_to_size = compiled_data->evaluate.to_size;
1854 compiled_data->evaluate.to_op_size = 0;
1855 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1856 for (i = 0; i < evaluate_to_size; i++)
1857 {
1858 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1859 if (to_op.graph)
1860 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1861 const int* tos;
1862 int to_size;
1863 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1864 for (j = 0; j < to_size; j++)
1865 {
1866 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1867 .d = tos[j],
1868 .graph = model->graph
1869 });
1870 if (to_op.graph)
1871 ccv_array_add_unique_int(backward_from, to_op.d);
1872 }
1873 }
1874 assert(backward_from->rnum > 0);
1875 compiled_data->backward.from_op_size = backward_from->rnum;
1876 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1877 for (i = 0; i < backward_from->rnum; i++)
1878 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1879 .d = *(int*)ccv_array_get(backward_from, i),
1880 .graph = compiled_data->graph,
1881 };
1882 // If any set nodes (to zero out some tensors) were inserted during the backward pass, they won't be executed if we just run sources -> evaluate.to_ops and backward.from_ops -> destinations. This logic finds those nodes and explicitly adds them to backward.from_ops.
1883 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0);
1884 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1885 uint32_t* const visited = cccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1886 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0);
1887 const int source_size = compiled_data->graph->sources->rnum;
1888 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0);
1889 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1890 visited[(idx >> 5)] |= (1u << (idx & 31));
1891 } ccv_nnc_graph_visit_endfor
1892 ccv_nnc_graph_visit_free(visit);
1893 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0);
1894 const int destination_size = compiled_data->graph->destinations->rnum;
1895 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0);
1896 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1897 visited[(idx >> 5)] |= (1u << (idx & 31));
1898 } ccv_nnc_graph_visit_endfor
1899 ccv_nnc_graph_visit_free(visit);
1900 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0);
1901 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1902 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1903 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1904 {
1905 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD);
1906 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for the set function that empties out a tensor, not the one that sets the grad to 1.
1907 ccv_array_add_unique_int(backward_from, idx);
1908 }
1909 } ccv_nnc_graph_visit_endfor
1910 ccv_nnc_graph_visit_free(visit);
1911 ccfree(visited);
1912 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1913 {
1914 compiled_data->backward.from_op_size = backward_from->rnum;
1915 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1916 for (i = 0; i < backward_from->rnum; i++)
1917 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1918 .d = *(int*)ccv_array_get(backward_from, i),
1919 .graph = compiled_data->graph,
1920 };
1921 }
1922 ccv_array_free(backward_from);
1923 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1924 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1925}
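
The visited bookkeeping used in the loop above is a plain bitset over exec node indices. A minimal sketch of the same test-and-set idiom (the variable names here are illustrative, not from this file):

	uint32_t* const visited = (uint32_t*)cccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
	/* Test: has node idx been reached from the recorded sources? */
	const int seen = visited[idx >> 5] & (1u << (idx & 31));
	/* Set: mark node idx as reached. */
	visited[idx >> 5] |= (1u << (idx & 31));

A node the traversal never marks has no incoming edge from the existing sources; the assert at line 1905 encodes the expectation that such a node can only be a SET node, which is then recorded in backward_from as an extra schedule source.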
1926
1927void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1928{
1929 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1930 assert(compiled_data);
1931 const int parallel_count = ccv_max(model->parallel_count, 1);
1932 assert(output_size == model->output_size * parallel_count);
1933 assert(input_size == model->input_size * parallel_count);
1934 assert(model->graph);
1935 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1936 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1937 if (!compiled_data->graph || mode_mismatch)
1938 {
1939 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1940 if (mode_mismatch) // If the mode mismatches, we need to redo the backward as well (no need to redo apply_gradients; it doesn't depend on target_gradient_mode or disable_outgrad).
1941 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1942 if (params.requires_grad)
1943 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1944 else
1945 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1946 } else {
1947 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1948 assert((input_size % parallel_count) == 0);
1949 const int input_size_per_p = input_size / parallel_count;
1950 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1951 assert((output_size % parallel_count) == 0);
1952 const int output_size_per_p = output_size / parallel_count;
1953 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1954 }
1955 if (compiled_data->is_test != params.is_test)
1956 {
1957 compiled_data->is_test = params.is_test;
1958 ccv_nnc_graph_exec_update_t update = {
1959 .parallel_count = parallel_count,
1960 .graph = model->graph,
1961 .graph_exec_arena = compiled_data->graph_exec_arena,
1962 };
1963 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1964 }
1965}
1966
1967void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1968{
1969 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1970 assert(compiled_data);
1971 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1972 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1973 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1974 else {
1975 if (!compiled_data->evaluate.schedule)
1976 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1977 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1978 }
1979}
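
For orientation, a minimal inference-only call into this path might look like the sketch below; the model construction and tensor shapes are assumptions for illustration, not taken from this file:

	/* requires_grad = 0 takes the MULTISTAGE_MODE_NO_GRAD branch above. */
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 64), 0);
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 10), 0);
	ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 0,
		.is_test = 1, /* routed to ccv_cnnp_model_set_is_test() by the dry run above */
	}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);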
1980
1981// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1982// Particularly, this method compiles the accumulator graph.
1983static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1984{
1985 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1986 assert(compiled_data);
1987 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1988 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1989 const int parallel_count = ccv_max(model->parallel_count, 1);
1990 const int parameter_size = compiled_data->parameters->rnum;
1991 int i, j;
1992 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1993 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1994 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1995 for (i = 0; i < parameter_size; i++)
1996 for (j = 0; j < parallel_count; j++)
1997 if (compiled_data->tensors.gradients[i + j * parameter_size])
1998 {
1999 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
2000 // Now the old gradient becomes the accumulated gradient; set up a new gradient tensor so we can collect into it.
2001 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
2002 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2003 ccv_nnc_tensor_symbol_t inputs[2];
2004 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2005 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2006 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
2007 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD(), inputs, 2, &output, 1, 0);
2008 } else {
2009 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
2010 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
2011 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
2012 }
2013 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
2014 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
2015 {
2016 ccv_nnc_symbolic_graph_free(accum);
2017 // Create empty graph.
2018 compiled_data->backward.accum = ccv_nnc_graph_new();
2019 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
2020 return;
2021 }
2022 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2023 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2024 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
2025 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
2026 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum), SYMBOLIC_GRAPH_DESTINATIONS(accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
2027 ccv_nnc_symbolic_graph_free(accum);
2028 ccv_array_free(tensor_binds);
2029 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
2030}
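
Per parameter slot with a live gradient, the accumulator graph compiled here has a simple net effect, worth stating explicitly (a conceptual sketch, not code from this file):

	/* CMD_EWSUM_FORWARD computes, for each parameter slot p:
	 *   updated_accum_gradients[p] = accum_gradients[p] + gradients[p]
	 * and because updated_accum_gradients[p] is bound to the same backing tensor
	 * as accum_gradients[p] (see the three _ccv_cnnp_model_bind_tensors calls),
	 * the running sum stays in tensors.accum_gradients across backward passes. */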
2031
2032void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
2033{
2034 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2035 assert(compiled_data);
2036 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2037 const int parallel_count = ccv_max(model->parallel_count, 1);
2038 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count);
2039 if (outgrad_size > 0)
2040 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count); }
2041 assert(model->graph);
2042 assert(compiled_data->graph);
2043 const int parameter_size = compiled_data->parameters->rnum;
2044 // If we need to accumulate the gradients now, do jit on accumulator.
2045 if (compiled_data->backward.count > 0)
2046 {
2047 if (!compiled_data->backward.accum)
2048 _ccv_cnnp_model_multistage_jit_1(model);
2049 else if (compiled_data->backward.count == 1) {
2050 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2051 int i;
2052 for (i = 0; i < parameter_size * parallel_count; i++)
2053 {
2054 ccv_nnc_tensor_t* tensor;
2055 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor);
2056 }
2057 if (compiled_data->backward.tensor_arena)
2058 {
2059 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2060 // Rebind in case we messed up the binding (we switched accum_gradients and gradients).
2061 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2062 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2063 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2064 }
2065 }
2066 }
2067 const int ingrad_size_per_p = model->output_size;
2068 const int outgrad_size_per_p = compiled_data->outgrad_size;
2069 int i, j;
2070 for (i = 0; i < ingrad_size_per_p; i++)
2071 {
2072 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
2073 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2074 {
2075 // Set it to 1 if it is not specified.
2076 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2077 if (ingrad_tensor)
2078 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2079 for (j = 1; j < parallel_count; j++)
2080 {
2081 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2082 if (ingrad_tensor)
2083 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2084 }
2085 } else {
2086 // Make sure the length matches, in case it is an alias.
2087 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)));
2088 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2089 for (j = 1; j < parallel_count; j++)
2090 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2091 }
2092 }
2093 if (outgrad_size > 0)
2094 {
2095 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad");
2096 for (i = 0; i < outgrad_size_per_p; i++)
2097 if (outgrads[i])
2098 {
2099 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2100 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2101 for (j = 1; j < parallel_count; j++)
2102 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2103 }
2104 } else {
2105 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||
2106 	compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS);
2107 }
2108 // We need to rebind here because ccv_cnnp_model_evaluate clears bindings, which resets all bindings for the gradients.
2109 // For parameters and internals this is fine: clearing restores the original bindings, which are exactly those
2110 // parameters and internals. The same cannot be said for gradients, due to the accum_gradients switching.
2111 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2112 if (!compiled_data->backward.schedule)
2113 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2114 // Run the backward pass.
2115 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2116 // If we need to run accumulation round, do that now.
2117 if (compiled_data->backward.count > 0)
2118 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2120 // Update the count; this determines whether we need to accumulate or not.
2120 ++compiled_data->backward.count;
2121}
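
Putting the count bookkeeping together, gradient accumulation over two micro-batches might look like this sketch (tensor names are illustrative; params is assumed to carry requires_grad = 1, and the zero arguments are empty ingrads/outgrads plus default tape and stream):

	ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x0), TENSOR_LIST(loss0), 0, 0);
	ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); /* count 0 -> 1: plain backward pass */
	ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(x1), TENSOR_LIST(loss1), 0, 0);
	ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); /* count > 0: builds or rebinds the accumulator, then sums */
	ccv_cnnp_model_apply_gradients(model, 0);         /* consumes accum_gradients, resets count to 0 */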
2122
2123// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2124// Particularly, this method compiles the parameter update graph.
2125static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2126{
2127 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2128 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2129 const int parallel_count = ccv_max(model->parallel_count, 1);
2130 const int parameter_size = compiled_data->parameters->rnum;
2131 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2132 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2133 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2134 // Bind accumulated gradients.
2135 if (compiled_data->backward.count > 1)
2136 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2137 else
2138 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2139 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2140 int i, j;
2141 for (i = 0; i < compiled_data->backward.to_size; i++)
2142 {
2143 const int* tos;
2144 int to_size;
2145 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2146 for (j = 0; j < to_size; j++)
2147 {
2148 // Check if this already shows up in the backward graph; if that is the case, it won't be in the apply
2149 // gradients graph.
2150 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2151 .d = tos[j],
2152 .graph = model->graph,
2153 });
2154 if (!exec.graph)
2155 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2156 }
2157 }
2158 const int from_size = apply_gradients_from->rnum;
2159 if (from_size == 0)
2160 {
2161 ccv_array_free(apply_gradients_from);
2162 ccv_array_free(tensor_binds);
2163 return;
2164 }
2165 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2166 for (i = 0; i < from_size; i++)
2167 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2168 .d = *(int*)ccv_array_get(apply_gradients_from, i),
2169 .graph = model->graph
2170 };
2171 ccv_array_free(apply_gradients_from);
2172 // It can only end with updates on the parameters.
2173 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2174 for (i = 0; i < parameter_size; i++)
2175 {
2176 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2177 continue;
2178 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2179 for (j = 1; j < parallel_count; j++)
2180 {
2181 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2182 ccv_array_push(tos, &copy);
2183 }
2184 }
2185 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2186 ccv_array_free(tos);
2187 ccv_array_free(tensor_binds);
2188 ccfree(froms);
2189 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2190 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2191 {
2192 // Skip on no tensor.
2193 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2194 continue;
2195 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2196 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2197 for (j = 1; j < parallel_count; j++)
2198 {
2199 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2200 if (copy)
2201 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2202 }
2203 }
2204 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2205}
2206
2207void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2208{
2209 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2210 assert(compiled_data);
2211 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2212 const int parallel_count = ccv_max(model->parallel_count, 1);
2213 assert(model->graph);
2214 assert(compiled_data->graph);
2215 // Skip if there is no backward pass.
2216 if (compiled_data->backward.count <= 0)
2217 return;
2218 // Skip if there are no parameters.
2219 if (compiled_data->parameters->rnum == 0)
2220 {
2221 compiled_data->backward.count = 0;
2222 return;
2223 }
2224 if (!compiled_data->apply_gradients.graph)
2225 _ccv_cnnp_model_multistage_jit_2(model);
2226 else {
2227 const int parameter_size = compiled_data->parameters->rnum;
2228 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2229 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2230 if (compiled_data->backward.count > 1)
2231 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2232 else
2233 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2234 }
2235 if (compiled_data->apply_gradients.graph)
2236 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2237 // Reset backward count to 0.
2238 compiled_data->backward.count = 0;
2239}
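
A complete single training step through these three entry points, assuming the model was compiled with a minimizer beforehand, might read (a sketch; names are illustrative):

	ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(input), TENSOR_LIST(loss), 0, 0);
	ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); /* the default ingrad of 1 is set for us above */
	ccv_cnnp_model_apply_gradients(model, 0);         /* first call compiles jit_2; later calls only rebind */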
2240
2241void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2242{
2243 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2244 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2245 assert(parameter->param_sel != 0);
2246 const int tensors_init = !!compiled_data->tensors_init.v;
2247 int this_tensor_init = tensors_init;
2248 if (!tensors_init)
2249 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2250 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2251 // Check if it is not fully allocated; if it is not, init_1.
2252 this_tensor_init = 0;
2253 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2254 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2255 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2256 if (param_ref < 0)
2257 { assert(parameter_indices->rnum == 1); }
2258 else
2259 { assert(param_ref < parameter_indices->rnum); }
2260 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2261 ccv_array_free(parameter_indices);
2262 const int parameter_size = compiled_data->parameters->rnum;
2263 assert(d >= 0);
2264 assert(d < parameter_size);
2265 const int parallel_count = _ccv_cnnp_compiled_data_parallel_count(model, compiled_data);
2266 int i;
2267 if (!this_tensor_init)
2268 {
2269 if (compiled_data->tensors.parameters[d])
2270 {
2271 for (i = 1; i < parallel_count; i++)
2272 { assert(compiled_data->tensors.parameters[d + i * parameter_size]); }
2273 this_tensor_init = 1;
2274 } else {
2275 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d);
2276 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2277 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
2278 	CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
2279 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
2280 compiled_data->tensors.parameters[d] = ccv_nnc_tensor_new(0, info, 0);
2281 for (i = 1; i < parallel_count; i++)
2282 {
2283 if (i != device_id)
2284 	CCV_TENSOR_SET_DEVICE_ID(info.type, i);
2285 else
2286 	CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
2287 compiled_data->tensors.parameters[d + i * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2288 }
2289 }
2290 }
2291 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2292 assert(dest);
2293 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor), TENSOR_LIST(dest), 0);
2294 for (i = 1; i < parallel_count; i++)
2295 {
2296 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size]);
2297 if (copy_tensor)
2298 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dest), TENSOR_LIST(copy_tensor), 0);
2299 }
2300 // Mark this symbol as init'ed.
2301 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d))->d;
2302 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2303 init_v[s >> 5] |= (1u << (s & 0x1f));
2304 // If we just allocated this tensor, now it is time to check if we need to mark it as fully allocated.
2305 if (!this_tensor_init)
2306 {
2307 if (ccv_cnnp_model_tensors_any_to_alloc(model, compiled_data))
2308 compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)compiled_data->tensors_init.v | (uintptr_t)1);
2309 else // Remove the flag.
2310 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2311 }
2312}
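
Both tensors_init.v and the entries of tensors.parameters use the low bit of the pointer as an inline flag (safe because these allocations are more than 1-byte aligned); CCV_NNC_TENSOR and CCV_NNC_INIT_V strip it before dereferencing. The idiom in isolation (a generic sketch):

	/* Tag: mark the bitmap as "not fully allocated" without an extra field. */
	v = (uint32_t*)((uintptr_t)v | (uintptr_t)1);
	/* Test the tag. */
	const int partial = !!((uintptr_t)v & (uintptr_t)1);
	/* Strip the tag before use, which is all CCV_NNC_INIT_V does. */
	uint32_t* const clean = (uint32_t*)((uintptr_t)v & ~(uintptr_t)1);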
2313
2314void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2315{
2316 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2317 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2318 assert(parameter->param_sel != 0);
2319 assert(compiled_data->tensors.parameters);
2320 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2321 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2322 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2323 if (param_ref < 0)
2324 { assert(parameter_indices->rnum == 1); }
2325 else
2326 { assert(param_ref < parameter_indices->rnum); }
2327 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2328 ccv_array_free(parameter_indices);
2329 const int parameter_size = compiled_data->parameters->rnum;
2330 assert(d >= 0);
2331 assert(d < parameter_size);
2332 // We don't need to consider parallel_count; every parameter on each device is identical.
2333 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2334 assert(src);
2335 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(tensor), 0);
2336}
2337
2338ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2339{
2340 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2341 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2342 assert(parameter->param_sel != 0);
2343 assert(compiled_data->tensors.parameters);
2344 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2345 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2346 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2347 if (param_ref < 0)
2348 { assert(parameter_indices->rnum == 1); }
2349 else
2350 { assert(param_ref < parameter_indices->rnum); }
2351 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2352 ccv_array_free(parameter_indices);
2353 const int parameter_size = compiled_data->parameters->rnum;
2354 assert(d >= 0);
2355 assert(d < parameter_size);
2356 // We don't need to consider parallel_count; every parameter on each device is identical.
2357 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2358 assert(tensor);
2359 return tensor->info;
2360}
2361
2362const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2363{
2364 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2365 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2366 assert(parameter->param_sel != 0);
2367 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2368 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2369 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2370 if (param_ref < 0)
2371 { assert(parameter_indices->rnum == 1); }
2372 else
2373 { assert(param_ref < parameter_indices->rnum); }
2374 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2375 ccv_array_free(parameter_indices);
2376 const int parameter_size = compiled_data->parameters->rnum;
2377 assert(d >= 0);
2378 assert(d < parameter_size);
2379 return *(char**)ccv_array_get(compiled_data->ids.parameters, d);
2381
2382int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2383{
2384 assert(model->compiled_data);
2385 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2386 return compiled_data->parameters->rnum;
2387}
2388
2389uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2390{
2391 assert(model->compiled_data);
2392 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2393 const int parameter_size = compiled_data->parameters->rnum;
2394 int i;
2395 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2396 uint64_t size = 0;
2397 const int tensors_init = !!compiled_data->tensors_init.v;
2398 uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v) : 0;
2399 for (i = 0; i < parameter_size; i++)
2400 {
2401 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
2402 if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] | (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i]) // Note: '|' makes this sub-test always true; '&' (testing the init bit) was likely intended.
2403 {
2404 ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info;
2405 size += ccv_nnc_tensor_data_size(params);
2406 continue;
2407 }
2408 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2409 .graph = graph,
2410 .d = d
2411 });
2412 size += ccv_nnc_tensor_data_size(params);
2413 }
2414 return size;
2415}
2416
2417int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type)
2418{
2419 assert(model->compiled_data);
2420 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2421 if (count != compiled_data->parameters->rnum)
2422 return 0;
2423 if (CCV_TENSOR_GET_DEVICE(type) == CCV_COMPUTE_DEVICE_ANY)
2424 	CCV_TENSOR_SET_DEVICE_ID(type, 0);
2425 int i;
2426 // We don't need to consider parallel_count; every parameter on each device is identical.
2427 for (i = 0; i < count; i++)
2428 {
2429 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2430 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2431 {
2432 tensors[i] = 0;
2433 continue;
2434 }
2435 tensor = CCV_NNC_TENSOR(tensor);
2436 if (tensor->info.type == type)
2437 tensors[i] = tensor;
2438 else {
2439 ccv_nnc_tensor_param_t info = tensor->info;
2440 info.type = type;
2441 tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet.
2442 }
2443 }
2444 for (i = 0; i < count; i++)
2445 {
2446 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2447 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2448 continue;
2449 tensor = CCV_NNC_TENSOR(tensor);
2450 // Now initiate the transfer. We should do this on a stream.
2451 if (tensor->info.type != type)
2452 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor), TENSOR_LIST(tensors[i]), 0);
2453 }
2454 // Copy names and remove parameters.
2455 for (i = 0; i < count; i++)
2456 {
2457 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i];
2458 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2459 {
2460 names[i] = 0;
2461 continue;
2462 }
2463 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2464 const size_t name_len = ccv_min(strnlen(name, 1023), 1023);
2465 names[i] = ccmalloc(name_len + 1);
2466 names[i][name_len] = 0;
2467 memcpy(names[i], name, name_len);
2468 if (tensor->info.type == type)
2469 compiled_data->tensors.parameters[i] = 0; // Only move when it is moved.
2470 }
2471 return 1;
2472}
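
A plausible use of this move API, pulling every parameter out into CPU-side tensors (illustrative only; the caller takes ownership of the returned names and tensors, and entries stay 0 where the model did not own the parameter):

	const int n = ccv_cnnp_model_parameter_count(model);
	char** const names = (char**)ccmalloc(sizeof(char*) * n);
	ccv_nnc_tensor_t** const tensors = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * n);
	const int moved = ccv_cnnp_model_parameters_move(model, names, tensors, n, CCV_TENSOR_CPU_MEMORY);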
2473
2474 KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)
6
Null pointer value stored to field 'vals'
2475
2476void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
2477{
2478 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2478, __extension__ __PRETTY_FUNCTION__
); }))
;
1
Assuming field 'compiled_data' is non-null
2
Taking true branch
2479 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2480 int i;
2481 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2482 if (count != compiled_data->parameters->rnum)
3
Assuming 'count' is not equal to field 'rnum'
4
Taking true branch
2483 {
2484 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
5
Calling 'kh_init_ccv_cnnp_parameter_id'
7
Returning from 'kh_init_ccv_cnnp_parameter_id'
2485 // Build the map between names and their indices.
2486 for (i = 0; i < count; i++)
8
Assuming 'i' is >= 'count'
9
Loop condition is false. Execution continues on line 2494
2487 {
2488 int ret;
2489 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[i], &ret);
2490 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2490
, __extension__ __PRETTY_FUNCTION__); }))
;
2491 kh_val(id_map, k)((id_map)->vals[k]) = i;
2492 }
2493 }
2494 const int parameter_size = compiled_data->parameters->rnum;
2495 int* copy_back = 0;
2496 const int tensors_init = !!compiled_data->tensors_init.v;
10
Assuming field 'v' is non-null
2497 if (!tensors_init
10.1
'tensors_init' is 1
)
11
Taking false branch
2498 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2499 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
12
Assuming '_a' is <= '_b'
13
'?' condition is false
2500 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2501 for (i = 0; i < parameter_size; i++)
14
Assuming 'i' is < 'parameter_size'
15
Loop condition is true. Entering loop body
2502 {
2503 int j = i;
2504 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2505 if (i
15.1
'i' is >= 0
>= 0 || strncmp(name, names[i], 1023) != 0)
2506 {
2507 // Build the map.
2508 if (id_map == 0)
16
Assuming 'id_map' is not equal to null
17
Taking false branch
2509 {
2510 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2511 for (j = 0; j < count; j++)
2512 {
2513 int ret;
2514 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[j], &ret);
2515 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2515
, __extension__ __PRETTY_FUNCTION__); }))
;
2516 kh_val(id_map, k)((id_map)->vals[k]) = j;
2517 }
2518 }
2519 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name)kh_get_ccv_cnnp_parameter_id(id_map, name);
2520 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
18
Assuming 'k' is not equal to field 'n_buckets'
19
Taking false branch
2521 continue;
2522 j = kh_val(id_map, k)((id_map)->vals[k]);
20
Array access (via field 'vals') results in a null pointer dereference
2523 }
2524 if (compiled_data->tensors.parameters[i]) // Cannot be a shared parameter to read.
2525 { assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))((void) sizeof ((!((uintptr_t)compiled_data->tensors.parameters
[i] & (uintptr_t)1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t
)compiled_data->tensors.parameters[i] & (uintptr_t)1))
; else __assert_fail ("!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)"
, "ccv_cnnp_model.c", 2525, __extension__ __PRETTY_FUNCTION__
); }))
; }
2526 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
2527 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2528 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2529 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2530 const int d = parameter.d;
2531 if (info.type == tensors[j]->info.type && invalidates) // Can move.
2532 {
2533 // Deallocate it if needed.
2534 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2535 if (compiled_data->tensors.parameters[i])
2536 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2537 compiled_data->tensors.parameters[i] = tensors[j];
2538 tensors[j] = 0;
2539 } else {
2540 if (!compiled_data->tensors.parameters[i])
2541 { // Not allocated, to allocate first.
2542 // Create a new one, making sure it has the right parameters.
2543 const int type = info.type;
2544 info = tensors[j]->info;
2545 info.type = type; // Revert back the type.
2546 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
2547 }
2548 if (!copy_back)
2549 copy_back = (int*)cccalloccalloc(parameter_size, sizeof(int));
2550 copy_back[i] = j + 1;
2551 }
2552 init_v[d >> 5] |= (1u << (d & 0x1f));
2553 // Create this tensor for other data parallel allocations.
2554 info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
2555 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2556 for (j = 1; j < parallel_count; j++)
2557 if (!compiled_data->tensors.parameters[i + j * parameter_size])
2558 {
2559 if (j != device_id)
2560 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
2561 else
2562 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2563 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2564 }
2565 // No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
2566 }
2567 if (id_map)
2568 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2569 // Now do the transfer.
2570 if (copy_back)
2571 {
2572 for (i = 0; i < parameter_size; i++)
2573 {
2574 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[i]) & ~(uintptr_t)1))
;
2575 if (copy_back[i] == 0)
2576 continue;
2577 const int j = copy_back[i] - 1;
2578 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j])(ccv_nnc_tensor_t* []){tensors[j]}, 1, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, 1, 0);
2579 }
2580 ccfreefree(copy_back);
2581 }
2582}
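On the reported path the analyzer assumed 'i' is >= 'count' at step 8 with i == 0, i.e. count <= 0, so the kh_put loop at 2486-2492 never ran and id_map keeps the zero-filled layout calloc produced at step 6: n_buckets == 0 and vals == 0. On an empty table kh_get takes its "else return 0" branch while kh_end(id_map) is also 0, so the continue at 2521 should always fire and the dereference at 2522 appears unreachable; the checker simply does not relate the two zeros across the macro boundary, and any count > 0 would have made kh_put resize vals to non-null. A standalone sketch of the shape it is reasoning about (plain C, hypothetical names, not part of ccv):

#include <stdlib.h>

/* Toy stand-in for the kh_ccv_cnnp_parameter_id_t layout above (hypothetical names). */
typedef struct {
	unsigned n_buckets; /* kh_end(h) is h->n_buckets */
	int* vals;          /* stays null until the first put/resize */
} toy_map_t;

static toy_map_t* toy_init(void)
{
	return (toy_map_t*)calloc(1, sizeof(toy_map_t)); /* zero-fill: vals == NULL (step 6) */
}

/* Mirrors kh_get's empty-table branch: "else return 0" when n_buckets == 0. */
static unsigned toy_get(const toy_map_t* const h)
{
	return h->n_buckets ? h->n_buckets /* probing elided: "not found" */ : 0;
}

int main(void)
{
	toy_map_t* const h = toy_init();
	if (!h)
		return 1;
	const unsigned k = toy_get(h); /* 0 on an empty table */
	if (k == h->n_buckets) { /* 0 == 0: always taken here, so ... */
		free(h);
		return 0;
	}
	return h->vals[k]; /* ... this null dereference, the flagged line, is dead */
}

An explicit assert(count > 0) before building the map, or a check of id_map->vals alongside kh_end, would make the invariant visible to the analyzer.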
2583
2584ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2585{
2586 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2587 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2587, __extension__ __PRETTY_FUNCTION__); }))
;
2588 const int parameter_size = compiled_data->parameters->rnum;
2589 int i;
2590 for (i = 0; i < parameter_size; i++)
2591 {
2592 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2593 if (first(model, name, context))
2594 return ccv_cnnp_model_parameters(model, -1, i);
2595 }
2596 return 0;
2597}
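first() is invoked once per parameter id until it returns non-zero, and the matching parameter is wrapped via ccv_cnnp_model_parameters(model, -1, i); if nothing matches, 0 is returned. A sketch of a matcher (the exact typedef of ccv_cnnp_model_parameters_filter_f is assumed here from the call site first(model, name, context) above):

#include <string.h>

// Assumed signature, inferred from the first(model, name, context) call above.
static int _name_has_suffix(const ccv_cnnp_model_t* const model, const char* const name, void* const context)
{
	(void)model; // Unused by this matcher.
	const char* const suffix = (const char*)context;
	const size_t name_len = strlen(name);
	const size_t suffix_len = strlen(suffix);
	return name_len >= suffix_len && memcmp(name + name_len - suffix_len, suffix, suffix_len) == 0;
}

// Usage sketch: first parameter whose id ends in "bias" ("bias" is an illustrative suffix).
// ccv_cnnp_model_io_t bias = ccv_cnnp_model_parameter_first(model, _name_has_suffix, (void*)"bias");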
2598
2599ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2600{
2601 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2602 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2602, __extension__ __PRETTY_FUNCTION__); }))
;
2603 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2604 const int parameter_size = compiled_data->parameters->rnum;
2605 int i;
2606 for (i = 0; i < parameter_size; i++)
2607 {
2608 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2609 if (filter(model, name, context))
2610 {
2611 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2612 ccv_array_push(parameters, &parameter);
2613 }
2614 }
2615 return parameters;
2616
2617}
2618
2619CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2620{
2621 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2622 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2622, __extension__ __PRETTY_FUNCTION__); }))
;
2623 const int tensors_init = !!compiled_data->tensors_init.v;
2624 if (!tensors_init) // If nothing initialized, we return parameter 0.
2625 return ccv_cnnp_model_parameters(model, -1, 0);
2626 const int parameter_size = compiled_data->parameters->rnum;
2627 int i;
2628 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2629 for (i = 0; i < parameter_size; i++)
2630 {
2631 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2632 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2633 return ccv_cnnp_model_parameters(model, -1, i);
2634 }
2635 return 0;
2636}
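tensors_init.v does double duty in this file: bit 0 of the pointer itself is a tag meaning "not fully allocated" (set and cleared near line 2840), and, once CCV_NNC_INIT_V strips that tag, the memory it points to is a uint32_t bitset with one bit per tensor symbol index d. The test/set pair used throughout, in isolation (helper names are hypothetical):

#include <stdint.h>

// Bitset convention for tensors_init.v: symbol d maps to bit (d & 0x1f)
// of word (d >> 5), i.e. 32 symbols per word.
static inline int ccv_nnc_init_bit_is_set(const uint32_t* const init_v, const int d)
{
	return !!(init_v[d >> 5] & (1u << (d & 0x1f)));
}

static inline void ccv_nnc_init_bit_set(uint32_t* const init_v, const int d)
{
	init_v[d >> 5] |= (1u << (d & 0x1f));
}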
2637
2638static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2639{
2640 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2641 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2641, __extension__
__PRETTY_FUNCTION__); }))
;
2642 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2643 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2644 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2645 return to_parameter_indices;
2646}
2647
2648static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2649{
2650 // If the model is not compiled yet, compile it now.
2651 if (!model->graph)
2652 {
2653 model->graph = ccv_nnc_symbolic_graph_new();
2654 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2654, __extension__ __PRETTY_FUNCTION__
); }))
;
2655 const int input_size = from_model->input_size;
2656 ccv_nnc_tensor_param_t input_params[input_size];
2657 int i;
2658 for (i = 0; i < input_size; i++)
2659 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2660 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2661 model->parallel_count = from_model->parallel_count;
2662 model->memory_compression = from_model->memory_compression;
2663 model->memory_reduction = from_model->memory_reduction;
2664 model->gradient_checkpointing = from_model->gradient_checkpointing;
2665 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2666 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2667 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2668 }
2669 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2670 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2670, __extension__ __PRETTY_FUNCTION__
); }))
;
2671 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2672 if (!to_tensors_init)
2673 {
2674 if (only_init_0)
2675 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2676 else
2677 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2678 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2679 // Check whether it is fully allocated; if not, run init_1.
2680 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2681 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2681, __extension__ __PRETTY_FUNCTION__
); }))
;
2682 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2683 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2684 if (*from_param_ref < 0 && *param_ref >= 0)
2685 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2685, __extension__ __PRETTY_FUNCTION__
); }))
; }
2686 else if (*from_param_ref >= 0)
2687 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2687, __extension__ __PRETTY_FUNCTION__
); }))
; }
2688 if (*param_ref < 0 && *from_param_ref >= 0)
2689 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2689, __extension__ __PRETTY_FUNCTION__); }))
; }
2690 else if (*param_ref >= 0)
2691 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2691, __extension__ __PRETTY_FUNCTION__
); }))
; }
2692}
2693
2694void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2695{
2696 ccv_array_t* to_parameter_indices;
2697 int to_param_ref;
2698 ccv_array_t* from_parameter_indices;
2699 int from_param_ref;
2700 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2701 // Should be exactly the same tensor.
2702 if (to_param_ref < 0 && from_param_ref < 0)
2703 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2703, __extension__ __PRETTY_FUNCTION__
); }))
; }
2704 // To models.
2705 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2706 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2706, __extension__ __PRETTY_FUNCTION__
); }))
;
2707 // From models.
2708 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2709 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2710 const int to_parameter_size = to_compiled_data->parameters->rnum;
2711 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2712 int i, j;
2713 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2714 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2715 for (i = 0; i < rnum; i++)
2716 {
2717 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2718 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2718, __extension__ __PRETTY_FUNCTION__); }))
;
2719 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2719, __extension__ __PRETTY_FUNCTION__
); }))
;
2720 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2721 // If the original is not init'ed, we cannot copy from.
2722 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2723 continue;
2724 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2725 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2725, __extension__ __PRETTY_FUNCTION__); }))
;
2726 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2726, __extension__ __PRETTY_FUNCTION__
); }))
;
2727 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2728 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2728, __extension__
__PRETTY_FUNCTION__); }))
;
2729 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2730 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2730, __extension__
__PRETTY_FUNCTION__); }))
;
2731 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, 1, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, 1, 0);
2732 for (j = 1; j < parallel_count; j++)
2733 {
2734 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2735 if (copy_tensor)
2736 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, 1, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, 1, 0);
2737 }
2738 // Mark this symbol as init'ed.
2739 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2740 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2741 }
2742 ccv_array_free(to_parameter_indices);
2743 ccv_array_free(from_parameter_indices);
2744}
2745
2746void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2747{
2748 ccv_array_t* to_parameter_indices;
2749 int to_param_ref;
2750 ccv_array_t* from_parameter_indices;
2751 int from_param_ref;
2752 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2753 // Should be exactly the same tensor.
2754 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
2755 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2755, __extension__ __PRETTY_FUNCTION__
); }))
; }
2756 // To models.
2757 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2758 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2758, __extension__ __PRETTY_FUNCTION__
); }))
;
2759 // From models.
2760 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2761 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2762 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2762, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count to share parameters.
2763 const int from_parameter_size = from_compiled_data->parameters->rnum;
2764 const int to_parameter_size = to_compiled_data->parameters->rnum;
2765 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
2766 int i, j;
2767 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2768 char* updated_name = 0;
2769 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2770 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2771 for (i = 0; i < rnum; i++)
2772 {
2773 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
2774 // Need to figure out how to use the renamer here.
2775 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2776 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2776, __extension__ __PRETTY_FUNCTION__); }))
;
2777 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2777, __extension__
__PRETTY_FUNCTION__); }))
;
2778 if (renamer)
2779 {
2780 const char* const src_name = (src_d < from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2781 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2782 if (!updated_name)
2783 updated_name = (char*)ccmallocmalloc(1024);
2784 const size_t src_name_len = src_name == 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
2785 if (src_name_len > 0)
2786 memcpy(updated_name, src_name, src_name_len);
2787 updated_name[src_name_len] = 0;
2788 if (renamer(context, dest_name, updated_name, 1024) != 0)
2789 continue; // Skip this.
2790 if (src_name != 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2791 {
2792 // Nothing changed.
2793 } else {
2794 if (!id_map)
2795 {
2796 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2797 for (j = 0; j < from_parameter_size; j++)
2798 {
2799 int ret;
2800 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
2801 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2801
, __extension__ __PRETTY_FUNCTION__); }))
;
2802 kh_val(id_map, k)((id_map)->vals[k]) = j;
2803 }
2804 }
2805 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2806 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2807 continue;
2808 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2809 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2809, __extension__ __PRETTY_FUNCTION__); }))
;
2810 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2810, __extension__
__PRETTY_FUNCTION__); }))
;
2811 }
2812 }
2813 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2813, __extension__ __PRETTY_FUNCTION__); }))
;
2814 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2814, __extension__
__PRETTY_FUNCTION__); }))
;
2815 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2816 // If the original is not init'ed, we cannot share from.
2817 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2818 continue;
2819 for (j = 0; j < parallel_count; j++)
2820 {
2821 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2822 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2822, __extension__
__PRETTY_FUNCTION__); }))
;
2823 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2824 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2825 ccv_nnc_tensor_free(dest);
2826 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2827 }
2828 // Mark this symbol as init'ed.
2829 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2830 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2831 }
2832 ccv_array_free(to_parameter_indices);
2833 ccv_array_free(from_parameter_indices);
2834 if (id_map)
2835 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2836 if (updated_name)
2837 ccfreefree(updated_name);
2838 // Mark it as incomplete so we will call init_1.
2839 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2840 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2841 else // Remove the flag.
2842 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2843}
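The renamer receives the destination parameter's id and a 1024-byte in/out buffer pre-filled with the candidate source id; returning non-zero skips the parameter, returning 0 makes the function look the (possibly rewritten) name up among the source model's ids. Shared tensors are then recorded with bit 0 of the pointer set ((uintptr_t)src | 1), so they are never freed as owned. A sketch of one such callback (the typedef's exact qualifiers and the type of the size parameter are assumptions from the call site renamer(context, dest_name, updated_name, 1024)):

#include <stdio.h>
#include <string.h>

// Assumed shape of ccv_cnnp_model_parameters_renamer_f, from the call above.
static int _map_prefixed_name(void* const context, const char* const dest_name, char* const updated_name, const size_t provided_size)
{
	const char* const prefix = (const char*)context; // e.g. "ema-" (illustrative)
	const size_t prefix_len = strlen(prefix);
	if (strncmp(dest_name, prefix, prefix_len) != 0)
		return 1; // Not one of ours: skip sharing for this parameter.
	// Share from the source parameter named without the prefix.
	snprintf(updated_name, provided_size, "%s", dest_name + prefix_len);
	return 0;
}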
2844
2845ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2846{
2847 if (!compiled_data->stream_map)
2848 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2849 int ret = 0;
2850 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2851 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2851, __extension__ __PRETTY_FUNCTION__); }))
;
2852 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2853 // If ret == 0, the key already exists and we can return directly; otherwise, create and return.
2854 if (ret != 0)
2855 {
2856 stream = ccv_nnc_stream_context_new(type);
2857 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2858 }
2859 return stream;
2860}
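kh_put's ret distinguishes a pre-existing key (ret == 0, slot valid) from a freshly created slot (ret > 0, value indeterminate); the function above reads kh_val before inspecting ret and patches the slot afterwards, so the first read can see an uninitialized value that is then discarded. An equivalent ordering that only reads a slot once it is known to be initialized, as a sketch against the same khash API used in this file (not the upstream code):

ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream_sketch(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
{
	if (!compiled_data->stream_map)
		compiled_data->stream_map = kh_init(stream_map);
	int ret = 0;
	const khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret);
	assert(ret >= 0);
	if (ret != 0) // Newly inserted: initialize the slot before any read.
		kh_val(compiled_data->stream_map, k) = ccv_nnc_stream_context_new(type);
	return kh_val(compiled_data->stream_map, k);
}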
2861
2862void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2863{
2864 ccv_array_t* to_parameter_indices;
2865 int to_param_ref;
2866 ccv_array_t* from_parameter_indices;
2867 int from_param_ref;
2868 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2869 // Should be exactly the same tensor.
2870 if (to_param_ref < 0 && from_param_ref < 0)
2871 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2871, __extension__ __PRETTY_FUNCTION__
); }))
; }
2872 // To models.
2873 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2874 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2874, __extension__ __PRETTY_FUNCTION__
); }))
;
2875 // From models.
2876 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2877 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2878 const int to_parameter_size = to_compiled_data->parameters->rnum;
2879 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2880 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2880, __extension__ __PRETTY_FUNCTION__
); }))
;
2881 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2881, __extension__ __PRETTY_FUNCTION__
); }))
;
2882 int i, j;
2883 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2884 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2885 for (i = 0; i < aux_in_size; i++)
2886 inputs[i + 2] = aux_ins[i];
2887 for (i = 0; i < aux_out_size; i++)
2888 outputs[i + 1] = aux_outs[i];
2889 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2890 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2891 for (i = 0; i < rnum; i++)
2892 {
2893 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2894 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2894, __extension__ __PRETTY_FUNCTION__); }))
;
2895 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2895, __extension__ __PRETTY_FUNCTION__
); }))
;
2896 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2897 // If the original is not init'ed, we cannot copy from.
2898 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2899 continue;
2900 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2901 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2901, __extension__ __PRETTY_FUNCTION__); }))
;
2902 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2902, __extension__ __PRETTY_FUNCTION__
); }))
;
2903 if (parallel_count > 1)
2904 {
2905 ccv_nnc_stream_context_t* streams[parallel_count];
2906 ccv_nnc_stream_signal_t* signal;
2907 if (stream_context)
2908 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2909 for (j = 0; j < parallel_count; j++)
2910 {
2911 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2912 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2913 if (!dest || !src)
2914 {
2915 streams[j] = 0;
2916 continue;
2917 }
2918 // At the moment, can only handle them on the same device.
2919 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2919, __extension__ __PRETTY_FUNCTION__
); }))
;
2920 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2920, __extension__ __PRETTY_FUNCTION__
); }))
;
2921 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2922 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2923 int type = stream_type;
2924 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2925 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2926 // Wait signal to finish.
2927 if (stream_context)
2928 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2929 inputs[0] = outputs[0] = dest;
2930 inputs[1] = src;
2931 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2932 if (stream_context)
2933 {
2934 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2935 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2936 }
2937 streams[j] = stream_0;
2938 }
2939 // If this should be blocking, blocking it.
2940 if (!stream_context)
2941 for (j = 0; j < parallel_count; j++)
2942 if (streams[j])
2943 ccv_nnc_stream_context_wait(streams[j]);
2944 } else {
2945 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2946 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2946, __extension__
__PRETTY_FUNCTION__); }))
;
2947 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2948 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2948, __extension__
__PRETTY_FUNCTION__); }))
;
2949 inputs[0] = outputs[0] = dest;
2950 inputs[1] = src;
2951 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2952 }
2953 // Mark this symbol as init'ed.
2954 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2955 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2956 }
2957 ccv_array_free(to_parameter_indices);
2958 ccv_array_free(from_parameter_indices);
2959}
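Because inputs[0] and outputs[0] alias the destination tensor and inputs[1] is the source, the supplied cmd computes dest = f(dest, src, aux...) in place, parameter by parameter. One natural use is an exponential moving average over a shadow copy of the model. The sketch below rests on two assumptions not established by this file: that CMD_ADD_FORWARD(p, q) computes p * a + q * b, and that ccv_cnnp_model_parameters(m, -1, -1) selects all parameters (extrapolated from the (-1, i) calls above):

// Hypothetical EMA update sketch: ema = 0.999 * ema + 0.001 * live.
static void _ema_update(ccv_cnnp_model_t* const ema_model, ccv_cnnp_model_t* const live_model)
{
	ccv_cnnp_model_parameters_zip_map(ema_model, ccv_cnnp_model_parameters(ema_model, -1, -1),
		CMD_ADD_FORWARD(0.999, 0.001), ccv_nnc_no_hint, 0,
		0, 0, 0, 0, /* no aux inputs or outputs */
		0, /* no stream context: run blocking */
		live_model, ccv_cnnp_model_parameters(live_model, -1, -1));
}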
2960
2961void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2962{
2963 int to_param_ref;
2964 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2965 // To models.
2966 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2967 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2967, __extension__ __PRETTY_FUNCTION__
); }))
;
2968 // Tensor has to be inited already.
2969 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2969, __extension__ __PRETTY_FUNCTION__
); }))
;
2970 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2970, __extension__ __PRETTY_FUNCTION__
); }))
;
2971 // From models.
2972 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2973 const int to_parameter_size = to_compiled_data->parameters->rnum;
2974 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2975 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2975, __extension__ __PRETTY_FUNCTION__
); }))
;
2976 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2976, __extension__ __PRETTY_FUNCTION__
); }))
;
2977 int i, j;
2978 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2979 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2980 for (i = 0; i < aux_in_size; i++)
2981 inputs[i + 1] = aux_ins[i];
2982 for (i = 0; i < aux_out_size; i++)
2983 outputs[i + 1] = aux_outs[i];
2984 for (i = 0; i < rnum; i++)
2985 {
2986 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2987 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2987, __extension__ __PRETTY_FUNCTION__); }))
;
2988 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2988, __extension__ __PRETTY_FUNCTION__
); }))
;
2989 if (parallel_count > 1)
2990 {
2991 ccv_nnc_stream_context_t* streams[parallel_count];
2992 ccv_nnc_stream_signal_t* signal;
2993 if (stream_context)
2994 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2995 for (j = 0; j < parallel_count; j++)
2996 {
2997 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2998 if (!dest)
2999 {
3000 streams[j] = 0;
3001 continue;
3002 }
3003 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3004 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3005 int type = stream_type;
3006 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3007 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3008 // Wait signal to finish.
3009 if (stream_context)
3010 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3011 inputs[0] = outputs[0] = dest;
3012 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3013 if (stream_context)
3014 {
3015 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3016 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3017 }
3018 streams[j] = stream_0;
3019 }
3020 // If this should be blocking, blocking it.
3021 if (!stream_context)
3022 for (j = 0; j < parallel_count; j++)
3023 if (streams[j])
3024 ccv_nnc_stream_context_wait(streams[j]);
3025 } else {
3026 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
3027 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3027, __extension__
__PRETTY_FUNCTION__); }))
;
3028 inputs[0] = outputs[0] = dest;
3029 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3030 }
3031 // No need to mark this symbol as init'ed, it is already.
3032 }
3033 ccv_array_free(to_parameter_indices);
3034}
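The parallel_count > 1 branch above is a fan-out/fan-in: each per-device stream first waits on a signal emitted by the caller's stream, the command is enqueued, and the caller's stream then waits on a per-device completion signal; with no caller stream, every device stream is drained synchronously instead. The pattern in isolation, as a sketch using only the stream calls already present in this file (the helper name is hypothetical, and the enqueue step is elided):

static void _fan_out_fan_in(ccv_nnc_stream_context_t* const stream_context, ccv_nnc_stream_context_t* const* const streams, const int parallel_count)
{
	int j;
	ccv_nnc_stream_signal_t* signal = 0;
	if (stream_context)
		signal = ccv_nnc_stream_context_emit_signal_new(stream_context); // Fan-out fence.
	for (j = 0; j < parallel_count; j++)
	{
		if (!streams[j]) // Skipped device (e.g. no tensor allocated there).
			continue;
		if (stream_context)
			ccv_nnc_stream_context_wait_signal(streams[j], signal); // Start after the caller's prior work.
		/* ... enqueue the per-device work on streams[j] here ... */
		if (stream_context)
			ccv_nnc_stream_context_wait_signal(stream_context, ccv_nnc_stream_context_emit_signal_new(streams[j])); // Fan-in fence.
	}
	if (!stream_context) // No caller stream: block until every device stream drains.
		for (j = 0; j < parallel_count; j++)
			if (streams[j])
				ccv_nnc_stream_context_wait(streams[j]);
}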
3035
3036void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
3037{
3038 int to_param_ref;
3039 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3040 // To models.
3041 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
3042 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 3042, __extension__ __PRETTY_FUNCTION__
); }))
;
3043 // Tensor has to be inited already.
3044 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 3044, __extension__ __PRETTY_FUNCTION__
); }))
;
3045 ccv_nnc_tensor_t** tensor_gradients;
3046 if (to_compiled_data->backward.count > 1)
3047 tensor_gradients = to_compiled_data->tensors.accum_gradients;
3048 else
3049 tensor_gradients = to_compiled_data->tensors.gradients;
3050 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 3050, __extension__ __PRETTY_FUNCTION__
); }))
;
3051 // From models.
3052 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3053 const int to_parameter_size = to_compiled_data->parameters->rnum;
3054 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3055 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 3055, __extension__ __PRETTY_FUNCTION__
); }))
;
3056 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 3056, __extension__ __PRETTY_FUNCTION__
); }))
;
3057 int i, j;
3058 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
3059 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
3060 for (i = 0; i < aux_in_size; i++)
3061 inputs[i + 1] = aux_ins[i];
3062 for (i = 0; i < aux_out_size; i++)
3063 outputs[i + 1] = aux_outs[i];
3064 for (i = 0; i < rnum; i++)
3065 {
3066 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3067 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3067, __extension__ __PRETTY_FUNCTION__); }))
;
3068 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3068, __extension__ __PRETTY_FUNCTION__
); }))
;
3069 if (parallel_count > 1)
3070 {
3071 ccv_nnc_stream_context_t* streams[parallel_count];
3072 ccv_nnc_stream_signal_t* signal;
3073 if (stream_context)
3074 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
3075 for (j = 0; j < parallel_count; j++)
3076 {
3077 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
3078 if (!dest)
3079 {
3080 streams[j] = 0;
3081 continue;
3082 }
3083 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3084 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3085 int type = stream_type;
3086 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3087 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3088 // Wait signal to finish.
3089 if (stream_context)
3090 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3091 inputs[0] = outputs[0] = dest;
3092 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3093 if (stream_context)
3094 {
3095 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3096 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3097 }
3098 streams[j] = stream_0;
3099 }
3100 // If this should be blocking, blocking it.
3101 if (!stream_context)
3102 for (j = 0; j < parallel_count; j++)
3103 if (streams[j])
3104 ccv_nnc_stream_context_wait(streams[j]);
3105 } else {
3106 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
3107 if (!dest)
3108 continue;
3109 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3109, __extension__
__PRETTY_FUNCTION__); }))
;
3110 inputs[0] = outputs[0] = dest;
3111 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3112 }
3113 // No need to mark this symbol as init'ed, it is already.
3114 }
3115 ccv_array_free(to_parameter_indices);
3116}
3117
3118void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
3119{
3120 // Only CUDA backend has this feature.
3121#ifdef HAVE_CUDA1
3122 int to_param_ref;
3123 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3124 // To models.
3125 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3126 assert(compiled_data);
3127 // Tensors have to be initialized already.
3128 assert(!!compiled_data->tensors_init.v);
3129 assert(compiled_data->tensors.parameters);
3130 // From models.
3131 const int parallel_count = ccv_max(model->parallel_count, 1);
3132 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3133 int i;
3134 for (i = 0; i < rnum; i++)
3135 {
3136 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
3137 assert(dest_d >= 0);
3138 assert(dest_d < compiled_data->parameters->rnum);
3139 if (parallel_count > 1)
3140 {
3141 assert(0 && "Cannot support this when data parallel is in effect.");
3142 } else {
3143 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d]);
3144 assert(src);
3145 ccv_nnc_tensor_param_t params = src->info;
3146 if (CCV_TENSOR_GET_MEMORY(params.type) != CCV_TENSOR_GPU_MEMORY)
3147 continue;
3148 const size_t size = ccv_nnc_tensor_data_size(params);
3149 if (size <= 0)
3150 continue;
3151 const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1);
3152 const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
3153 ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
3154 tensor->dataof = 0;
3155 tensor->alias_ref = 0;
3156 tensor->sig = 0;
3157 tensor->refcount = 1;
3158 tensor->info = params;
3159 if (tfb)
3160 {
3161 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
3162 // This corresponds to mat->step.
3163 tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
3164 } else // This won't be recognized by ccv_dense_matrix_t
3165 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
3166 // Remove this flag so it can be deallocated as usual.
3167 tensor->type &= ~CCV_NO_DATA_ALLOC;
3168 assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
3169 void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
3170 if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
3171 {
3172 tensor->data.u8 = (uint8_t*)ptr;
3173 tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to the CPU and would prefer an explicit prefetch call.
3174 } else {
3175 // Allocation failed.
3176 ccfree(tensor);
3177 continue;
3178 }
3179 // TODO: Cannot run this on the stream context yet, due to allocation and deallocations.
3180 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0);
3181 cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size);
3182 compiled_data->tensors.parameters[dest_d] = tensor;
3183 // Now we can free the old one.
3184 if (should_free)
3185 ccv_nnc_tensor_free(src);
3186 }
3187 // No need to mark this symbol as init'ed, it is already.
3188 }
3189 ccv_array_free(to_parameter_indices);
3190#endif
3191}
3192
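// Returns the default minimizer currently set on the compiled model; any
// per-parameter overrides recorded in minimize.parameters are not reflected here.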
3193ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
3194{
3195 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3196 assert(compiled_data);
3197 return compiled_data->minimize.minimizer;
3198}
3199
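// Set the minimizer either as the model-wide default (set_parameters == 0) or
// for a selected subset of parameters. If the new minimizer needs more saved
// auxiliary tensors than any seen before, the updated_parameters / update_nodes /
// saved_aux storage is reallocated and re-scattered; and if any update node
// actually changed, the fit-mode graph and the apply-gradients graph are freed
// so they will be rebuilt with the new configuration.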
3200void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
3201{
3202 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3203 assert(compiled_data);
3204 const int parameter_size = compiled_data->parameters->rnum;
3205 if (parameter_size == 0)
3206 return;
3207 if (reset)
3208 { assert(set_parameters == 0 && set_parameter_size == 0); }
3209 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3210 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
3211 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
3212 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
3213 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3214 // We update all parameters; at this point, we have one minimizer.
3215 if (set_parameters == 0 || set_parameter_size == 0)
3216 compiled_data->minimize.minimizer = minimizer;
3217 int i;
3218 if (set_parameters && set_parameter_size)
3219 {
3220 // We need to record which minimizer goes with these parameters.
3221 if (!compiled_data->minimize.parameters)
3222 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
3223 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
3224 set_minimizer_for_parameter->minimizer = minimizer;
3225 set_minimizer_for_parameter->parameter_size = set_parameter_size;
3226 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
3227 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
3228 }
3229 // If reset is true, clear the parameters array.
3230 if (reset && compiled_data->minimize.parameters)
3231 {
3232 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3233 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
3234 ccv_array_clear(compiled_data->minimize.parameters);
3235 }
3236 if (!compiled_data->update_nodes)
3237 return;
3238 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
3239 assert(symbolic_graph);
3240 if (saved_aux_size > old_max_saved_aux_size)
3241 {
3242 assert(compiled_data->updated_parameters);
3243 // Reallocate first, move them around later.
3244 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
3245 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
3246 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
3247 // We need to do this from back to front: because saved_aux_size > old_max_saved_aux_size, the regions could overlap.
3248 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
3249 }
3250 int flag = 0;
3251 const int parallel_count = ccv_max(model->parallel_count, 1);
3252 if (set_parameters && set_parameter_size)
3253 {
3254 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
3255 for (i = 0; i < set_parameter_size; i++)
3256 {
3257 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
3258 assert(set_parameters[i]->param_sel != 0);
3259 const int old_rnum = parameter_indices->rnum;
3260 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
3261 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
3262 assert(set_parameters[i]->param_ref != 0);
3263 if (param_ref >= 0)
3264 {
3265 assert(param_ref + old_rnum < parameter_indices->rnum);
3266 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
3267 parameter_indices->rnum = old_rnum + 1;
3268 }
3269 }
3270 // We may have duplicate indices, but that is OK; we will just set them twice.
3271 for (i = 0; i < parameter_indices->rnum; i++)
3272 {
3273 const int d = *(int*)ccv_array_get(parameter_indices, i);
3274 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
3275 flag = 1;
3276 }
3277 ccv_array_free(parameter_indices);
3278 } else {
3279 for (i = 0; i < parameter_size; i++)
3280 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
3281 flag = 1;
3282 if (compiled_data->minimize.parameters)
3283 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
3284 flag = 1;
3285 }
3286 if (flag)
3287 {
3288 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up the apply-gradients graph.
3289 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
3290 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3291 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3292 }
3293}
3294
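// A minimal usage sketch for the setter above (hypothetical: assumes an
// already-compiled `model`, and builds the command with the plain ccv_nnc_cmd
// constructor rather than a convenience wrapper):
//
//   ccv_nnc_cmd_t sgd = ccv_nnc_cmd(CCV_NNC_SGD_FORWARD, 0, ccv_nnc_cmd_auto, 0);
//   ccv_cnnp_model_set_minimizer(model, sgd, 0, 0, 0); // default for all parameters
//   ccv_cnnp_model_set_minimizer(model, sgd, 1, 0, 0); // reset == 1 also clears per-parameter overrides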
3295void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
3296{
3297 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3298 assert(compiled_data);
3299 compiled_data->compile_params = compile_params;
3300}
3301
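// Dump up to four Graphviz dot files, in order: the symbolic graph, the
// compiled graph, the gradient-accumulation graph, and the apply-gradients
// graph; out_size controls how many of them get written.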
3302void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
3303{
3304 if (model->graph && out_size > 0)
3305 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
3306 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
3307 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
3308 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
3309 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
3310 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
3311 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
3312}
3313
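// Feed the whole symbolic graph (no source / destination restriction) through
// the caller-supplied format_fn, passing context along with each callback.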
3314void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
3315{
3316 if (model->graph)
3317 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
3318}
3319
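// Tear down everything owned by the compiled data: parameter / internal ids
// and tensors (skipping parameter tensors whose low bit marks them as not
// owned), gradients, per-parameter minimizer records, the stream contexts in
// the stream map, the various graphs, gradient checkpoints, and finally the
// struct itself.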
3320static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
3321{
3322 int i;
3323 const int parameter_size = compiled_data->parameters->rnum;
3324 ccv_array_free(compiled_data->parameters);
3325 if (compiled_data->parameter_flags)
3326 ccfree(compiled_data->parameter_flags);
3327 const int internal_size = compiled_data->internals->rnum;
3328 ccv_array_free(compiled_data->internals);
3329 assert(compiled_data->ids.parameters->rnum == parameter_size);
3330 assert(compiled_data->ids.internals->rnum == internal_size);
3331 for (i = 0; i < parameter_size; i++)
3332 ccfree(*(char**)ccv_array_get(compiled_data->ids.parameters, i));
3333 ccv_array_free(compiled_data->ids.parameters);
3334 for (i = 0; i < internal_size; i++)
3335 ccfree(*(char**)ccv_array_get(compiled_data->ids.internals, i));
3336 ccv_array_free(compiled_data->ids.internals);
3337 const int parallel_count = compiled_data->parallel_count > 0 ? compiled_data->parallel_count : _ccv_cnnp_model_root_parallel_count(model);
3338 if (compiled_data->tensors.parameters)
3339 {
3340 for (i = 0; i < parameter_size * parallel_count; i++)
3341 // Free it only if it is not marked (via the low bit) as belonging to someone else.
3342 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
3343 if (compiled_data->tensors.parameters[i])
3344 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
3345 for (i = 0; i < internal_size * parallel_count; i++)
3346 if (compiled_data->tensors.internals[i])
3347 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
3348 ccfree(compiled_data->tensors.parameters);
3349 }
3350 if (compiled_data->tensors.gradients)
3351 {
3352 for (i = 0; i < parameter_size * parallel_count; i++)
3353 {
3354 if (compiled_data->tensors.gradients[i])
3355 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3356 if (compiled_data->tensors.accum_gradients[i])
3357 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3358 }
3359 ccfree(compiled_data->tensors.gradients);
3360 }
3361 if (compiled_data->minimize.parameters)
3362 {
3363 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3364 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
3365 ccv_array_free(compiled_data->minimize.parameters);
3366 }
3367 if (compiled_data->rewindables)
3368 ccv_array_free(compiled_data->rewindables);
3369 if (compiled_data->tensors_init.v)
3370 ccfree(CCV_NNC_INIT_V(compiled_data->tensors_init.v));
3371 if (compiled_data->evaluate.tos)
3372 ccfree(compiled_data->evaluate.tos);
3373 compiled_data->evaluate.tos = 0;
3374 if (compiled_data->stream_map)
3375 {
3376 khiter_t k;
3377 for (k = kh_begin(compiled_data->stream_map); k != kh_end(compiled_data->stream_map); ++k)
3378 {
3379 if (!kh_exist(compiled_data->stream_map, k))
3380 continue;
3381 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k);
3382 ccv_nnc_stream_context_free(stream);
3383 }
3384 kh_destroy(stream_map, compiled_data->stream_map);
3385 }
3386 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3387 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3388 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3389 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3390 if (compiled_data->gradient_checkpoints)
3391 {
3392 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3393 {
3394 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i);
3395 assert(checkpoint->inputs);
3396 ccfree(checkpoint->inputs);
3397 ccv_array_free(checkpoint->tensor_symbols);
3398 }
3399 ccv_array_free(compiled_data->gradient_checkpoints);
3400 }
3401 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3402 ccfree(compiled_data);
3403}
3404
3405void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3406{
3407 ccv_cnnp_model_deinit(model);
3408 if (model->isa->dealloc)
3409 model->isa->dealloc(model);
3410 if (model->io)
3411 {
3412 int i;
3413 for (i = 0; i < model->io->rnum; i++)
3414 {
3415 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
3416 if (model_io->outgoings)
3417 ccv_array_free(model_io->outgoings);
3418 if (model_io->incomings)
3419 ccv_array_free(model_io->incomings);
3420 if (model_io->dependencies)
3421 ccv_array_free(model_io->dependencies);
3422 ccfree(model_io);
3423 }
3424 ccv_array_free(model->io);
3425 }
3426 if (model->parameter_indices)
3427 ccv_array_free(model->parameter_indices);
3428 if (model->inputs)
3429 ccfree(model->inputs);
3430 if (model->graph)
3431 ccv_nnc_symbolic_graph_free(model->graph);
3432 if (model->compiled_data)
3433 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3434 if (model->name)
3435 ccfree(model->name);
3436 ccfree(model);
3437}
3438
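// Request cancellation of both the main graph and the apply-gradients graph,
// if the model has been compiled; a no-op otherwise.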
3439void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3440{
3441 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3442 if (!compiled_data)
3443 return;
3444 if (compiled_data->graph)
3445 ccv_nnc_graph_cancel(compiled_data->graph);
3446 if (compiled_data->apply_gradients.graph)
3447 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3448}
3449
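// Plain getter / setter pair for the model's execution flags.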
3450void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3451{
3452 model->exec_flags = flags;
3453}
3454
3455int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3456{
3457 return model->exec_flags;
3458}