Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2427, column 1
1st function call argument is an uninitialized value
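
The flagged call site is at line 2427 of ccv_cnnp_model.c, which is outside the excerpt below. As a minimal, hypothetical sketch of the defect class this checker reports (the names consume and example are illustrative and do not appear in ccv), a local variable reaches a call as its first argument while still uninitialized on at least one path:

#include <stdio.h>

static void consume(int value)
{
    printf("%d\n", value);
}

static void example(int flag)
{
    int value; /* only written when flag is non-zero */
    if (flag)
        value = 1;
    consume(value); /* when flag == 0, the 1st function call argument is an uninitialized value */
}

int main(void)
{
    example(0);
    return 0;
}

Analyzing such a translation unit with clang --analyze (or scan-build) produces a diagnostic with the same wording from the core checkers.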

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2025-12-18-173245-1246484-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7#ifdef HAVE_CUDA
8#include "gpu/ccv_nnc_compat.h"
9#endif
10
11// MARK - Level-5 API
12
13ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
14{
15 if (!model->io)
16 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
17 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
18 model_io->param_ref = 0;
19 model_io->param_sel = 0;
20 model_io->visit = 0;
21 model_io->model = model;
22 model_io->dependencies = 0;
23 model_io->dependents = 0;
24 model_io->outgoings = 0;
25 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
26 ccv_array_push(model->io, &model_io);
27 if (input_size > 0)
28 {
29 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
30 ccv_array_resize(model_io->incomings, input_size);
31 int i;
32 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
33 for (i = 0; i < input_size; i++)
34 {
35 if (!inputs[i]->outgoings)
36 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
37 ccv_array_push(inputs[i]->outgoings, &model_io);
38 }
39 } else {
40 model_io->incomings = 0;
41 }
42 return model_io;
43}
44
45void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
46{
47 assert(dependency_size > 0);
48 if (!model_io->dependencies)
49 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
50 int i, j;
51 for (i = 0; i < dependency_size; i++)
52 {
53 int flag = 0;
54 // Check if it already exists or not.
55 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
56 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
57 flag = 1;
58 if (flag)
59 continue;
60 ccv_array_push(model_io->dependencies, dependencies + i);
61 ++dependencies[i]->dependents;
62 }
63}
64
65int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
66{
67 return model->output_size;
68}
69
70int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
71{
72 // If the model is compiled, it defaults to trainable (1) unless explicitly set otherwise.
73 if (model->compiled_data)
74 return model->is_trainable >= 0 ? model->is_trainable : 1;
75 return model->is_trainable;
76}
77
78ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
79{
80 if (!model->io)
81 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
82 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
83 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
84 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
85 model_io->visit = 0;
86 model_io->model = model;
87 model_io->outputs = 0;
88 model_io->dependencies = 0;
89 model_io->dependents = 0;
90 model_io->incomings = 0;
91 model_io->outgoings = 0;
92 ccv_array_push(model->io, &model_io);
93 return model_io;
94}
95
96void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
97{
98 model->notify_hook.func = func;
99 model->notify_hook.context = context;
100}
101
102void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
103{
104 if (model->notify_hook.func)
105 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
106 if (model->isa->notify)
107 model->isa->notify(model, tag, payload);
108}
109
110static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
111{
112 int i, j;
113 for (i = 0; i < graph_exec_symbol_size; i++)
114 {
115 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
116 // Check whether this graph exec symbol has any duplicate.
117 for (j = i + 1; j < graph_exec_symbol_size;)
118 {
119 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
120 // If there is an identical graph exec symbol, remove it.
121 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
122 {
123 if (j + 1 < graph_exec_symbol_size)
124 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
125 --graph_exec_symbol_size;
126 continue;
127 }
128 ++j;
129 }
130 }
131 return graph_exec_symbol_size;
132}
133
134void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
135{
136 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
137 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
138 int i;
139 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
140 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
141 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
142 {
143 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
144 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
145 {
146 // Only add to parameter_indices if it is trainable.
147 if (add_to_array_context->add_parameter_indices)
148 ccv_array_add_unique_int(model->parameter_indices, i);
149 // Found it, return, don't add it.
150 return;
151 }
152 }
153 // Only add to parameter_indices if it is trainable.
154 if (add_to_array_context->add_parameter_indices)
155 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
156 // This is a new one, no need to add_unique_int, it is unique.
157 ccv_array_push(add_to_array_context->symbols, &symbol);
158 if (add_to_array_context->trainables)
159 ccv_array_push(add_to_array_context->trainables, &is_trainable);
160 char id[2048];
161 id[0] = add_to_array_context->prefix;
162 id[1] = '-';
163 int total_len = 2;
164 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
165 {
166 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
167 int len;
168 if (name->name && name->name[0] != '\0')
169 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
170 else
171 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
172 total_len += len;
173 if (total_len >= 2047)
174 break;
175 }
176 if (total_len < 2047)
177 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
178 assert(total_len < 2048);
179 char *heap_id = (char*)ccmalloc(total_len + 1);
180 memcpy(heap_id, id, total_len + 1);
181 ccv_array_push(add_to_array_context->ids, &heap_id);
182 ++add_to_array_context->sequence->it;
183}
184
185static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
186{
187 compiled_data->f = compiled_data->fits + output_size;
188 compiled_data->xpu_alloc.mp_hdr = -1;
189 compiled_data->xpu_alloc.freed = kh_init(dy_str);
190 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
191 compiled_data->gradient_checkpoints = gradient_checkpoints;
192}
193
194typedef struct {
195 void* old_graph_exec_symbol_new_hook_context;
196 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
197 ccv_nnc_symbolic_graph_t* graph;
198 ccv_cnnp_model_build_data_t* build_data;
199} ccv_cnnp_model_set_exec_flags_context_t;
200
201static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
202{
203 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
204 if (flags_context->build_data->exec_flags)
205 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
206 if (flags_context->old_graph_exec_symbol_new_hook)
207 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
208}
209
210static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
211{
212 assert(model->graph);
213 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
214 int i;
215 for (i = 0; i < input_size; i++)
216 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
217 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
218 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
219 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
220 ccv_cnnp_model_sequence_t model_sequence = {
221 .bank = kh_init(ccv_cnnp_model_name_bank)
222 };
223 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
224 .add_parameter_indices = 1,
225 .prefix = 't',
226 .sequence = &model_sequence,
227 .symbols = parameters,
228 .ids = parameter_ids,
229 .trainables = parameter_trainables,
230 };
231 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
232 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
233 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
234 .add_parameter_indices = 0,
235 .prefix = 'r',
236 .sequence = &model_sequence,
237 .symbols = internals,
238 .ids = internal_ids,
239 .trainables = 0,
240 };
241 ccv_cnnp_model_build_data_t build_data = {
242 .exec_flags = 0,
243 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
244 .model_sequence = &model_sequence,
245 .add_to_array = ccv_cnnp_model_add_to_array,
246 .parameters = parameters,
247 .context = {
248 .add_to_parameter = &add_to_parameter_context,
249 .add_to_output = &add_to_output_context,
250 },
251 .gradient_checkpoints = 0,
252 };
253 model->data = &build_data;
254 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
255 .graph = model->graph,
256 .build_data = &build_data,
257 .old_graph_exec_symbol_new_hook = 0,
258 .old_graph_exec_symbol_new_hook_context = 0
259 };
260 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
261 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
262 // Reset back to previous hook.
263 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
264 for (i = 0; i < model->output_size; i++)
265 {
266 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
267 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
268 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
269 continue;
270 // If output is an alias, insert a data transform regardless, for result correctness (we cannot bind an alias). You can check the ccv_nnc_tensor_bind_symbol method
271 // to see that we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind an alias tensor itself (this is expected, sort of, to be
272 // honest, because we cannot handle the case where an alias covers part of the original tensor but is bound differently).
273 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
274 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
275 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
276 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
277 }
278 model->data = 0;
279 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
280 if (model_sequence.sequences)
281 ccv_array_free(model_sequence.sequences);
282 // Check if there are parameters that are not trainable. If there are, we will allocate a uint64 bitmap to record that.
283 int not_trainables = 0;
284 // Assert no parameter is an alias.
285 for (i = 0; i < parameters->rnum; i++)
286 {
287 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
288 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
289 assert(alias_to.graph == 0); // Cannot find the one alias to.
290 if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
291 not_trainables = 1;
292 }
293 assert(parameters->rnum == parameter_trainables->rnum);
294 uint64_t* parameter_flags = 0;
295 if (not_trainables)
296 {
297 parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
298 for (i = 0; i < parameter_trainables->rnum; i++)
299 if (*(int*)ccv_array_get(parameter_trainables, i))
300 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
301 }
302 ccv_array_free(parameter_trainables);
303 // Assert no internal is an alias.
304 for (i = 0; i < internals->rnum; i++)
305 {
306 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
307 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
308 assert(alias_to.graph == 0); // Cannot find the one alias to.
309 }
310 const int output_size = model->output_size;
311 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
312 const int parameters_rnum = parameters->rnum;
313 if (input_size > 0)
314 {
315 ccv_array_resize(parameters, parameters_rnum + input_size);
316 memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
317 }
318 ccv_nnc_symbolic_graph_simplify(model->graph,
319 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
320 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
321 CCV_NNC_SIMPLIFY_OPS_FUSION,
322 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
323 ccv_array_get(parameters, 0), parameters_rnum + input_size,
324 model->outputs, output_size,
325 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
326 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
327 // Size it down.
328 parameters->rnum = parameters_rnum;
329 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
330 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
331 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
332 assert(evaluate_to_size > 0);
333 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
334 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
335 compiled_data->loss = loss;
336 if (loss.cmd == CCV_NNC_NOOP)
337 {
338 // If no loss function is provided, there are no fits.
339 for (i = 0; i < output_size; i++)
340 {
341 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
342 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
343 if (alias_to.d < 0)
344 compiled_data->f[i] = model->outputs[i];
345 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
346 int ofs[CCV_NNC_MAX_DIM_ALLOC];
347 int inc[CCV_NNC_MAX_DIM_ALLOC];
348 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
349 int j;
350 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
351 { assert(ofs[j] == 0); } // There is no ofs.
352 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
353 }
354 }
355 } else {
356 for (i = 0; i < output_size; i++)
357 {
358 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
359 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
360 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
361 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
362 }
363 }
364 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
365 ccv_nnc_symbolic_graph_simplify(model->graph,
366 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
367 0, 0, // No need to provide binds at this point.
368 compiled_data->f, model->output_size,
369 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
370 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
371 // If inputs are from GPU, stream type is GPU.
372 compiled_data->parameters = parameters;
373 compiled_data->parameter_flags = parameter_flags;
374 compiled_data->internals = internals;
375 compiled_data->ids.parameters = parameter_ids;
376 compiled_data->ids.internals = internal_ids;
377 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
378}
379
380static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
381{
382 ccv_array_t* const stack = (ccv_array_t*)context;
383 ccv_array_push(stack, &symbol.d);
384}
385
386static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
387{
388 const ccv_nnc_tensor_symbol_t src_symbol = {
389 .d = src_index,
390 .graph = src_graph
391 };
392 const ccv_nnc_tensor_symbol_t dest_symbol = {
393 .d = dest_index,
394 .graph = dest_graph
395 };
396 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
397 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
398 int ofs[CCV_NNC_MAX_DIM_ALLOC];
399 int inc[CCV_NNC_MAX_DIM_ALLOC];
400 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
401 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
402}
403
404static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
405{
406 const ccv_nnc_tensor_symbol_t src_symbol = {
407 .d = src_index,
408 .graph = src_graph
409 };
410 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
411 const ccv_nnc_tensor_symbol_t dest_symbol = {
412 .d = dest_index,
413 .graph = dest_graph
414 };
415 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
416 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
417}
418
419static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
420static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
421
422typedef struct {
423 int parallel_count;
424 ccv_nnc_symbolic_graph_t* graph;
425 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
426} ccv_nnc_graph_exec_update_t;
427
428static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
429{
430 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
431 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
432 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
433 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
434 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
435 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
436 const int parallel_count = graph_exec_update->parallel_count;
437 int i;
438 for (i = 1; i < parallel_count; i++)
439 {
440 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
441 if (!CCV_NO_GRAPH_EXEC(copy))
442 {
443 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
444 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
445 }
446 }
447}
448
449void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
450{
451 assert(model->graph);
452 assert(model->compiled_data);
453 assert(!init->graph);
454 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
455 init->graph = ccv_nnc_symbolic_graph_new();
456 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
457 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
458 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
459 init->parallel_count = model->parallel_count;
460 init->memory_compression = model->memory_compression;
461 init->memory_reduction = model->memory_reduction;
462 init->gradient_checkpointing = model->gradient_checkpointing;
463 init->compiled_data->stream_type = model->compiled_data->stream_type;
464 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
465 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
466 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
467 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
468 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
469 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
470 int i, j;
471 // Verify that parameters, internals and saved_aux in both graphs have the same dimensionality.
472 for (i = 0; i < compiled_data->parameters->rnum; i++)
473 {
474 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
475 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
476 }
477 for (i = 0; i < compiled_data->internals->rnum; i++)
478 {
479 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
480 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
481 }
482 // Update inputs.
483 assert(model->input_size == init->input_size);
484 for (i = 0; i < model->input_size; i++)
485 if (model->inputs[i].d >= 0)
486 {
487 assert(init->inputs[i].d >= 0);
488 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
489 }
490 // Update outputs.
491 assert(model->output_size == init->output_size);
492 for (i = 0; i < model->output_size; i++)
493 {
494 if (model->outputs[i].d >= 0)
495 {
496 assert(init->outputs[i].d >= 0);
497 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
498 }
499 if (model->outputs[i].d != model->compiled_data->f[i].d)
500 {
501 assert(init->outputs[i].d != init->compiled_data->f[i].d);
502 if (model->compiled_data->f[i].d >= 0)
503 {
504 assert(init->compiled_data->f[i].d >= 0);
505 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
506 }
507 }
508 }
509 // Go through the graph to set tensor on matching symbols
510 for (i = 0; i < stack->rnum; i++)
511 {
512 const int d = *(int*)ccv_array_get(stack, i);
513 // If it exceeds the range, skip.
514 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
515 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
516 continue;
517 const ccv_nnc_graph_exec_symbol_t src_symbol = {
518 .d = d,
519 .graph = init->graph
520 };
521 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
522 .d = d,
523 .graph = model->graph
524 };
525 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
526 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
527 // If the command doesn't match, skip.
528 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
529 continue;
530 // Now get all the inputs and outputs; if they match, set them.
531 const int* src_inputs;
532 int src_input_size;
533 const int* src_outputs;
534 int src_output_size;
535 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
536 const int* dest_inputs;
537 int dest_input_size;
538 const int* dest_outputs;
539 int dest_output_size;
540 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
541 // We may have mismatched input / output sizes because this is the minimizer and it has
542 // a different saved_aux (for example, when we shrunk it with CMD_NOOP).
543 if (src_input_size != dest_input_size)
544 continue;
545 if (src_output_size != dest_output_size)
546 continue;
547 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
548 // There may be mismatches between the source tensor symbols and the destination tensor symbols. The reason is that
549 // we may pass in the minimizer later, therefore we may allocate tensors for the minimizer later in the original
550 // graph, whereas in the newly created graph it is streamlined (the minimizer exists from the beginning). That
551 // will make the order of tensor symbol creation different, and therefore exactly which tensor is which will be wrong as
552 // well. However, setting a new minimizer won't change the exec symbol ordering, because we never create new exec
553 // symbols after the gradient init step. Changing to a new minimizer just updates that exec symbol's settings; it is not
554 // a new exec symbol.
555 for (j = 0; j < src_input_size; j++)
556 if (src_inputs[j] >= 0)
557 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
558 for (j = 0; j < src_output_size; j++)
559 if (src_outputs[j] >= 0)
560 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
561 }
562 ccv_array_free(stack);
563 // After this, we get all tensors in the model graph resolved through tensor_auto.
564 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
565 // Verify the symbols we get match.
566 const int parameter_size = compiled_data->parameters->rnum;
567 for (i = 0; i < parameter_size; i++)
568 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
569 const int internal_size = compiled_data->internals->rnum;
570 for (i = 0; i < internal_size; i++)
571 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
572 // Go through compiled data.
573 if (compiled_data->tensor_arena)
574 {
575 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
576 if (flag == 0 && compiled_data->graph_exec_arena)
577 {
578 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
579 // Since we will reinit, if we previously set is_test, we need to set it again.
580 if (compiled_data->is_test)
581 {
582 const int parallel_count = ccv_max(model->parallel_count, 1);
583 ccv_nnc_graph_exec_update_t update = {
584 .parallel_count = parallel_count,
585 .graph = model->graph,
586 .graph_exec_arena = compiled_data->graph_exec_arena,
587 };
588 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
589 }
590 } else
591 // Free-up tensor arena & graph exec arena.
592 _ccv_cnnp_compiled_data_graph_free(compiled_data);
593 }
594 // There are other compiled graphs, for accum and apply gradients.
595 // However, the main conclusion is that these absorb operations shouldn't impact parameters.
596 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
597 // don't allocate it ourselves, it is not a concern. For normal gradients, the shape cannot
598 // be changed, otherwise the parameters' shape would be meaningless. The same goes for internals.
599 // That is why we don't update these compiled graphs at all at this point.
600 // Free the model, we've already "absorbed" it.
601 ccv_cnnp_model_free(init);
602}
603
604void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
605{
606 assert(input_size == model->input_size || model->input_size == 0);
607 if (model->input_size == 0)
608 model->input_size = input_size;
609 if (!model->graph) // The graph is not compiled yet.
610 {
611 model->graph = ccv_nnc_symbolic_graph_new();
612 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
613 assert(model->compiled_data);
614 int i, flag = 0;
615 for (i = 0; !flag && i < input_size; i++)
616 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
617 // If inputs are from GPU, stream type is GPU.
618 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
619 model->compiled_data->minimize.minimizer = minimizer;
620 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
621 } else {
622 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
623 // And then absorb the "new model" to the old one.
624 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
625 ccv_cnnp_model_absorb(model, init, inputs, input_size);
626 // Reset minimizer.
627 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
628 }
629}
630
631ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
632{
633 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
634 new_model->is_trainable = is_trainable;
635 return new_model;
636}
637
638void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
639{
640 assert(model->graph);
641 assert(output_size == model->output_size);
642 ccv_nnc_symbolic_graph_t* const graph = model->graph;
643 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
644 int i;
645 for (i = 0; i < output_size; i++)
646 {
647 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
648 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
649 }
650}
651
652void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
653{
654 if (workspace_size == model->workspace_size)
655 return;
656 model->workspace_size = workspace_size;
657 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
658 if (compiled_data && compiled_data->graph)
659 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
660}
661
662size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
663{
664 return model->workspace_size;
665}
666
667void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
668{
669 if (parallel == 0)
670 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
671 else
672 model->parallel_count = parallel;
673 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
674 if (compiled_data)
675 { assert(!compiled_data->graph); }
676}
677
678void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
679{
680 model->max_stream_count = max_stream_count;
681 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
682 if (compiled_data)
683 { assert(!compiled_data->graph); }
684}
685
686void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
687{
688 model->memory_compression = memory_compression;
689 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
690 if (compiled_data)
691 { assert(!compiled_data->graph); }
692}
693
694void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
695{
696 model->memory_reduction = memory_reduction;
697 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
698 if (compiled_data)
699 { assert(!compiled_data->graph); }
700}
701
702void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
703{
704 model->gradient_checkpointing = gradient_checkpointing;
705}
706
707int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
708{
709 return model->gradient_checkpointing;
710}
711
712typedef struct {
713 int parallel_count;
714 ccv_nnc_symbolic_graph_t* graph;
715 ccv_cnnp_compiled_data_t* compiled_data;
716 ccv_nnc_tensor_arena_t* tensor_arena;
717} ccv_nnc_tensor_init_states_t;
718
719static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
720{
721 int i;
722 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
723 for (i = 0; i < compiled_data->parameters->rnum; i++)
724 {
725 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
726 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
727 return 1;
728 }
729 for (i = 0; i < compiled_data->internals->rnum; i++)
730 {
731 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
732 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
733 return 1;
734 }
735 return 0;
736}
737
738static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
739{
740 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
741 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
742 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
743 if (!output_tensor)
744 return;
745 const int d = output_symbol.d;
746 assert(d < tensor_init_states->compiled_data->tensors_init.size);
747 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
748 if (init_v[d >> 5] & (1u << (d & 0x1f)))
749 return;
750 init_v[d >> 5] |= (1u << (d & 0x1f));
751 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
752 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
753 const int parallel_count = tensor_init_states->parallel_count;
754 int i;
755 for (i = 1; i < parallel_count; i++)
756 {
757 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
758 if (copy)
759 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
760 }
761}
762
763 // This method can only handle cases where we added new tensors and execs, never deleted them. This invariant holds because
764 // we set up everything (including calling the simplify method) in the ccv_cnnp_model_compile method, before this rewind setup.
765static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
766{
767 assert(model->graph);
768 assert(model->compiled_data);
769 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
770 assert(compiled_data->rewindables);
771 int i;
772 for (i = 0; i < compiled_data->rewindables->rnum; i++)
773 {
774 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
775 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
776 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
777 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
778 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
779 }
780 ccv_array_clear(compiled_data->rewindables);
781 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
782}
783
784static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
785{
786 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
787 .type = CCV_CNNP_REWIND_TENSOR,
788 .tensor = symbol
789 };
790 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
791 ccv_array_push(rewind_symbols, &rewind_symbol);
792}
793
794 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
795{
796 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
797 .type = CCV_CNNP_REWIND_TENSOR,
798 .tensor = symbol
799 };
800 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
801 ccv_array_push(rewind_symbols, &rewind_symbol);
802}
803
804static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
805{
806 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
807 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
808 .graph_exec = symbol
809 };
810 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
811 ccv_array_push(rewind_symbols, &rewind_symbol);
812}
813
814static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
815{
816 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
817 if (!CCV_NO_GRAPH_EXEC(update_exec))
818 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
819 int i;
820 for (i = 1; i < parallel_count; i++)
821 {
822 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
823 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
824 if (!CCV_NO_GRAPH_EXEC(copy))
825 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
826 }
827}
828
829static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
830{
831 assert(compiled_data);
832 assert(symbolic_graph);
833 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
834 int i;
835 for (i = 1; i < parallel_count; i++)
836 {
837 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
838 if (copy_symbol.graph)
839 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
840 }
841 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
842 if (graph_exec_arena)
843 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
844 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
845 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
846 if (gradient_graph_exec_arena)
847 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
848}
849
850static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
851{
852 int this_parameter_flag = 0;
853 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
854 return this_parameter_flag;
855 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
856 int j, k;
857 // For no-op, we can preserve previous saved_aux_size.
858 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
859 {
860 // If the old minimizer is a noop, the old_saved_aux_size should stay whatever its previous
861 // saved_aux_size was; otherwise we would reinitialize the saved_aux repeatedly when switching
862 // between noop and a real minimizer. That matters because high-level frameworks use the noop
863 // minimizer to keep selected model parameters from updating.
864 int old_saved_aux_size;
865 if (old_minimizer.cmd == CCV_NNC_NOOP)
866 {
867 int input_size;
868 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
869 if (input_size < 2) // This is not legit.
870 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
871 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
872 old_saved_aux_size = input_size - 2;
873 } else
874 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
875 if (old_saved_aux_size != saved_aux_size)
876 {
877 this_parameter_flag = 1;
878 if (saved_aux_size > old_saved_aux_size)
879 {
880 // Allocate new tensor symbols.
881 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
882 for (j = old_saved_aux_size; j < saved_aux_size; j++)
883 {
884 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
885 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
886 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
887 for (k = 1; k < parallel_count; k++)
888 {
889 ccv_nnc_tensor_param_t dev_info = info;
890 if (k != device_id)
891 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
892 else
893 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
894 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
895 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
896 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
897 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
898 }
899 }
900 } else {
901 for (j = saved_aux_size; j < old_saved_aux_size; j++)
902 {
903 for (k = 1; k < parallel_count; k++)
904 {
905 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
906 if (src_copy.d >= 0)
907 {
908 ccv_nnc_tensor_symbol_free(graph, src_copy);
909 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
910 }
911 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
912 if (dest_copy.d >= 0)
913 {
914 ccv_nnc_tensor_symbol_free(graph, dest_copy);
915 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
916 }
917 }
918 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
919 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
920 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
921 }
922 }
923 }
924 }
925 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
926 if (this_parameter_flag)
927 {
928 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
929 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
930 const int* inputs = 0;
931 int input_size = 0;
932 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
933 assert(input_size >= 1);
934 update_inputs[0].d = inputs[0];
935 update_inputs[0].graph = graph;
936 update_inputs[1].d = inputs[1];
937 update_inputs[1].graph = graph;
938 update_outputs[0] = updated_parameters[parameter_indice];
939 for (j = 0; j < saved_aux_size; j++)
940 {
941 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
942 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
943 }
944 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
945 for (k = 1; k < parallel_count; k++)
946 {
947 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
948 assert(copy.d >= 0);
949 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
950 assert(input_size >= 1);
951 update_inputs[0].d = inputs[0];
952 update_inputs[0].graph = graph;
953 update_inputs[1].d = inputs[1];
954 update_inputs[1].graph = graph;
955 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
956 for (j = 0; j < saved_aux_size; j++)
957 {
958 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
959 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
960 }
961 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
962 }
963 }
964 return this_parameter_flag;
965}
966
967typedef struct {
968 int parameter_size;
969 ccv_nnc_cmd_t minimizer;
970 ccv_cnnp_model_io_t parameters[1];
971} ccv_cnnp_set_minimizer_for_parameter_t;
972
973static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
974{
975 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
976 assert(compiled_data);
977 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
978 // We update all parameters, at this point, we have one minimizer.
979 const int parameter_size = compiled_data->parameters->rnum;
980 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
981 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
982 assert(symbolic_graph);
983 const int parallel_count = ccv_max(model->parallel_count, 1);
984 ccv_array_t* const parameters = compiled_data->minimize.parameters;
985 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
986 int i, j, flag = 0;
987 for (i = 0; i < parameters->rnum; i++)
988 {
989 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
990 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
991 {
992 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
993 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
994 const int old_rnum = parameter_indices->rnum;
995 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
996 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
997 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
998 if (param_ref >= 0)
999 {
1000 assert(param_ref + old_rnum < parameter_indices->rnum);
1001 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
1002 parameter_indices->rnum = old_rnum + 1;
1003 }
1004 }
1005 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1006 // We may have duplicated indices, but that is OK, we will set it twice.
1007 for (j = 0; j < parameter_indices->rnum; j++)
1008 {
1009 const int d = *(int*)ccv_array_get(parameter_indices, j);
1010 assert(d <= parameter_size);
1011 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1012 flag = 1;
1013 }
1014 ccv_array_clear(parameter_indices);
1015 }
1016 ccv_array_free(parameter_indices);
1017 return flag;
1018}
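// Editorial sketch, not part of ccv_cnnp_model.c: the param_sel / param_ref decoding in the loop
// above follows an off-by-one convention so that 0 can stay reserved (hence the asserts). The
// hypothetical helper below restates that convention, assuming a positive value means "index + 1"
// and a negative value is a marker that is passed through unchanged.
static int decode_param_index(const int encoded)
{
	return encoded > 0 ? encoded - 1 : encoded; // 3 selects index 2; -1 stays -1
}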
1019
1020static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1021{
1022 if (new_saved_aux_size == old_saved_aux_size)
1023 return;
1024 assert(new_saved_aux_size > old_saved_aux_size);
1025 int i, j;
1026 for (i = parameter_size - 1; i >= 0; i--)
1027 {
1028 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1029 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
1030 for (j = old_saved_aux_size - 1; j >= 0; j--)
1031 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1032 }
1033}
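// Editorial sketch, not part of ccv_cnnp_model.c: _ccv_cnnp_scatter_saved_aux widens a row-major
// [parameter_size x saved_aux] table in place from the per-minimizer stride to the larger
// max stride. Iterating rows and columns backwards guarantees every slot is read before anything
// overwrites it. The same idea with plain ints and a caller-chosen sentinel:
static void scatter_rows_in_place(int* const data, const int rows, const int old_cols, const int new_cols, const int empty)
{
	int i, j;
	for (i = rows - 1; i >= 0; i--)
	{
		for (j = new_cols - 1; j >= old_cols; j--)
			data[i * new_cols + j] = empty; // newly exposed slots start out empty
		for (j = old_cols - 1; j >= 0; j--)
			data[i * new_cols + j] = data[i * old_cols + j]; // move the old row to its new offset
	}
}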
1034
1035static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1036{
1037 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1038 assert(compiled_data);
1039 if (!compiled_data->rewindables)
1040 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1041 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1042 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1043 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1044}
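// Editorial sketch, not part of ccv_cnnp_model.c: the three hooks registered above implement an
// undo log. Every tensor symbol, alias, or graph exec symbol created after this point is appended
// to compiled_data->rewindables, so a later rewind (see the loop around line 778 above) can free
// the recorded symbols and restore the graph to its pre-gradient state. The generic pattern, with
// hypothetical names:
typedef struct {
	int* log;     // ids recorded since the checkpoint
	int count;
	int capacity;
} undo_log_t;

static void undo_log_record(undo_log_t* const u, const int id)
{
	if (u->count < u->capacity)
		u->log[u->count++] = id; // creation hook: remember everything that gets created
}

static void undo_log_rewind(undo_log_t* const u, void (*const destroy)(int))
{
	int i;
	for (i = u->count - 1; i >= 0; i--)
		destroy(u->log[i]); // undo in reverse creation order
	u->count = 0;
}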
1045
1046static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1047{
1048 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1049 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1050 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1051 const int evaluate_to_size = compiled_data->evaluate.to_size;
1052 assert(evaluate_to_size > 0);
1053 const int parallel_count = ccv_max(model->parallel_count, 1);
1054 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1055 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1056 int i, j;
1057 const int output_size = model->output_size;
1058 assert(!fits || fit_size == output_size * parallel_count);
1059 if (fits)
1060 for (i = 0; i < output_size; i++)
1061 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1062 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1063 const int parameter_size = compiled_data->parameters->rnum;
1064 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1065 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1066 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1067 int parameter_size_maybe_more = parameter_size;
1068 compiled_data->disable_outgrad = disable_outgrad;
1069 int outgrad_size;
1070 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1071 outgrad_size = 0;
1072 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1073 outgrad_size = model->input_size;
1074 else {
1075 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1076 outgrad_size = 0;
1077 for (i = 0; i < model->input_size; i++)
1078 if (!(disable_outgrad & ((uint64_t)1 << i)))
1079 ++outgrad_size;
1080 }
1081 compiled_data->outgrad_size = outgrad_size;
1082 parameter_size_maybe_more += outgrad_size;
1083 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1084 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1085 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1086 compiled_data->backward.to_size = parameter_size_maybe_more;
1087 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1088 if (compiled_data->parameter_flags)
1089 {
1090 parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1091 for (i = 0; i < parameter_size; i++)
1092 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1093 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1094 else
1095 parameters[i] = NO_TENSOR_SYMBOL;
1096 }
1097 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1098 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1099 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1100 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1101 else { // Compute minimize with gradients including selected inputs.
1102 assert(model->input_size > 0);
1103 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1104 assert(outgrad_size > 0);
1105 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1106 j = 0;
1107 for (i = 0; i < model->input_size; i++)
1108 if (!(disable_outgrad & ((uint64_t)1 << i)))
1109 outgrads[j++] = model->inputs[i];
1110 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1111 }
1112 if (compiled_data->parameter_flags)
1113 ccfree(parameters);
1114 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1115 if (compiled_data->minimize.parameters)
1116 _ccv_cnnp_apply_parameters_with_minimizer(model);
1117 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1118 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1119 for (i = 0; i < output_size; i++)
1120 {
1121 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1122 // Init this to 1 so we can backprop.
1123 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1124 }
1125 compiled_data->backward.to_size = 0;
1126 for (i = 0; i < parameter_size_maybe_more; i++)
1127 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1128 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1129 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1130 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1131 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1132 {
1133 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1134 continue;
1135 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1136 const int* tos;
1137 int to_size;
1138 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1139 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1140 {
1141 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1142 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1143 int flag = 0;
1144 const int outgrad_destination_start = ccv_max(0, destination_count - i);
1145 for (j = i - 1; !flag && j >= 0; j--)
1146 if (j + outgrad_destination_start < destination_count)
1147 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1148 if (!flag) // Only if we cannot find it, we add it.
1149 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1150 }
1151 }
1152 if (parallel_count > 1)
1153 {
1154 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1155 0, 0,
1156 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1157 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1158 0, 0, 0,
1159 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1160 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1161 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1162 for (i = 0; i < evaluate_to_size; i++)
1163 for (j = 1; j < parallel_count; j++)
1164 {
1165 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1166 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1167 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1168 }
1169 const int backward_to_size = compiled_data->backward.to_size;
1170 for (i = 0; i < backward_to_size; i++)
1171 for (j = 1; j < parallel_count; j++)
1172 {
1173 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1174 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1175 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1176 }
1177 }
1178 // Only use memory compression if we are in gradient parameter mode.
1179 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1180 {
1181 if (model->memory_compression)
1182 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1183 if (model->memory_reduction)
1184 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1185 }
1186 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1187 compiled_data->gradient_mode = gradient_mode;
1188}
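// Editorial sketch, not part of ccv_cnnp_model.c: disable_outgrad acts as a per-input bitmask in
// the function above. Bit i set means "do not compute a gradient with respect to input i", so
// outgrad_size counts the cleared bits among the first input_size bits:
#include <stdint.h>

static int count_outgrads(const uint64_t disable_outgrad, const int input_size)
{
	int i, outgrad_size = 0;
	for (i = 0; i < input_size; i++)
		if (!(disable_outgrad & ((uint64_t)1 << i)))
			++outgrad_size; // a gradient for this input is still requested
	return outgrad_size;
}
// Example: count_outgrads(0x5, 4) == 2, because inputs 0 and 2 are disabled while 1 and 3 still
// receive output gradients.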
1189
1190void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1191{
1192 assert(!compiled_data->tensors.parameters);
1193 const int parameter_size = compiled_data->parameters->rnum;
1194 const int parallel_count = ccv_max(model->parallel_count, 1);
1195 const int internal_size = compiled_data->internals->rnum;
1196 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1197 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1198 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1199 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1200}
1201
1202int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1203{
1204 int i, j;
1205 const int parameter_size = compiled_data->parameters->rnum;
1206 const int parallel_count = ccv_max(model->parallel_count, 1);
1207 const int internal_size = compiled_data->internals->rnum;
1208 for (i = 0; i < parameter_size; i++)
1209 {
1210 // parameters have to be allocated all together.
1211 if (compiled_data->tensors.parameters[i])
1212 {
1213 for (j = 1; j < parallel_count; j++)
1214 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1215 continue;
1216 }
1217 return 1;
1218 }
1219 for (i = 0; i < internal_size; i++)
1220 {
1221 if (!compiled_data->tensors.internals[i])
1222 return 1;
1223 for (j = 1; j < parallel_count; j++)
1224 if (!compiled_data->tensors.internals[i + j * internal_size])
1225 return 1;
1226 }
1227 return 0;
1228}
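// Editorial sketch, not part of ccv_cnnp_model.c: compiled_data->tensors keeps one copy of every
// parameter (and internal) per data-parallel device, in a flat array indexed as
// tensors.parameters[i + j * parameter_size], with i the parameter index and j the device
// (0 .. parallel_count - 1). The checks above and the allocations below rely on that layout.
// Index helper under the same assumption:
static int parameter_slot(const int parameter_index, const int device, const int parameter_size)
{
	// device 0 occupies the first parameter_size slots, device 1 the next block, and so on
	return parameter_index + device * parameter_size;
}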
1229
1230void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1231{
1232 int i, j;
1233 const int parameter_size = compiled_data->parameters->rnum;
1234 const int parallel_count = ccv_max(model->parallel_count, 1);
1235 const int internal_size = compiled_data->internals->rnum;
1236 for (i = 0; i < parameter_size; i++)
1237 {
1238 // parameters have to be allocated all together.
1239 if (compiled_data->tensors.parameters[i])
1240 {
1241 for (j = 1; j < parallel_count; j++)
1242 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1243 continue;
1244 }
1245 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1246 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1247 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1248 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1249 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1250 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1251 for (j = 1; j < parallel_count; j++)
1252 {
1253 if (j != device_id)
1254 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1255 else
1256 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1257 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1258 }
1259 }
1260 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1261 for (i = 0; i < internal_size; i++)
1262 {
1263 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1264 const int d = retained.d;
1265 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1266 continue;
1267 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1268 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1269 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1270 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1271 if (!compiled_data->tensors.internals[i])
1272 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1273 for (j = 1; j < parallel_count; j++)
1274 {
1275 if (j != device_id)
1276 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1277 else
1278 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1279 if (!compiled_data->tensors.internals[i + j * internal_size])
1280 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1281 }
1282 }
1283 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1284}
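// Editorial sketch, not part of ccv_cnnp_model.c: CCV_NNC_INIT_V above masks off bit 0 of
// tensors_init.v before using it as a uint32_t*. That low bit appears to serve as a
// "not fully allocated yet" flag (see the (uintptr_t)1 test in _ccv_cnnp_model_fit_jit below),
// which works because the allocation is at least 4-byte aligned. The pointer-tagging idiom:
#include <stdint.h>

static uint32_t* untag_ptr(uint32_t* const tagged)
{
	return (uint32_t*)((uintptr_t)tagged & ~(uintptr_t)1); // drop the flag bit, recover the pointer
}

static int ptr_tag(const uint32_t* const tagged)
{
	return (int)((uintptr_t)tagged & (uintptr_t)1); // read the flag bit
}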
1285
1286static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1287{
1288 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1289 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1290}
1291
1292static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1293{
1294 assert(parallel_count > 0);
1295 int i, j;
1296 for (i = 0; i < tensor_size; i++)
1297 {
1298 if (!tensors[i])
1299 continue;
1300 const int d = tensor_symbols[i].d;
1301 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1302 continue;
1303 for (j = 1; j < parallel_count; j++)
1304 if (tensors[i + j * tensor_size])
1305 {
1306 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1307 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1308 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1309 }
1310 }
1311}
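// Editorial sketch, not part of ccv_cnnp_model.c: tensors_init.v is a packed bitset with one bit
// per tensor symbol; the test above, v[d >> 5] & (1u << (d & 0x1f)), appears to ask whether
// symbol d has already been initialized, and only initialized tensors are copied to the other
// devices. Equivalent helpers over an array of uint32_t words:
#include <stdint.h>

static int bitset_test(const uint32_t* const v, const int d)
{
	return !!(v[d >> 5] & (1u << (d & 0x1f))); // word d / 32, bit d % 32
}

static void bitset_set(uint32_t* const v, const int d)
{
	v[d >> 5] |= 1u << (d & 0x1f);
}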
1312
1313static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1314{
1315 assert(parallel_count > 0);
1316 int i, j;
1317 for (i = 0; i < tensor_size; i++)
1318 {
1319 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1320 for (j = 1; j < parallel_count; j++)
1321 {
1322 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1323 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1324 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1325 { // We shouldn't allocate this, free it up.
1326 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1327 tensors[i + j * tensor_size] = 0;
1328 }
1329 }
1330 }
1331}
1332
1333static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1334{
1335 assert(parallel_count > 0);
1336 int i, j;
1337 for (i = 0; i < tensor_size; i++)
1338 {
1339 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1340 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1341 continue;
1342 if (graph)
1343 {
1344 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1345 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1346 tensor_symbol = alias_to;
1347 }
1348 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1349 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1350 {
1351 const ccv_nnc_tensor_bind_t retained_bind = {
1352 .symbol = tensor_symbol,
1353 .tensor = tensor
1354 };
1355 ccv_array_push(tensor_binds, &retained_bind);
1356 }
1357 for (j = 1; j < parallel_count; j++)
1358 {
1359 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1360 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1361 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1362 {
1363 const ccv_nnc_tensor_bind_t bind = {
1364 .symbol = copy,
1365 .tensor = tensors[i + j * tensor_size]
1366 };
1367 ccv_array_push(tensor_binds, &bind);
1368 }
1369 }
1370 }
1371}
1372
1373static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1374{
1375 if (compiled_data->graph)
1376 ccv_nnc_graph_free(compiled_data->graph);
1377 compiled_data->graph = 0;
1378 compiled_data->is_test = 0;
1379 if (compiled_data->tensor_arena)
1380 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1381 compiled_data->tensor_arena = 0;
1382 if (compiled_data->graph_exec_arena)
1383 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1384 compiled_data->graph_exec_arena = 0;
1385 if (compiled_data->backward.from_ops)
1386 ccfree(compiled_data->backward.from_ops);
1387 compiled_data->backward.from_ops = 0;
1388 if (compiled_data->evaluate.schedule)
1389 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1390 compiled_data->evaluate.schedule = 0;
1391 if (compiled_data->backward.schedule)
1392 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1393 compiled_data->backward.schedule = 0;
1394}
1395
1396static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1397{
1398 if (compiled_data->gradients)
1399 ccfree(compiled_data->gradients);
1400 compiled_data->gradients = 0;
1401 if (compiled_data->updated_parameters)
1402 ccfree(compiled_data->updated_parameters);
1403 compiled_data->updated_parameters = 0;
1404 compiled_data->update_nodes = 0;
1405 compiled_data->saved_aux = 0;
1406}
1407
1408static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1409{
1410 if (compiled_data->backward.gradients)
1411 ccfree(compiled_data->backward.gradients);
1412 compiled_data->backward.gradients = 0;
1413 if (compiled_data->backward.accum)
1414 ccv_nnc_graph_free(compiled_data->backward.accum);
1415 compiled_data->backward.accum = 0;
1416 if (compiled_data->backward.tensor_arena)
1417 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1418 compiled_data->backward.tensor_arena = 0;
1419 if (compiled_data->backward.graph_exec_arena)
1420 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1421 compiled_data->backward.graph_exec_arena = 0;
1422}
1423
1424static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1425{
1426 if (compiled_data->apply_gradients.graph)
1427 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1428 compiled_data->apply_gradients.graph = 0;
1429 if (compiled_data->apply_gradients.tensor_arena)
1430 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1431 compiled_data->apply_gradients.tensor_arena = 0;
1432 if (compiled_data->apply_gradients.graph_exec_arena)
1433 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1434 compiled_data->apply_gradients.graph_exec_arena = 0;
1435}
1436
1437// Compile the graph to run ccv_cnnp_model_fit
1438static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1439{
1440 int i, j;
1441 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1442 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1443 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1444 const int parallel_count = ccv_max(model->parallel_count, 1);
1445 assert(output_size == model->output_size * parallel_count);
1446 assert(!fits || output_size == fit_size);
1447 assert(output_size > 0);
1448 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1449 {
1450 _ccv_cnnp_model_set_rewindables(model);
1451 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1452 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1453 _ccv_cnnp_model_rewind_graph(model);
1454 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1455 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1456 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1457 }
1458 const int tensors_init = !!compiled_data->tensors_init.v;
1459 if (!tensors_init)
1460 _ccv_cnnp_model_tensors_init(model, compiled_data);
1461 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1462 // Check whether it is fully allocated; if it is not, run init_1.
1463 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1464 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1465 assert((input_size % parallel_count) == 0);
1466 assert((output_size % parallel_count) == 0);
1467 assert((fit_size % parallel_count) == 0);
1468 const int input_size_per_p = input_size / parallel_count;
1469 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1470 const int output_size_per_p = output_size / parallel_count;
1471 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1472 const int fit_size_per_p = fit_size / parallel_count;
1473 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1474 const int parameter_size = compiled_data->parameters->rnum;
1475 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1476 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1477 const int internal_size = compiled_data->internals->rnum;
1478 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1479 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1480 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1481 ccv_array_free(tensor_binds);
1482 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1483 if (tensors_init && parallel_count > 1)
1484 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1485 // If tensor is not init'ed, we need to init states first.
1486 if (_ccv_cnnp_any_to_init(compiled_data))
1487 {
1488 ccv_nnc_tensor_init_states_t tensor_init_states = {
1489 .parallel_count = parallel_count,
1490 .graph = model->graph,
1491 .compiled_data = compiled_data,
1492 .tensor_arena = compiled_data->tensor_arena
1493 };
1494 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1495 }
1496 compiled_data->is_test = 0;
1497 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1498 // No need to set because it defaults to training mode.
1499 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1500 for (i = 0; i < saved_aux_size * parameter_size; i++)
1501 {
1502 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1503 continue;
1504 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1505 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1506 for (j = 1; j < parallel_count; j++)
1507 {
1508 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1509 if (copy)
1510 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1511 }
1512 }
1513 const int evaluate_to_size = compiled_data->evaluate.to_size;
1514 compiled_data->evaluate.to_op_size = 0;
1515 for (i = 0; i < evaluate_to_size; i++)
1516 {
1517 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1518 if (to.graph)
1519 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1520 }
1521 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1522 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1523}
1524
1525ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1526{
1527 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1528 if (!compiled_data || !compiled_data->graph)
1529 return 0;
1530 return ccv_nnc_graph_default_stream(compiled_data->graph);
1531}
1532
1533uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1534{
1535 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1536 if (!compiled_data || !compiled_data->tensor_arena)
1537 return 0;
1538 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1539}
1540
1541static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1542{
1543 int i, j;
1544 for (i = 0; i < tensor_size; i++)
1545 {
1546 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1547 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1548 continue;
1549 if (graph)
1550 {
1551 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1552 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1553 tensor_symbol = alias_to;
1554 }
1555 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1556 for (j = 1; j < parallel_count; j++)
1557 {
1558 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1559 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1560 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1561 }
1562 }
1563}
1564
1565void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1566{
1567 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1568 assert(compiled_data);
1569 const int parallel_count = ccv_max(model->parallel_count, 1);
1570 assert(output_size == model->output_size * parallel_count);
1571 assert(input_size == model->input_size * parallel_count);
1572 assert(!fits || fit_size == output_size);
1573 assert(model->graph);
1574 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1575 {
1576 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1577 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1578 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1579 // Compile the symbolic graph down only when needed.
1580 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1581 } else {
1582 assert((input_size % parallel_count) == 0);
1583 assert((output_size % parallel_count) == 0);
1584 assert((fit_size % parallel_count) == 0);
1585 const int input_size_per_p = input_size / parallel_count;
1586 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1587 const int output_size_per_p = output_size / parallel_count;
1588 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1589 const int fit_size_per_p = fit_size / parallel_count;
1590 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1591 }
1592 if (compiled_data->is_test)
1593 {
1594 compiled_data->is_test = 0;
1595 ccv_nnc_graph_exec_update_t update = {
1596 .parallel_count = parallel_count,
1597 .graph = model->graph,
1598 .graph_exec_arena = compiled_data->graph_exec_arena,
1599 };
1600 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1601 }
1602 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1603}
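
A minimal calling sketch for ccv_cnnp_model_fit, assuming a model that was already built and compiled with one input, one fit target and one output, and parallel_count == 1; the function and variable names below (train_one_step_sketch, my_model, x, y, out) are illustrative placeholders, not part of ccv_cnnp_model.c:

#include "ccv_nnc.h"

// One training step with a pre-compiled single-input / single-output model.
// The caller is assumed to have created and filled the tensors already.
static void train_one_step_sketch(ccv_cnnp_model_t* const my_model, ccv_nnc_tensor_t* const x, ccv_nnc_tensor_t* const y, ccv_nnc_tensor_t* const out)
{
	ccv_nnc_tensor_t* inputs[] = { x };
	ccv_nnc_tensor_t* fits[] = { y };
	ccv_nnc_tensor_t* outputs[] = { out };
	// input_size, fit_size and output_size must match model->{input,output}_size * parallel_count,
	// which the asserts at the top of ccv_cnnp_model_fit (lines 1570-1572) enforce.
	ccv_cnnp_model_fit(my_model, inputs, 1, fits, 1, outputs, 1, 0 /* tensor_tape */, 0 /* stream_context */);
}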
1604
1605// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1606static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1607{
1608 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1609 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1610 const int parallel_count = ccv_max(model->parallel_count, 1);
1611 assert(output_size == model->output_size * parallel_count);
1612 assert(output_size > 0);
1613 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here, but rather,
1614 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1615 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1616 {
1617 const int evaluate_to_size = compiled_data->evaluate.to_size;
1618 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1619 _ccv_cnnp_model_set_rewindables(model);
1620 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1621 0, 0,
1622 0, 0, 0,
1623 0, 0, 0,
1624 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1625 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1626 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1627 int i, j;
1628 for (i = 0; i < evaluate_to_size; i++)
1629 for (j = 1; j < parallel_count; j++)
1630 {
1631 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1632 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1633 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1634 }
1635 }
1636 const int tensors_init = !!compiled_data->tensors_init.v;
1637 if (!tensors_init)
1638 _ccv_cnnp_model_tensors_init(model, compiled_data);
1639 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1640 // Check if it is not fully allocated, if it is not, init_1.
1641 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1642 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1643 assert((input_size % parallel_count) == 0);
1644 assert((output_size % parallel_count) == 0);
1645 const int input_size_per_p = input_size / parallel_count;
1646 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1647 const int output_size_per_p = output_size / parallel_count;
1648 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1649 const int parameter_size = compiled_data->parameters->rnum;
1650 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1651 const int internal_size = compiled_data->internals->rnum;
1652 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1653 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1654 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1655 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1656 ccv_array_free(tensor_binds);
1657 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1658 // If tensor is not init'ed, we need to init states first.
1659 if (tensors_init && parallel_count > 1)
1660 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1661 if (_ccv_cnnp_any_to_init(compiled_data))
1662 {
1663 ccv_nnc_tensor_init_states_t tensor_init_states = {
1664 .parallel_count = parallel_count,
1665 .graph = model->graph,
1666 .compiled_data = compiled_data,
1667 .tensor_arena = compiled_data->tensor_arena
1668 };
1669 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1670 }
1671 compiled_data->is_test = 1;
1672 ccv_nnc_graph_exec_update_t update = {
1673 .parallel_count = parallel_count,
1674 .graph = model->graph,
1675 .graph_exec_arena = compiled_data->graph_exec_arena,
1676 };
1677 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1678 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1679 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1680}
1681
1682static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1683{
1684 assert(!compiled_data->tensors.gradients);
1685 const int parameter_size = compiled_data->parameters->rnum;
1686 const int parallel_count = ccv_max(model->parallel_count, 1);
1687 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1688 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1689 int i, j;
1690 for (i = 0; i < parameter_size; i++)
1691 {
1692 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1693 {
1694 compiled_data->tensors.gradients[i] = 0;
1695 compiled_data->tensors.accum_gradients[i] = 0;
1696 for (j = 1; j < parallel_count; j++)
1697 {
1698 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1699 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1700 }
1701 continue;
1702 }
1703 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1704 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1705 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1706 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1707 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1708 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1709 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1710 for (j = 1; j < parallel_count; j++)
1711 {
1712 if (j != device_id)
1713 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1714 else
1715 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1716 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1717 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1718 }
1719 }
1720}
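
The allocation above packs gradients and accum_gradients into one ccmalloc'ed block of parameter_size * 2 * parallel_count pointers; a small restatement of the indexing convention used by the loop (the helper name is illustrative only, not part of the file):

// Slot of the gradient for parameter i on the j-th parallel copy:
// compiled_data->tensors.gradients[i + j * parameter_size];
// accum_gradients shares the same layout, starting parameter_size * parallel_count
// pointers after gradients inside the same allocation.
static inline int gradient_slot_sketch(const int i, const int j, const int parameter_size)
{
	return i + j * parameter_size;
}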
1721
1722static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1723{
1724 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1725 return 1;
1726 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1727 return 0;
1728 int i;
1729 for (i = 0; i < input_size; i++)
1730 if (!(disable_outgrad & ((uint64_t)1 << i)))
1731 return 0;
1732 return 1;
1733}
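
A short caller-side sketch of the disable_outgrad convention checked by the helper above: bit i of the mask disables the output gradient for input i, and the sentinels CCV_CNNP_DISABLE_OUTGRAD_ALL / CCV_CNNP_DISABLE_OUTGRAD_NONE short-circuit the per-bit scan (the concrete mask and input count here are made up for illustration):

// Stop gradients from flowing back into input 0, but keep them for input 1.
const uint64_t disable_outgrad = (uint64_t)1 << 0;
// With two model inputs this is not "all disabled", so the check returns 0 and the
// target gradient mode stays CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS.
const int all_disabled = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, 2);
// Passing CCV_CNNP_DISABLE_OUTGRAD_ALL, or setting the bit for every input,
// would return 1 and select CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES instead.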
1734
1735// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1736// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1737static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1738{
1739 int i, j;
1740 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1741 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1742 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode);
1743 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1744 const int parallel_count = ccv_max(model->parallel_count, 1);
1745 assert(output_size == model->output_size * parallel_count);
1746 assert(output_size > 0);
1747 // There shouldn't be a loss function if we evaluate with multistage jit.
1748 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1749 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1750 {
1751 _ccv_cnnp_model_set_rewindables(model);
1752 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1753 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1754 _ccv_cnnp_model_rewind_graph(model);
1755 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1756 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1757 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1758 }
1759 const int tensors_init = !!compiled_data->tensors_init.v;
1760 if (!tensors_init)
1761 _ccv_cnnp_model_tensors_init(model, compiled_data);
1762 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1763 // Check if it is not fully allocated, if it is not, init_1.
1764 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1765 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1766 assert((input_size % parallel_count) == 0);
1767 assert((output_size % parallel_count) == 0);
1768 const int input_size_per_p = input_size / parallel_count;
1769 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1770 const int output_size_per_p = output_size / parallel_count;
1771 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1772 const int parameter_size = compiled_data->parameters->rnum;
1773 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1774 const int internal_size = compiled_data->internals->rnum;
1775 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1776 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1777 if (!compiled_data->tensors.gradients)
1778 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1779 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1780 if (compiled_data->backward.to_size > 0)
1781 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1782 else
1783 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1784 ccv_array_free(tensor_binds);
1785 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1786 if (tensors_init && parallel_count > 1)
1787 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1788 // If tensor is not init'ed, we need to init states first.
1789 if (_ccv_cnnp_any_to_init(compiled_data))
1790 {
1791 ccv_nnc_tensor_init_states_t tensor_init_states = {
1792 .parallel_count = parallel_count,
1793 .graph = model->graph,
1794 .compiled_data = compiled_data,
1795 .tensor_arena = compiled_data->tensor_arena
1796 };
1797 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1798 }
1799 compiled_data->is_test = is_test;
1800 ccv_nnc_graph_exec_update_t update = {
1801 .parallel_count = parallel_count,
1802 .graph = model->graph,
1803 .graph_exec_arena = compiled_data->graph_exec_arena,
1804 };
1805 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1806 const int evaluate_to_size = compiled_data->evaluate.to_size;
1807 compiled_data->evaluate.to_op_size = 0;
1808 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1809 for (i = 0; i < evaluate_to_size; i++)
1810 {
1811 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1812 if (to_op.graph)
1813 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1814 const int* tos;
1815 int to_size;
1816 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1817 for (j = 0; j < to_size; j++)
1818 {
1819 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1820 .d = tos[j],
1821 .graph = model->graph
1822 });
1823 if (to_op.graph)
1824 ccv_array_add_unique_int(backward_from, to_op.d);
1825 }
1826 }
1827 assert(backward_from->rnum > 0);
1828 compiled_data->backward.from_op_size = backward_from->rnum;
1829 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1830 for (i = 0; i < backward_from->rnum; i++)
1831 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1832 .d = *(int*)ccv_array_get(backward_from, i),
1833 .graph = compiled_data->graph,
1834 };
1835 // If there are any set nodes (to set some tensors to 0) inserted through the backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find these nodes and explicitly add them to backward.from_ops.
1836 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0);
1837 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1838 uint32_t* const visited = cccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1839 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0);
1840 const int source_size = compiled_data->graph->sources->rnum;
1841 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0);
1842 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1843 visited[(idx >> 5)] |= (1u << (idx & 31));
1844 } ccv_nnc_graph_visit_endfor
1845 ccv_nnc_graph_visit_free(visit);
1846 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0);
1847 const int destination_size = compiled_data->graph->destinations->rnum;
1848 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0);
1849 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1850 visited[(idx >> 5)] |= (1u << (idx & 31));
1851 } ccv_nnc_graph_visit_endfor
1852 ccv_nnc_graph_visit_free(visit);
1853 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0);
1854 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1855 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1856 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1857 {
1858 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD);
1859 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for the set function that zeroes out a tensor, not the one that sets the gradient to 1.
1860 ccv_array_add_unique_int(backward_from, idx);
1861 }
1862 } ccv_nnc_graph_visit_endfor
1863 ccv_nnc_graph_visit_free(visit);
1864 ccfree(visited);
1865 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1866 {
1867 compiled_data->backward.from_op_size = backward_from->rnum;
1868 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1869 for (i = 0; i < backward_from->rnum; i++)
1870 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1871 .d = *(int*)ccv_array_get(backward_from, i),
1872 .graph = compiled_data->graph,
1873 };
1874 }
1875 ccv_array_free(backward_from);
1876 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1877 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1878}
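
The reachability passes above record visited exec nodes in a plain bitmap, one bit per node and 32 nodes per uint32_t word (see lines 1838, 1843 and 1856); a compact restatement of that bookkeeping with illustrative helper names that are not part of the file:

// Mark exec node idx as reached.
static inline void visited_mark_sketch(uint32_t* const visited, const int idx)
{
	visited[idx >> 5] |= (1u << (idx & 31));
}

// Test whether exec node idx was reached by either pass.
static inline int visited_test_sketch(const uint32_t* const visited, const int idx)
{
	return !!(visited[idx >> 5] & (1u << (idx & 31)));
}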
1879
1880void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1881{
1882 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1883 assert(compiled_data);
1884 const int parallel_count = ccv_max(model->parallel_count, 1);
1885 assert(output_size == model->output_size * parallel_count);
1886 assert(input_size == model->input_size * parallel_count);
1887 assert(model->graph);
1888 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1889 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1890 if (!compiled_data->graph || mode_mismatch)
1891 {
1892 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1893 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad).
1894 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1895 if (params.requires_grad)
1896 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1897 else
1898 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1899 } else {
1900 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1901 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1901, __extension__ __PRETTY_FUNCTION__); }))
;
1902 const int input_size_per_p = input_size / parallel_count;
1903 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1904 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1904, __extension__ __PRETTY_FUNCTION__); }))
;
1905 const int output_size_per_p = output_size / parallel_count;
1906 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1907 }
1908 if (compiled_data->is_test != params.is_test)
1909 {
1910 compiled_data->is_test = params.is_test;
1911 ccv_nnc_graph_exec_update_t update = {
1912 .parallel_count = parallel_count,
1913 .graph = model->graph,
1914 .graph_exec_arena = compiled_data->graph_exec_arena,
1915 };
1916 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1917 }
1918}
1919
1920void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1921{
1922 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1923 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1923, __extension__ __PRETTY_FUNCTION__); }))
;
1924 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1925 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1926 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1927 else {
1928 if (!compiled_data->evaluate.schedule)
1929 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1930 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1931 }
1932}
1933
1934// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1935// Particularly, this method compiles the accumulator graph.
1936static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1937{
1938 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1939 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1939, __extension__ __PRETTY_FUNCTION__); }))
;
1940 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1940, __extension__ __PRETTY_FUNCTION__
); }))
;
1941 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1942 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1943 const int parameter_size = compiled_data->parameters->rnum;
1944 int i, j;
1945 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1946 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1947 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1948 for (i = 0; i < parameter_size; i++)
1949 for (j = 0; j < parallel_count; j++)
1950 if (compiled_data->tensors.gradients[i + j * parameter_size])
1951 {
1952 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1953 // Now the old gradient becomes the accumulated gradient; set up a new gradient tensor so we can collect fresh gradients into it.
1954 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1955 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1956 ccv_nnc_tensor_symbol_t inputs[2];
1957 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1958 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1959 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1960 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1961 } else {
1962 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1963 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1964 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1965 }
1966 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1967 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1968 {
1969 ccv_nnc_symbolic_graph_free(accum);
1970 // Create empty graph.
1971 compiled_data->backward.accum = ccv_nnc_graph_new();
1972 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1973 return;
1974 }
1975 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1976 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1977 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1978 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1979 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1980 ccv_nnc_symbolic_graph_free(accum);
1981 ccv_array_free(tensor_binds);
1982 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1983}
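// Sketch (not part of the original source): per parameter i and device j, the accumulator graph
// built above reduces to an in-place accumulation, because both the accumulator input symbol and
// the updated output symbol are bound to the same backing tensor (tensors.accum_gradients):
//
//   accum_gradients[i + j * parameter_size] += gradients[i + j * parameter_size]
//
// realized with one EWSUM node per (i, j), exactly as created above:
//
//   ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD(), inputs, 2, &output, 1, 0);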
1984
1985void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1986{
1987 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1988 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1988, __extension__ __PRETTY_FUNCTION__); }))
;
1989 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1989, __extension__ __PRETTY_FUNCTION__
); }))
;
1990 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1991 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1991, __extension__ __PRETTY_FUNCTION__
); }))
;
1992 if (outgrad_size > 0)
1993 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1993, __extension__ __PRETTY_FUNCTION__
); }))
; }
1994 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1994, __extension__ __PRETTY_FUNCTION__); }))
;
1995 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1995, __extension__ __PRETTY_FUNCTION__
); }))
;
1996 const int parameter_size = compiled_data->parameters->rnum;
1997 // If we need to accumulate the gradients now, do jit on accumulator.
1998 if (compiled_data->backward.count > 0)
1999 {
2000 if (!compiled_data->backward.accum)
2001 _ccv_cnnp_model_multistage_jit_1(model);
2002 else if (compiled_data->backward.count == 1) {
2003 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2004 int i;
2005 for (i = 0; i < parameter_size * parallel_count; i++)
2006 {
2007 ccv_nnc_tensor_t* tensor;
2008 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
2009 }
2010 if (compiled_data->backward.tensor_arena)
2011 {
2012 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2013 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
2014 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2015 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2016 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2017 }
2018 }
2019 }
2020 const int ingrad_size_per_p = model->output_size;
2021 const int outgrad_size_per_p = compiled_data->outgrad_size;
2022 int i, j;
2023 for (i = 0; i < ingrad_size_per_p; i++)
2024 {
2025 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
2026 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2027 {
2028 // Set it to 1 if it is not specified.
2029 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2030 if (ingrad_tensor)
2031 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2032 for (j = 1; j < parallel_count; j++)
2033 {
2034 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2035 if (ingrad_tensor)
2036 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2037 }
2038 } else {
2039 // Make sure the length matches, in case it is an alias.
2040 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 2040, __extension__ __PRETTY_FUNCTION__
); }))
;
2041 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2042 for (j = 1; j < parallel_count; j++)
2043 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2044 }
2045 }
2046 if (outgrad_size > 0)
2047 {
2048 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 2048, __extension__ __PRETTY_FUNCTION__
); }))
;
2049 for (i = 0; i < outgrad_size_per_p; i++)
2050 if (outgrads[i])
2051 {
2052 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2053 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2054 for (j = 1; j < parallel_count; j++)
2055 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2056 }
2057 } else {
2058 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2059, __extension__ __PRETTY_FUNCTION__
); }))
2059 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2059, __extension__ __PRETTY_FUNCTION__
); }))
;
2060 }
2061 // We need to rebind here because ccv_cnnp_model_evaluate clears bindings, which resets all bindings for the gradients.
2062 // Parameters and internals are fine: clearing bindings restores the original bindings, which are exactly these
2063 // parameters and internals. The same cannot be said for gradients because of the accum_gradients switching.
2064 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2065 if (!compiled_data->backward.schedule)
2066 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2067 // Run the backward pass.
2068 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2069 // If we need to run an accumulation round, do that now.
2070 if (compiled_data->backward.count > 0)
2071 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2072 // Update the count; this determines whether we need to accumulate or not.
2073 ++compiled_data->backward.count;
2074}
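// Control-flow sketch for gradient accumulation (not part of the original source; the argument
// values are illustrative): backward.count tracks how many backward runs happened since the last
// apply_gradients. The first run writes gradients directly; from the second run on, the accumulator
// graph (jit'ed above) folds the fresh gradients into accum_gradients:
//
//   ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); /* count 0 -> 1: gradients written directly */
//   ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); /* count 1 -> 2: accum_gradients += gradients */
//   ccv_cnnp_model_apply_gradients(model, 0);         /* consumes the result, resets count to 0 */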
2075
2076// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2077// Particularly, this method compiles the parameter update graph.
2078static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2079{
2080 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2081 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2081, __extension__ __PRETTY_FUNCTION__
); }))
;
2082 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2083 const int parameter_size = compiled_data->parameters->rnum;
2084 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2085 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2086 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2087 // Bind accumulated gradients.
2088 if (compiled_data->backward.count > 1)
2089 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2090 else
2091 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2092 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2093 int i, j;
2094 for (i = 0; i < compiled_data->backward.to_size; i++)
2095 {
2096 const int* tos;
2097 int to_size;
2098 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2099 for (j = 0; j < to_size; j++)
2100 {
2101 // Check if this already shows up in the backward graph; if that is the case, it won't be in the apply
2102 // gradients graph.
2103 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2104 .d = tos[j],
2105 .graph = model->graph,
2106 });
2107 if (!exec.graph)
2108 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2109 }
2110 }
2111 const int from_size = apply_gradients_from->rnum;
2112 if (from_size == 0)
2113 {
2114 ccv_array_free(apply_gradients_from);
2115 ccv_array_free(tensor_binds);
2116 return;
2117 }
2118 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2119 for (i = 0; i < from_size; i++)
2120 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2121 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2122 .graph = model->graph
2123 };
2124 ccv_array_free(apply_gradients_from);
2125 // It can only end with updates on the parameters.
2126 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2127 for (i = 0; i < parameter_size; i++)
2128 {
2129 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2130 continue;
2131 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2132 for (j = 1; j < parallel_count; j++)
2133 {
2134 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2135 ccv_array_push(tos, &copy);
2136 }
2137 }
2138 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2139 ccv_array_free(tos);
2140 ccv_array_free(tensor_binds);
2141 ccfreefree(froms);
2142 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2143 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2144 {
2145 // Skip on no tensor.
2146 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2147 continue;
2148 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2149 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2150 for (j = 1; j < parallel_count; j++)
2151 {
2152 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2153 if (copy)
2154 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2155 }
2156 }
2157 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2158}
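// Sketch (not part of the original source): the apply-gradients graph compiled above is delimited by
// "froms" (symbols that follow backward.tos but were never materialized in the backward graph, i.e.
// exec.graph == 0) and "tos" (the parameter update nodes plus their per-device copies). Right after
// compilation, every saved auxiliary tensor of the minimizer is zeroed, per device, with:
//
//   ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);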
2159
2160void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2161{
2162 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2163 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2163, __extension__ __PRETTY_FUNCTION__); }))
;
2164 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2164, __extension__ __PRETTY_FUNCTION__
); }))
;
2165 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2166 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2166, __extension__ __PRETTY_FUNCTION__); }))
;
2167 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2167, __extension__ __PRETTY_FUNCTION__
); }))
;
2168 // Skip if there is no backward pass.
2169 if (compiled_data->backward.count <= 0)
2170 return;
2171 // Skip if there are no parameters.
2172 if (compiled_data->parameters->rnum == 0)
2173 {
2174 compiled_data->backward.count = 0;
2175 return;
2176 }
2177 if (!compiled_data->apply_gradients.graph)
2178 _ccv_cnnp_model_multistage_jit_2(model);
2179 else {
2180 const int parameter_size = compiled_data->parameters->rnum;
2181 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2182 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2183 if (compiled_data->backward.count > 1)
2184 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2185 else
2186 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2187 }
2188 if (compiled_data->apply_gradients.graph)
2189 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2190 // Reset backward count to 0.
2191 compiled_data->backward.count = 0;
2192}
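// Usage sketch (not part of ccv_cnnp_model.c; train_step, input and output are hypothetical names,
// and the model is assumed to be compiled with a minimizer): with requires_grad = 1 the evaluate call
// selects the gradient-capable MULTISTAGE graph, so backward and apply_gradients can follow.
static void train_step(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const input, ccv_nnc_tensor_t* const output)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 1, // Keep gradient state around for the backward pass.
		.is_test = 0, // Training-mode execution.
		.disable_outgrad = 0, // Left at 0 here; its exact encoding is not shown in this listing.
	};
	ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(input), TENSOR_LIST(output), 0, 0);
	// With no ingrads supplied, the output gradients are seeded with 1 (see ccv_cnnp_model_backward above).
	ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0);
	ccv_cnnp_model_apply_gradients(model, 0);
}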
2193
2194void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2195{
2196 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2197 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2198 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2198, __extension__ __PRETTY_FUNCTION__
); }))
;
2199 const int tensors_init = !!compiled_data->tensors_init.v;
2200 int this_tensor_init = tensors_init;
2201 if (!tensors_init)
2202 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2203 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2204 // Check if it is not fully allocated; if it is not, do init_1.
2205 this_tensor_init = 0;
2206 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2207 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2208 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2209 if (param_ref < 0)
2210 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2210
, __extension__ __PRETTY_FUNCTION__); }))
; }
2211 else
2212 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2212, __extension__ __PRETTY_FUNCTION__
); }))
; }
2213 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2214 ccv_array_free(parameter_indices);
2215 const int parameter_size = compiled_data->parameters->rnum;
2216 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2216
, __extension__ __PRETTY_FUNCTION__); }))
;
2217 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2217, __extension__ __PRETTY_FUNCTION__
); }))
;
2218 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2219 int i;
2220 if (!this_tensor_init)
2221 {
2222 if (compiled_data->tensors.parameters[d])
2223 {
2224 for (i = 1; i < parallel_count; i++)
2225 { assert(compiled_data->tensors.parameters[d + i * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[d + i *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[d + i * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[d + i * parameter_size]"
, "ccv_cnnp_model.c", 2225, __extension__ __PRETTY_FUNCTION__
); }))
; }
2226 this_tensor_init = 1;
2227 } else {
2228 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
;
2229 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2230 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2231 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2232 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2233 compiled_data->tensors.parameters[d] = ccv_nnc_tensor_new(0, info, 0);
2234 for (i = 1; i < parallel_count; i++)
2235 {
2236 if (i != device_id)
2237 CCV_TENSOR_SET_DEVICE_ID(info.type, i)(info.type) = (((info.type) & ~0xfff00) | (((i) & 0xfff
) << 8))
;
2238 else
2239 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2240 compiled_data->tensors.parameters[d + i * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2241 }
2242 }
2243 }
2244 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2245 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2245, __extension__
__PRETTY_FUNCTION__); }))
;
2246 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2247 for (i = 1; i < parallel_count; i++)
2248 {
2249 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2250 if (copy_tensor)
2251 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2252 }
2253 // Mark this symbol as init'ed.
2254 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2255 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2256 init_v[s >> 5] |= (1u << (s & 0x1f));
2257 // If we just allocated this tensor, now it is time to check if we need to mark it as fully allocated.
2258 if (!this_tensor_init)
2259 {
2260 if (ccv_cnnp_model_tensors_any_to_alloc(model, compiled_data))
2261 compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)compiled_data->tensors_init.v | (uintptr_t)1);
2262 else // Remove the flag.
2263 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2264 }
2265}
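// Sketch of the pointer-tagging convention used above (not part of the original source; variable
// names are illustrative): the low bit of compiled_data->tensors_init.v means "not yet fully
// allocated", and the low bit of a tensors.parameters[] entry means "not owned by this model".
// CCV_NNC_INIT_V / CCV_NNC_TENSOR strip the tag before dereferencing:
//
//   uint32_t* const init_v = (uint32_t*)((uintptr_t)compiled_data->tensors_init.v & ~(uintptr_t)1);
//   ccv_nnc_tensor_t* const raw = (ccv_nnc_tensor_t*)((uintptr_t)tagged & ~(uintptr_t)1);
//   const int not_owned = (uintptr_t)tagged & (uintptr_t)1;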
2266
2267void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2268{
2269 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2270 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2271 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2271, __extension__ __PRETTY_FUNCTION__
); }))
;
2272 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2272, __extension__ __PRETTY_FUNCTION__
); }))
;
2273 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2274 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2275 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2276 if (param_ref < 0)
2277 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2277
, __extension__ __PRETTY_FUNCTION__); }))
; }
2278 else
2279 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2279, __extension__ __PRETTY_FUNCTION__
); }))
; }
2280 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2281 ccv_array_free(parameter_indices);
2282 const int parameter_size = compiled_data->parameters->rnum;
2283 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2283
, __extension__ __PRETTY_FUNCTION__); }))
;
2284 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2284, __extension__ __PRETTY_FUNCTION__
); }))
;
2285 // We don't need to consider parallel_count; every parameter on each device is identical.
2286 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2287 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2287, __extension__
__PRETTY_FUNCTION__); }))
;
2288 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2289}
2290
2291ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2292{
2293 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2294 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2295 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2295, __extension__ __PRETTY_FUNCTION__
); }))
;
2296 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2296, __extension__ __PRETTY_FUNCTION__
); }))
;
2297 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2298 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2299 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2300 if (param_ref < 0)
2301 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2301
, __extension__ __PRETTY_FUNCTION__); }))
; }
2302 else
2303 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2303, __extension__ __PRETTY_FUNCTION__
); }))
; }
2304 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2305 ccv_array_free(parameter_indices);
2306 const int parameter_size = compiled_data->parameters->rnum;
2307 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2307
, __extension__ __PRETTY_FUNCTION__); }))
;
2308 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2308, __extension__ __PRETTY_FUNCTION__
); }))
;
2309 // We don't need to consider parallel_count; every parameter on each device is identical.
2310 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2311 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2311, __extension__
__PRETTY_FUNCTION__); }))
;
2312 return tensor->info;
2313}
2314
2315const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2316{
2317 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2318 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2319 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2319, __extension__ __PRETTY_FUNCTION__
); }))
;
2320 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2321 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2322 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2323 if (param_ref < 0)
2324 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2324
, __extension__ __PRETTY_FUNCTION__); }))
; }
2325 else
2326 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2326, __extension__ __PRETTY_FUNCTION__
); }))
; }
2327 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2328 ccv_array_free(parameter_indices);
2329 const int parameter_size = compiled_data->parameters->rnum;
2330 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2330
, __extension__ __PRETTY_FUNCTION__); }))
;
2331 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2331, __extension__ __PRETTY_FUNCTION__
); }))
;
2332 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2333}
2334
2335int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2336{
2337 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2337, __extension__ __PRETTY_FUNCTION__
); }))
;
2338 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2339 return compiled_data->parameters->rnum;
2340}
2341
2342uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2343{
2344 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2344, __extension__ __PRETTY_FUNCTION__
); }))
;
2345 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2346 const int parameter_size = compiled_data->parameters->rnum;
2347 int i;
2348 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2349 uint64_t size = 0;
2350 const int tensors_init = !!compiled_data->tensors_init.v;
2351 uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
: 0;
2352 for (i = 0; i < parameter_size; i++)
2353 {
2354 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2355 if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] & (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i]) // Use bitwise & to test whether symbol d's init bit is set.
2356 {
2357 ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info;
2358 size += ccv_nnc_tensor_data_size(params);
2359 continue;
2360 }
2361 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2362 .graph = graph,
2363 .d = d
2364 });
2365 size += ccv_nnc_tensor_data_size(params);
2366 }
2367 return size;
2368}
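// Bitmap indexing sketch (not part of the original source): each tensor symbol d owns one bit of
// init_v, at 32-bit word d >> 5, bit position d & 0x1f. The bit is set once the parameter has been
// initialized, and it is tested before trusting compiled_data->tensors.parameters[i]:
//
//   init_v[d >> 5] |= (1u << (d & 0x1f));                       /* mark symbol d as init'ed */
//   const int inited = !!(init_v[d >> 5] & (1u << (d & 0x1f))); /* test it */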
2369
2370int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type)
2371{
2372 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2372, __extension__ __PRETTY_FUNCTION__
); }))
;
2373 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2374 if (count != compiled_data->parameters->rnum)
2375 return 0;
2376 if (CCV_TENSOR_GET_DEVICE(type)((type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2377 CCV_TENSOR_SET_DEVICE_ID(type, 0)(type) = (((type) & ~0xfff00) | (((0) & 0xfff) <<
8))
;
2378 int i;
2379 // We don't need to consider parallel_count; every parameter on each device is identical.
2380 for (i = 0; i < count; i++)
2381 {
2382 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2383 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2384 {
2385 tensors[i] = 0;
2386 continue;
2387 }
2388 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2389 if (tensor->info.type == type)
2390 tensors[i] = tensor;
2391 else {
2392 ccv_nnc_tensor_param_t info = tensor->info;
2393 info.type = type;
2394 tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet.
2395 }
2396 }
2397 for (i = 0; i < count; i++)
2398 {
2399 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2400 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2401 continue;
2402 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2403 // Now initiate the transfer. We should do this on a stream.
2404 if (tensor->info.type != type)
2405 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensors[i])(ccv_nnc_tensor_t* []){tensors[i]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2406 }
2407 // Copy names and remove parameters.
2408 for (i = 0; i < count; i++)
2409 {
2410 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i];
2411 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2412 {
2413 names[i] = 0;
2414 continue;
2415 }
2416 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2417 const size_t name_len = ccv_min(strnlen(name, 1023), 1023)({ typeof (strnlen(name, 1023)) _a = (strnlen(name, 1023)); typeof
(1023) _b = (1023); (_a < _b) ? _a : _b; })
;
2418 names[i] = ccmallocmalloc(name_len + 1);
2419 names[i][name_len] = 0;
2420 memcpy(names[i], name, name_len);
2421 if (tensor->info.type == type)
2422 compiled_data->tensors.parameters[i] = 0; // Only relinquish it when the tensor was actually moved (types matched).
2423 }
2424 return 1;
2425}
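// Caller-contract sketch for ccv_cnnp_model_parameters_move (not part of the original source): names
// and tensors must be caller-provided arrays with exactly one entry per parameter; the function
// returns 0 on a count mismatch, stores 0 for parameters the model does not own, hands the tensor
// over directly when its type already matches the requested type, and otherwise returns a freshly
// allocated tensor of the requested type with the data transferred into it. The name strings are
// malloc'ed and owned by the caller afterwards, e.g.:
//
//   const int n = ccv_cnnp_model_parameter_count(model);
//   int ok = ccv_cnnp_model_parameters_move(model, names, tensors, n, type); /* type: target memory type */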
2426
2427KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27. Taking true branch
28. Taking false branch
29. Calling 'kh_resize_ccv_cnnp_parameter_id'
30. Taking true branch
31. Assuming the condition is false
32. Taking false branch
33. '?' condition is true
34. Assuming 'new_flags' is non-null
35. Taking false branch
36. '?' condition is true
37. Taking true branch
38. Storing uninitialized value
39. Assuming 'new_keys' is non-null
40. Taking false branch
41. Taking true branch
42. Assuming 'new_vals' is non-null
43. Taking false branch
44. Taking true branch
45. Loop condition is false. Execution continues on line 2427
46. Taking false branch
47. Returning from 'kh_resize_ccv_cnnp_parameter_id'
48. Taking false branch
49. The value 0 is assigned to 'i'
50. Assuming the condition is false
51. Taking false branch
52. Assuming the condition is false
53. 1st function call argument is an uninitialized value
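Reading of this path (an editorial sketch, not analyzer output): inside kh_put_ccv_cnnp_parameter_id the
table is grown by kh_resize_ccv_cnnp_parameter_id, which reallocs h->keys to the larger bucket count; the
newly added key slots are never written (step 38, "Storing uninitialized value") and only the freshly
memset flags mark them as empty. Back in kh_put, the analyzer assumes the flag bits of such a slot
describe an occupied bucket (steps 50-52), which lets it reach strcmp(h->keys[i], key) with an
uninitialized h->keys[i], the call flagged at line 2427, column 1. Since the memset of new_flags to 0xaa
should make every grown bucket read as empty before its key is ever compared, this looks like a false
positive from the analyzer losing the khash flags/keys invariant rather than a real uninitialized read.
The distilled pattern it objects to, in pseudo-khash form:

  new_keys = realloc(h->keys, new_n_buckets * sizeof(kh_cstr_t)); /* grown slots left uninitialized */
  memset(new_flags, 0xaa, ...);                                   /* ... but every bucket is marked empty */
  ...
  while (!isempty(flags, i) && (isdel(flags, i) || strcmp(h->keys[i], key) != 0)) /* keys[i] assumed readable */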
2428
2429void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
2430{
2431 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2431, __extension__ __PRETTY_FUNCTION__
); }))
;
2432 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2433 int i;
2434 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2435 if (count != compiled_data->parameters->rnum)
2436 {
2437 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2438 // Build the map between names and indices.
2439 for (i = 0; i < count; i++)
2440 {
2441 int ret;
2442 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[i], &ret);
2443 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2443
, __extension__ __PRETTY_FUNCTION__); }))
;
2444 kh_val(id_map, k)((id_map)->vals[k]) = i;
2445 }
2446 }
2447 const int parameter_size = compiled_data->parameters->rnum;
2448 int* copy_back = 0;
2449 const int tensors_init = !!compiled_data->tensors_init.v;
2450 if (!tensors_init)
2451 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2452 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2453 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2454 for (i = 0; i < parameter_size; i++)
2455 {
2456 int j = i;
2457 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2458 if (i >= 0 || strncmp(name, names[i], 1023) != 0)
2459 {
2460 // Build the map.
2461 if (id_map == 0)
2462 {
2463 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2464 for (j = 0; j < count; j++)
2465 {
2466 int ret;
2467 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[j], &ret);
2468 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2468
, __extension__ __PRETTY_FUNCTION__); }))
;
2469 kh_val(id_map, k)((id_map)->vals[k]) = j;
2470 }
2471 }
2472 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name)kh_get_ccv_cnnp_parameter_id(id_map, name);
2473 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2474 continue;
2475 j = kh_val(id_map, k)((id_map)->vals[k]);
2476 }
2477 if (compiled_data->tensors.parameters[i]) // Cannot be a shared parameter to read.
2478 { assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))((void) sizeof ((!((uintptr_t)compiled_data->tensors.parameters
[i] & (uintptr_t)1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t
)compiled_data->tensors.parameters[i] & (uintptr_t)1))
; else __assert_fail ("!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)"
, "ccv_cnnp_model.c", 2478, __extension__ __PRETTY_FUNCTION__
); }))
; }
2479 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
2480 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2481 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2482 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2483 const int d = parameter.d;
2484 if (info.type == tensors[j]->info.type && invalidates) // Can move.
2485 {
2486 // Deallocate it if needed.
2487 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2488 if (compiled_data->tensors.parameters[i])
2489 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2490 compiled_data->tensors.parameters[i] = tensors[j];
2491 tensors[j] = 0;
2492 } else {
2493 if (!compiled_data->tensors.parameters[i])
2494 { // Not allocated, to allocate first.
2495 // Create new one, make sure we create this by having the right parameters.
2496 const int type = info.type;
2497 info = tensors[j]->info;
2498 info.type = type; // Revert back the type.
2499 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
2500 }
2501 if (!copy_back)
2502 copy_back = (int*)cccalloccalloc(parameter_size, sizeof(int));
2503 copy_back[i] = j + 1;
2504 }
2505 init_v[d >> 5] |= (1u << (d & 0x1f));
2506 // Create this tensor for other data parallel allocations.
2507 info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
2508 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2509 for (j = 1; j < parallel_count; j++)
2510 if (!compiled_data->tensors.parameters[i + j * parameter_size])
2511 {
2512 if (j != device_id)
2513 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
2514 else
2515 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2516 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2517 }
2518 // No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
2519 }
2520 if (id_map)
2521 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2522 // Now do the transfer.
2523 if (copy_back)
2524 {
2525 for (i = 0; i < parameter_size; i++)
2526 {
2527 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[i]) & ~(uintptr_t)1))
;
2528 if (copy_back[i] == 0)
2529 continue;
2530 const int j = copy_back[i] - 1;
2531 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j])(ccv_nnc_tensor_t* []){tensors[j]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2532 }
2533 ccfreefree(copy_back);
2534 }
2535}
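A hypothetical call-site sketch for the function above (not taken from the analyzed file; the parameter ids, the helper name and the count of 3 are illustrative). With invalidates nonzero, tensors whose type matches are moved into the model and the corresponding array entries are nulled out, as the move branch above shows; otherwise the data is scheduled for a copy in the copy_back pass.

static void load_three_parameters(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const w0, ccv_nnc_tensor_t* const b0, ccv_nnc_tensor_t* const w1)
{
	char* names[3] = { "conv1-w", "conv1-b", "fc-w" }; /* illustrative parameter ids */
	ccv_nnc_tensor_t* tensors[3] = { w0, b0, w1 };     /* previously created or loaded tensors */
	ccv_cnnp_model_set_parameters_from_key_values(model, names, tensors, 3, 1 /* invalidates: matching tensors may be moved and nulled out */);
}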
2536
2537ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2538{
2539 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2540 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2540, __extension__ __PRETTY_FUNCTION__); }))
;
2541 const int parameter_size = compiled_data->parameters->rnum;
2542 int i;
2543 for (i = 0; i < parameter_size; i++)
2544 {
2545 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2546 if (first(model, name, context))
2547 return ccv_cnnp_model_parameters(model, -1, i);
2548 }
2549 return 0;
2550}
2551
2552ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2553{
2554 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2555 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2555, __extension__ __PRETTY_FUNCTION__); }))
;
2556 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2557 const int parameter_size = compiled_data->parameters->rnum;
2558 int i;
2559 for (i = 0; i < parameter_size; i++)
2560 {
2561 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2562 if (filter(model, name, context))
2563 {
2564 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2565 ccv_array_push(parameters, &parameter);
2566 }
2567 }
2568 return parameters;
2569
2570}
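A hypothetical filter callback for ccv_cnnp_model_parameters_filter above, shown only as a sketch: the callback shape is inferred from the call site (it receives the model, the parameter's id string and the user context, and returns nonzero to keep the parameter). The substring match, the helper names and the "bias" needle are assumptions, not documented API.

#include <string.h>

static int match_substring(const ccv_cnnp_model_t* const model, const char* const name, void* const context)
{
	return strstr(name, (const char*)context) != 0; /* keep parameters whose id contains the needle */
}

static ccv_array_t* select_bias_parameters(ccv_cnnp_model_t* const model)
{
	/* caller owns the returned array and frees it with ccv_array_free() */
	return ccv_cnnp_model_parameters_filter(model, match_substring, (void*)"bias");
}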
2571
2572CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2573{
2574 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2575 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2575, __extension__ __PRETTY_FUNCTION__); }))
;
2576 const int tensors_init = !!compiled_data->tensors_init.v;
2577 if (!tensors_init) // If nothing initialized, we return parameter 0.
2578 return ccv_cnnp_model_parameters(model, -1, 0);
2579 const int parameter_size = compiled_data->parameters->rnum;
2580 int i;
2581 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2582 for (i = 0; i < parameter_size; i++)
2583 {
2584 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2585 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2586 return ccv_cnnp_model_parameters(model, -1, i);
2587 }
2588 return 0;
2589}
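The tensors_init.v bookkeeping used here (and in the functions below) is a plain bit set over tensor-symbol indices: bit d records whether symbol d has been initialized. A minimal stand-alone restatement of the two idioms, for reference only:

#include <stdint.h>

static inline void init_bit_mark(uint32_t* const v, const int d)
{
	v[d >> 5] |= (1u << (d & 0x1f)); /* the "mark this symbol as init'ed" lines above */
}

static inline int init_bit_test(const uint32_t* const v, const int d)
{
	return !!(v[d >> 5] & (1u << (d & 0x1f))); /* the "is this symbol init'ed yet?" checks */
}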
2590
2591static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2592{
2593 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2594 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2594, __extension__
__PRETTY_FUNCTION__); }))
;
2595 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2596 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2597 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2598 return to_parameter_indices;
2599}
2600
2601static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2602{
2603 // If the model is not compiled yet. Compile them now.
2604 if (!model->graph)
2605 {
2606 model->graph = ccv_nnc_symbolic_graph_new();
2607 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2607, __extension__ __PRETTY_FUNCTION__
); }))
;
2608 const int input_size = from_model->input_size;
2609 ccv_nnc_tensor_param_t input_params[input_size];
2610 int i;
2611 for (i = 0; i < input_size; i++)
2612 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2613 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2614 model->parallel_count = from_model->parallel_count;
2615 model->memory_compression = from_model->memory_compression;
2616 model->memory_reduction = from_model->memory_reduction;
2617 model->gradient_checkpointing = from_model->gradient_checkpointing;
2618 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2619 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2620 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2621 }
2622 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2623 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2623, __extension__ __PRETTY_FUNCTION__
); }))
;
2624 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2625 if (!to_tensors_init)
2626 {
2627 if (only_init_0)
2628 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2629 else
2630 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2631 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2632 // Check if it is not fully allocated, if it is not, init_1.
2633 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2634 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2634, __extension__ __PRETTY_FUNCTION__
); }))
;
2635 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2636 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2637 if (*from_param_ref < 0 && *param_ref >= 0)
2638 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2638, __extension__ __PRETTY_FUNCTION__
); }))
; }
2639 else if (*from_param_ref >= 0)
2640 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2640, __extension__ __PRETTY_FUNCTION__
); }))
; }
2641 if (*param_ref < 0 && *from_param_ref >= 0)
2642 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2642, __extension__ __PRETTY_FUNCTION__); }))
; }
2643 else if (*param_ref >= 0)
2644 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2644, __extension__ __PRETTY_FUNCTION__
); }))
; }
2645}
2646
2647void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2648{
2649 ccv_array_t* to_parameter_indices;
2650 int to_param_ref;
2651 ccv_array_t* from_parameter_indices;
2652 int from_param_ref;
2653 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2654 // Should be exactly the same tensor.
2655 if (to_param_ref < 0 && from_param_ref < 0)
2656 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2656, __extension__ __PRETTY_FUNCTION__
); }))
; }
2657 // To models.
2658 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2659 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2659, __extension__ __PRETTY_FUNCTION__
); }))
;
2660 // From models.
2661 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2662 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2663 const int to_parameter_size = to_compiled_data->parameters->rnum;
2664 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2665 int i, j;
2666 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2667 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2668 for (i = 0; i < rnum; i++)
2669 {
2670 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2671 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2671, __extension__ __PRETTY_FUNCTION__); }))
;
2672 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2672, __extension__ __PRETTY_FUNCTION__
); }))
;
2673 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2674 // If the original is not init'ed. We cannot copy from.
2675 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2676 continue;
2677 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2678 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2678, __extension__ __PRETTY_FUNCTION__); }))
;
2679 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2679, __extension__ __PRETTY_FUNCTION__
); }))
;
2680 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2681 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2681, __extension__
__PRETTY_FUNCTION__); }))
;
2682 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2683 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2683, __extension__
__PRETTY_FUNCTION__); }))
;
2684 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2685 for (j = 1; j < parallel_count; j++)
2686 {
2687 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2688 if (copy_tensor)
2689 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2690 }
2691 // Mark this symbol as init'ed.
2692 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2693 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2694 }
2695 ccv_array_free(to_parameter_indices);
2696 ccv_array_free(from_parameter_indices);
2697}
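A hypothetical call-site sketch for copying parameters between two compatible models with the function above. The (-1, -1) selector mirrors the ccv_cnnp_model_parameters(model, -1, i) pattern visible in this file and is assumed here to mean "all parameters"; treat that, and the helper name, as assumptions rather than documented API.

static void copy_all_parameters(ccv_cnnp_model_t* const dst_model, ccv_cnnp_model_t* const src_model)
{
	ccv_cnnp_model_set_parameters(dst_model,
		ccv_cnnp_model_parameters(dst_model, -1, -1), /* destination side: all parameters (assumed selector) */
		src_model,
		ccv_cnnp_model_parameters(src_model, -1, -1)); /* source side: all parameters */
}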
2698
2699void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2700{
2701 ccv_array_t* to_parameter_indices;
2702 int to_param_ref;
2703 ccv_array_t* from_parameter_indices;
2704 int from_param_ref;
2705 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2706 // Should be exactly the same tensor.
2707 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1. Assuming 'renamer' is not equal to null
2708 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2708, __extension__ __PRETTY_FUNCTION__
); }))
; }
2709 // To models.
2710 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2711 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2711, __extension__ __PRETTY_FUNCTION__
); }))
;
2. Assuming 'to_compiled_data' is non-null
3. Taking true branch
2712 // From models.
2713 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2714 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4. Assuming '_a' is <= '_b'
5. '?' condition is false
2715 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2715, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count to share parameters.
6. Assuming '_a' is <= '_b'
7. '?' condition is false
8. Taking true branch
2716 const int from_parameter_size = from_compiled_data->parameters->rnum;
2717 const int to_parameter_size = to_compiled_data->parameters->rnum;
2718 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9. Assuming 'to_param_ref' is >= 0
2719 int i, j;
2720 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2721 char* updated_name = 0;
2722 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2723 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2724 for (i = 0; i < rnum; i++)
2725 {
2726 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10. Assuming 'from_param_ref' is < 0
11. '?' condition is false
12. Assuming the condition is false
13. '?' condition is false
2727 // Need to figure out how to use the renamer here.
2728 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14. '?' condition is true
2729 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2729, __extension__ __PRETTY_FUNCTION__); }))
;
15. Assuming 'dest_d' is >= 0
16. Taking true branch
2730 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2730, __extension__
__PRETTY_FUNCTION__); }))
;
17. Assuming 'dest_d' is < 'to_parameter_size'
18. Taking true branch
2731 if (renamer
18.1. 'renamer' is non-null
)
2732 {
2733 const char* const src_name = (src_d
18.2. 'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2734 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2735 if (!updated_name
18.3. 'updated_name' is null
)
19. Taking true branch
2736 updated_name = (char*)ccmallocmalloc(1024);
2737 const size_t src_name_len = src_name
19.1. 'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20. '?' condition is true
2738 if (src_name_len
20.1. 'src_name_len' is <= 0
> 0)
21. Taking false branch
2739 memcpy(updated_name, src_name, src_name_len);
2740 updated_name[src_name_len] = 0;
2741 if (renamer(context, dest_name, updated_name, 1024) != 0)
22. Assuming the condition is false
2742 continue; // Skip this.
2743 if (src_name
22.1. 'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2744 {
2745 // Nothing changed.
2746 } else {
2747 if (!id_map
22.2. 'id_map' is null
)
23. Taking true branch
2748 {
2749 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2750 for (j = 0; j < from_parameter_size; j++)
24. Assuming 'j' is < 'from_parameter_size'
25. Loop condition is true. Entering loop body
2751 {
2752 int ret;
2753 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26. Calling 'kh_put_ccv_cnnp_parameter_id'
2754 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2754
, __extension__ __PRETTY_FUNCTION__); }))
;
2755 kh_val(id_map, k)((id_map)->vals[k]) = j;
2756 }
2757 }
2758 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2759 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2760 continue;
2761 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2762 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2762, __extension__ __PRETTY_FUNCTION__); }))
;
2763 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2763, __extension__
__PRETTY_FUNCTION__); }))
;
2764 }
2765 }
2766 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2766, __extension__ __PRETTY_FUNCTION__); }))
;
2767 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2767, __extension__
__PRETTY_FUNCTION__); }))
;
2768 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2769 // If the original is not init'ed. We cannot share from.
2770 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2771 continue;
2772 for (j = 0; j < parallel_count; j++)
2773 {
2774 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2775 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2775, __extension__
__PRETTY_FUNCTION__); }))
;
2776 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2777 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2778 ccv_nnc_tensor_free(dest);
2779 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2780 }
2781 // Mark this symbol as init'ed.
2782 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2783 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2784 }
2785 ccv_array_free(to_parameter_indices);
2786 ccv_array_free(from_parameter_indices);
2787 if (id_map)
2788 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2789 if (updated_name)
2790 ccfreefree(updated_name);
2791 // Mark it as incomplete so we will call init_1.
2792 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2793 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2794 else // Remove the flag.
2795 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2796}
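A hypothetical renamer for ccv_cnnp_model_share_parameters above. Its shape is inferred from the call renamer(context, dest_name, updated_name, 1024): updated_name arrives pre-filled with the source-side name (or empty), may be rewritten in place to pick which source parameter to share from, and a nonzero return skips the parameter. The prefix-stripping behavior, the helper name and the "ema-" context value are purely illustrative.

#include <stdio.h>
#include <string.h>

static int strip_prefix_renamer(void* const context, const char* const dest_name, char* const updated_name, const size_t provided_size)
{
	const char* const prefix = (const char*)context; /* e.g. "ema-" */
	const size_t plen = strlen(prefix);
	if (strncmp(dest_name, prefix, plen) == 0)
		snprintf(updated_name, provided_size, "%s", dest_name + plen); /* share from the unprefixed source name */
	else
		snprintf(updated_name, provided_size, "%s", dest_name);
	return 0; /* a nonzero return would skip this parameter */
}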
2797
2798ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2799{
2800 if (!compiled_data->stream_map)
2801 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2802 int ret = 0;
2803 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2804 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2804, __extension__ __PRETTY_FUNCTION__); }))
;
2805 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2806 // If ret == 0, the key already exists; we can return directly, otherwise create and return.
2807 if (ret != 0)
2808 {
2809 stream = ccv_nnc_stream_context_new(type);
2810 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2811 }
2812 return stream;
2813}
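The function above is a get-or-create wrapper over a khash map; the inline comment above it states the kh_put return convention it relies on. A generic restatement of that idiom with the stock khash macros (assumed to be available, as in the analyzed file), using an int-keyed map and illustrative names:

#include "khash.h"

KHASH_MAP_INIT_INT(int2ptr, void*)

static void* get_or_create(khash_t(int2ptr)* const map, const int key, void* (*make)(int))
{
	int ret;
	const khiter_t k = kh_put(int2ptr, map, key, &ret);
	if (ret != 0)                  /* a fresh slot was claimed: fill in its value */
		kh_val(map, k) = make(key);
	return kh_val(map, k);         /* ret == 0: the key already existed, reuse its value */
}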
2814
2815void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2816{
2817 ccv_array_t* to_parameter_indices;
2818 int to_param_ref;
2819 ccv_array_t* from_parameter_indices;
2820 int from_param_ref;
2821 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2822 // Should be exactly the same tensor.
2823 if (to_param_ref < 0 && from_param_ref < 0)
2824 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2824, __extension__ __PRETTY_FUNCTION__
); }))
; }
2825 // To models.
2826 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2827 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2827, __extension__ __PRETTY_FUNCTION__
); }))
;
2828 // From models.
2829 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2830 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2831 const int to_parameter_size = to_compiled_data->parameters->rnum;
2832 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2833 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2833, __extension__ __PRETTY_FUNCTION__
); }))
;
2834 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2834, __extension__ __PRETTY_FUNCTION__
); }))
;
2835 int i, j;
2836 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2837 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2838 for (i = 0; i < aux_in_size; i++)
2839 inputs[i + 2] = aux_ins[i];
2840 for (i = 0; i < aux_out_size; i++)
2841 outputs[i + 1] = aux_outs[i];
2842 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2843 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2844 for (i = 0; i < rnum; i++)
2845 {
2846 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2847 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2847, __extension__ __PRETTY_FUNCTION__); }))
;
2848 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2848, __extension__ __PRETTY_FUNCTION__
); }))
;
2849 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2850 // If the original is not init'ed. We cannot copy from.
2851 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2852 continue;
2853 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2854 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2854, __extension__ __PRETTY_FUNCTION__); }))
;
2855 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2855, __extension__ __PRETTY_FUNCTION__
); }))
;
2856 if (parallel_count > 1)
2857 {
2858 ccv_nnc_stream_context_t* streams[parallel_count];
2859 ccv_nnc_stream_signal_t* signal;
2860 if (stream_context)
2861 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2862 for (j = 0; j < parallel_count; j++)
2863 {
2864 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2865 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2866 if (!dest || !src)
2867 {
2868 streams[j] = 0;
2869 continue;
2870 }
2871 // At the moment, can only handle them on the same device.
2872 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2872, __extension__ __PRETTY_FUNCTION__
); }))
;
2873 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2873, __extension__ __PRETTY_FUNCTION__
); }))
;
2874 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2875 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2876 int type = stream_type;
2877 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2878 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2879 // Wait signal to finish.
2880 if (stream_context)
2881 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2882 inputs[0] = outputs[0] = dest;
2883 inputs[1] = src;
2884 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2885 if (stream_context)
2886 {
2887 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2888 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2889 }
2890 streams[j] = stream_0;
2891 }
2892 // If this should be blocking, blocking it.
2893 if (!stream_context)
2894 for (j = 0; j < parallel_count; j++)
2895 if (streams[j])
2896 ccv_nnc_stream_context_wait(streams[j]);
2897 } else {
2898 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2899 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2899, __extension__
__PRETTY_FUNCTION__); }))
;
2900 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2901 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2901, __extension__
__PRETTY_FUNCTION__); }))
;
2902 inputs[0] = outputs[0] = dest;
2903 inputs[1] = src;
2904 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2905 }
2906 // Mark this symbol as init'ed.
2907 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2908 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2909 }
2910 ccv_array_free(to_parameter_indices);
2911 ccv_array_free(from_parameter_indices);
2912}
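A hypothetical use of ccv_cnnp_model_parameters_zip_map above: an exponential-moving-average style update where each destination parameter (wired as inputs[0]/outputs[0]) is blended with the matching source parameter (inputs[1]). CMD_ADD_FORWARD taking two scalar coefficients is assumed from the wider ccv_nnc command set and does not appear in this file; verify it before relying on this sketch. The helper and model names are illustrative.

static void ema_update(ccv_cnnp_model_t* const ema_model, ccv_cnnp_model_t* const model)
{
	ccv_cnnp_model_parameters_zip_map(ema_model, ccv_cnnp_model_parameters(ema_model, -1, -1),
		CMD_ADD_FORWARD(0.99, 0.01), ccv_nnc_no_hint, 0, /* assumed command: 0.99 * dest + 0.01 * src */
		0, 0, /* no aux inputs */
		0, 0, /* no aux outputs */
		0,    /* no stream context: run blocking */
		model, ccv_cnnp_model_parameters(model, -1, -1));
}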
2913
2914void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2915{
2916 int to_param_ref;
2917 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2918 // To models.
2919 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2920 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2920, __extension__ __PRETTY_FUNCTION__
); }))
;
2921 // Tensor has to be inited already.
2922 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2922, __extension__ __PRETTY_FUNCTION__
); }))
;
2923 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2923, __extension__ __PRETTY_FUNCTION__
); }))
;
2924 // From models.
2925 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2926 const int to_parameter_size = to_compiled_data->parameters->rnum;
2927 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2928 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2928, __extension__ __PRETTY_FUNCTION__
); }))
;
2929 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2929, __extension__ __PRETTY_FUNCTION__
); }))
;
2930 int i, j;
2931 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2932 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2933 for (i = 0; i < aux_in_size; i++)
2934 inputs[i + 1] = aux_ins[i];
2935 for (i = 0; i < aux_out_size; i++)
2936 outputs[i + 1] = aux_outs[i];
2937 for (i = 0; i < rnum; i++)
2938 {
2939 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2940 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2940, __extension__ __PRETTY_FUNCTION__); }))
;
2941 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2941, __extension__ __PRETTY_FUNCTION__
); }))
;
2942 if (parallel_count > 1)
2943 {
2944 ccv_nnc_stream_context_t* streams[parallel_count];
2945 ccv_nnc_stream_signal_t* signal;
2946 if (stream_context)
2947 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2948 for (j = 0; j < parallel_count; j++)
2949 {
2950 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2951 if (!dest)
2952 {
2953 streams[j] = 0;
2954 continue;
2955 }
2956 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2957 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2958 int type = stream_type;
2959 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2960 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2961 // Wait signal to finish.
2962 if (stream_context)
2963 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2964 inputs[0] = outputs[0] = dest;
2965 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2966 if (stream_context)
2967 {
2968 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2969 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2970 }
2971 streams[j] = stream_0;
2972 }
2973 // If this should be blocking, blocking it.
2974 if (!stream_context)
2975 for (j = 0; j < parallel_count; j++)
2976 if (streams[j])
2977 ccv_nnc_stream_context_wait(streams[j]);
2978 } else {
2979 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2980 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2980, __extension__
__PRETTY_FUNCTION__); }))
;
2981 inputs[0] = outputs[0] = dest;
2982 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2983 }
2984 // No need to mark this symbol as init'ed, it is already.
2985 }
2986 ccv_array_free(to_parameter_indices);
2987}
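A hypothetical use of ccv_cnnp_model_parameters_map above, applying one command in place to every selected parameter (the destination tensor is wired as both inputs[0] and outputs[0]). CMD_SET_FORWARD taking the fill value is assumed from the wider ccv_nnc command set and is not shown in this file; the helper name is illustrative.

static void zero_all_parameters(ccv_cnnp_model_t* const model)
{
	ccv_cnnp_model_parameters_map(model, ccv_cnnp_model_parameters(model, -1, -1),
		CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, /* assumed command: overwrite each parameter with zeros */
		0, 0, /* no aux inputs */
		0, 0, /* no aux outputs */
		0);   /* no stream context: run blocking */
}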
2988
2989void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2990{
2991 int to_param_ref;
2992 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2993 // To models.
2994 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2995 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2995, __extension__ __PRETTY_FUNCTION__
); }))
;
2996 // Tensor has to be inited already.
2997 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2997, __extension__ __PRETTY_FUNCTION__
); }))
;
2998 ccv_nnc_tensor_t** tensor_gradients;
2999 if (to_compiled_data->backward.count > 1)
3000 tensor_gradients = to_compiled_data->tensors.accum_gradients;
3001 else
3002 tensor_gradients = to_compiled_data->tensors.gradients;
3003 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 3003, __extension__ __PRETTY_FUNCTION__
); }))
;
3004 // From models.
3005 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3006 const int to_parameter_size = to_compiled_data->parameters->rnum;
3007 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3008 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 3008, __extension__ __PRETTY_FUNCTION__
); }))
;
3009 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 3009, __extension__ __PRETTY_FUNCTION__
); }))
;
3010 int i, j;
3011 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
3012 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
3013 for (i = 0; i < aux_in_size; i++)
3014 inputs[i + 1] = aux_ins[i];
3015 for (i = 0; i < aux_out_size; i++)
3016 outputs[i + 1] = aux_outs[i];
3017 for (i = 0; i < rnum; i++)
3018 {
3019 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3020 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3020, __extension__ __PRETTY_FUNCTION__); }))
;
3021 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3021, __extension__ __PRETTY_FUNCTION__
); }))
;
3022 if (parallel_count > 1)
3023 {
3024 ccv_nnc_stream_context_t* streams[parallel_count];
3025 ccv_nnc_stream_signal_t* signal;
3026 if (stream_context)
3027 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
3028 for (j = 0; j < parallel_count; j++)
3029 {
3030 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
3031 if (!dest)
3032 {
3033 streams[j] = 0;
3034 continue;
3035 }
3036 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3037 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3038 int type = stream_type;
3039 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3040 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3041 // Wait signal to finish.
3042 if (stream_context)
3043 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3044 inputs[0] = outputs[0] = dest;
3045 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3046 if (stream_context)
3047 {
3048 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3049 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3050 }
3051 streams[j] = stream_0;
3052 }
3053 // If this should be blocking, blocking it.
3054 if (!stream_context)
3055 for (j = 0; j < parallel_count; j++)
3056 if (streams[j])
3057 ccv_nnc_stream_context_wait(streams[j]);
3058 } else {
3059 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
3060 if (!dest)
3061 continue;
3062 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3062, __extension__
__PRETTY_FUNCTION__); }))
;
3063 inputs[0] = outputs[0] = dest;
3064 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3065 }
3066 // No need to mark this symbol as init'ed, it is already.
3067 }
3068 ccv_array_free(to_parameter_indices);
3069}
3070
3071void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
3072{
3073 // Only CUDA backend has this feature.
3074#ifdef HAVE_CUDA1
3075 int to_param_ref;
3076 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3077 // To models.
3078 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3079 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3079, __extension__ __PRETTY_FUNCTION__); }))
;
3080 // Tensor has to be inited already.
3081 assert(!!compiled_data->tensors_init.v)((void) sizeof ((!!compiled_data->tensors_init.v) ? 1 : 0)
, __extension__ ({ if (!!compiled_data->tensors_init.v) ; else
__assert_fail ("!!compiled_data->tensors_init.v", "ccv_cnnp_model.c"
, 3081, __extension__ __PRETTY_FUNCTION__); }))
;
3082 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 3082, __extension__ __PRETTY_FUNCTION__
); }))
;
3083 // From models.
3084 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3085 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3086 int i;
3087 for (i = 0; i < rnum; i++)
3088 {
3089 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3090 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3090, __extension__ __PRETTY_FUNCTION__); }))
;
3091 assert(dest_d < compiled_data->parameters->rnum)((void) sizeof ((dest_d < compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3091, __extension__ __PRETTY_FUNCTION__
); }))
;
3092 if (parallel_count > 1)
3093 {
3094 assert(0 && "Cannot support this when data parallel is in effect.")((void) sizeof ((0 && "Cannot support this when data parallel is in effect."
) ? 1 : 0), __extension__ ({ if (0 && "Cannot support this when data parallel is in effect."
) ; else __assert_fail ("0 && \"Cannot support this when data parallel is in effect.\""
, "ccv_cnnp_model.c", 3094, __extension__ __PRETTY_FUNCTION__
); }))
;
3095 } else {
3096 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d]);
3097 assert(src);
3098 ccv_nnc_tensor_param_t params = src->info;
3099 if (CCV_TENSOR_GET_MEMORY(params.type) != CCV_TENSOR_GPU_MEMORY)
3100 continue;
3101 const size_t size = ccv_nnc_tensor_data_size(params);
3102 if (size <= 0)
3103 continue;
3104 const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1);
3105 const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
3106 ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
3107 tensor->dataof = 0;
3108 tensor->alias_ref = 0;
3109 tensor->sig = 0;
3110 tensor->refcount = 1;
3111 tensor->info = params;
3112 if (tfb)
3113 {
3114 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
3115 // This corresponds to mat->step
3116 tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
3117 } else // This won't be recognized by ccv_dense_matrix_t
3118 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
3119 // Remove this flag so it can be deallocated as usual.
3120 tensor->type &= ~CCV_NO_DATA_ALLOC;
3121 assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
3122 void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
3123 if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
3124 {
3125 tensor->data.u8 = (uint8_t*)ptr;
3126 tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer an explicit prefetch call.
3127 } else {
3128 // Allocation failed.
3129 ccfreefree(tensor);
3130 continue;
3131 }
3132 // TODO: Cannot run this on the stream context yet, due to allocation and deallocations.
3133 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0);
3134 cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size);
3135 compiled_data->tensors.parameters[dest_d] = tensor;
3136 // Can free out the old one.
3137 if (should_free)
3138 ccv_nnc_tensor_free(src);
3139 }
3140 // No need to mark this symbol as init'ed, it is already.
3141 }
3142 ccv_array_free(to_parameter_indices);
3143#endif
3144}
3145
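// Returns the model-wide minimizer command currently recorded on the compiled data;
// per-parameter overrides installed via ccv_cnnp_model_set_minimizer are not reflected here.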
3146ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
3147{
3148 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3149 assert(compiled_data);
3150 return compiled_data->minimize.minimizer;
3151}
3152
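// Installs a minimizer either model-wide (set_parameters == 0) or for a subset of
// parameters; it grows the saved-aux bookkeeping when the new minimizer needs more
// auxiliary slots, and frees the fit / apply-gradients graphs when update nodes change.
// A minimal sketch that lowers the learning rate for every parameter (hedged: it
// assumes the active minimizer is SGD-like and keeps its rate at info.sgd.rate):
//   ccv_nnc_cmd_t minimizer = ccv_cnnp_model_minimizer(model);
//   minimizer.info.sgd.rate = 0.0001;
//   ccv_cnnp_model_set_minimizer(model, minimizer, 0, 0, 0);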
3153void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
3154{
3155 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3156 assert(compiled_data);
3157 const int parameter_size = compiled_data->parameters->rnum;
3158 if (parameter_size == 0)
3159 return;
3160 if (reset)
3161 { assert(set_parameters == 0 && set_parameter_size == 0); }
3162 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3163 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
3164 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
3165 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
3166 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3167 // We update all parameters; at this point, we have one minimizer.
3168 if (set_parameters == 0 || set_parameter_size == 0)
3169 compiled_data->minimize.minimizer = minimizer;
3170 int i;
3171 if (set_parameters && set_parameter_size)
3172 {
3173 // I need to save which minimizer goes along with this.
3174 if (!compiled_data->minimize.parameters)
3175 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
3176 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
3177 set_minimizer_for_parameter->minimizer = minimizer;
3178 set_minimizer_for_parameter->parameter_size = set_parameter_size;
3179 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
3180 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
3181 }
3182 // If reset is true, clear the parameters array.
3183 if (reset && compiled_data->minimize.parameters)
3184 {
3185 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3186 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
3187 ccv_array_clear(compiled_data->minimize.parameters);
3188 }
3189 if (!compiled_data->update_nodes)
3190 return;
3191 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
3192 assert(symbolic_graph);
3193 if (saved_aux_size > old_max_saved_aux_size)
3194 {
3195 assert(compiled_data->updated_parameters);
3196 // Reallocate first, move them around later.
3197 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
3198 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
3199 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
3200 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
3201 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
3202 }
3203 int flag = 0;
3204 const int parallel_count = ccv_max(model->parallel_count, 1);
3205 if (set_parameters && set_parameter_size)
3206 {
3207 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
3208 for (i = 0; i < set_parameter_size; i++)
3209 {
3210 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
3211 assert(set_parameters[i]->param_sel != 0);
3212 const int old_rnum = parameter_indices->rnum;
3213 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
3214 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
3215 assert(set_parameters[i]->param_ref != 0);
3216 if (param_ref >= 0)
3217 {
3218 assert(param_ref + old_rnum < parameter_indices->rnum);
3219 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
3220 parameter_indices->rnum = old_rnum + 1;
3221 }
3222 }
3223 // We may have duplicated indices, but that is OK, we will set it twice.
3224 for (i = 0; i < parameter_indices->rnum; i++)
3225 {
3226 const int d = *(int*)ccv_array_get(parameter_indices, i);
3227 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
3228 flag = 1;
3229 }
3230 ccv_array_free(parameter_indices);
3231 } else {
3232 for (i = 0; i < parameter_size; i++)
3233 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
3234 flag = 1;
3235 if (compiled_data->minimize.parameters)
3236 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
3237 flag = 1;
3238 }
3239 if (flag)
3240 {
3241 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
3242 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
3243 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3244 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3245 }
3246}
3247
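// Stores the symbolic-graph compile parameters on the compiled data; they take
// effect when the concrete computation graph is built from the symbolic graph.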
3248void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
3249{
3250 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3251 assert(compiled_data);
3252 compiled_data->compile_params = compile_params;
3253}
3254
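// Writes Graphviz dot output for up to four graphs, one per FILE* in outs: the
// symbolic graph, the compiled concrete graph, the gradient-accumulation graph
// and the apply-gradients graph. A minimal sketch (hedged: the
// CCV_NNC_LONG_DOT_GRAPH flag is assumed to come from ccv_nnc.h):
//   FILE* out = fopen("model.dot", "w+");
//   ccv_cnnp_model_dot(model, CCV_NNC_LONG_DOT_GRAPH, &out, 1);
//   fclose(out);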
3255void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
3256{
3257 if (model->graph && out_size > 0)
3258 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
3259 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
3260 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
3261 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
3262 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
3263 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
3264 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
3265}
3266
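// Streams a structured description of the symbolic graph to the caller-provided
// format_fn callback; this is a no-op if the model has not been compiled into a graph yet.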
3267void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
3268{
3269 if (model->graph)
3270 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
3271}
3272
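// Internal teardown for the compiled data: releases the parameter / internal tensor
// arrays and their ids, gradient and accumulated-gradient tensors, per-parameter
// minimizer records, the stream map, all derived graphs, gradient checkpoints and
// the xpu allocator. Parameter tensors whose low pointer bit is set are treated as
// not owned and are left alone.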
3273static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
3274{
3275 int i;
3276 const int parameter_size = compiled_data->parameters->rnum;
3277 ccv_array_free(compiled_data->parameters);
3278 if (compiled_data->parameter_flags)
3279 ccfree(compiled_data->parameter_flags);
3280 const int internal_size = compiled_data->internals->rnum;
3281 ccv_array_free(compiled_data->internals);
3282 assert(compiled_data->ids.parameters->rnum == parameter_size);
3283 assert(compiled_data->ids.internals->rnum == internal_size);
3284 for (i = 0; i < parameter_size; i++)
3285 ccfree(*(char**)ccv_array_get(compiled_data->ids.parameters, i));
3286 ccv_array_free(compiled_data->ids.parameters);
3287 for (i = 0; i < internal_size; i++)
3288 ccfree(*(char**)ccv_array_get(compiled_data->ids.internals, i));
3289 ccv_array_free(compiled_data->ids.internals);
3290 const int parallel_count = ccv_max(model->parallel_count, 1);
3291 if (compiled_data->tensors.parameters)
3292 {
3293 for (i = 0; i < parameter_size * parallel_count; i++)
3294 // If it is not marked as not belonging, we can free it.
3295 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
3296 if (compiled_data->tensors.parameters[i])
3297 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
3298 for (i = 0; i < internal_size * parallel_count; i++)
3299 if (compiled_data->tensors.internals[i])
3300 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
3301 ccfree(compiled_data->tensors.parameters);
3302 }
3303 if (compiled_data->tensors.gradients)
3304 {
3305 for (i = 0; i < parameter_size * parallel_count; i++)
3306 {
3307 if (compiled_data->tensors.gradients[i])
3308 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3309 if (compiled_data->tensors.accum_gradients[i])
3310 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3311 }
3312 ccfree(compiled_data->tensors.gradients);
3313 }
3314 if (compiled_data->minimize.parameters)
3315 {
3316 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3317 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
3318 ccv_array_free(compiled_data->minimize.parameters);
3319 }
3320 if (compiled_data->rewindables)
3321 ccv_array_free(compiled_data->rewindables);
3322 if (compiled_data->tensors_init.v)
3323 ccfree(CCV_NNC_INIT_V(compiled_data->tensors_init.v));
3324 if (compiled_data->evaluate.tos)
3325 ccfree(compiled_data->evaluate.tos);
3326 compiled_data->evaluate.tos = 0;
3327 if (compiled_data->stream_map)
3328 {
3329 khiter_t k;
3330 for (k = kh_begin(compiled_data->stream_map); k != kh_end(compiled_data->stream_map); ++k)
3331 {
3332 if (!kh_exist(compiled_data->stream_map, k))
3333 continue;
3334 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k);
3335 ccv_nnc_stream_context_free(stream);
3336 }
3337 kh_destroy(stream_map, compiled_data->stream_map);
3338 }
3339 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3340 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3341 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3342 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3343 if (compiled_data->gradient_checkpoints)
3344 {
3345 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3346 {
3347 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i);
3348 assert(checkpoint->inputs);
3349 ccfree(checkpoint->inputs);
3350 ccv_array_free(checkpoint->tensor_symbols);
3351 }
3352 ccv_array_free(compiled_data->gradient_checkpoints);
3353 }
3354 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3355 ccfree(compiled_data);
3356}
3357
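// Releases the model itself: deinitializes it, invokes the subclass dealloc hook,
// frees every recorded ccv_cnnp_model_io_t along with its incoming / outgoing /
// dependency arrays, then the parameter indices, inputs, symbolic graph, compiled
// data and name. Typical teardown, assuming `model` came from a ccv_cnnp constructor:
//   ccv_cnnp_model_free(model);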
3358void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3359{
3360 ccv_cnnp_model_deinit(model);
3361 if (model->isa->dealloc)
3362 model->isa->dealloc(model);
3363 if (model->io)
3364 {
3365 int i;
3366 for (i = 0; i < model->io->rnum; i++)
3367 {
3368 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
3369 if (model_io->outgoings)
3370 ccv_array_free(model_io->outgoings);
3371 if (model_io->incomings)
3372 ccv_array_free(model_io->incomings);
3373 if (model_io->dependencies)
3374 ccv_array_free(model_io->dependencies);
3375 ccfree(model_io);
3376 }
3377 ccv_array_free(model->io);
3378 }
3379 if (model->parameter_indices)
3380 ccv_array_free(model->parameter_indices);
3381 if (model->inputs)
3382 ccfree(model->inputs);
3383 if (model->graph)
3384 ccv_nnc_symbolic_graph_free(model->graph);
3385 if (model->compiled_data)
3386 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3387 if (model->name)
3388 ccfree(model->name);
3389 ccfree(model);
3390}
3391
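// Requests cancellation of any in-flight execution on the compiled evaluation graph
// and the apply-gradients graph; this is a no-op when the model has not been compiled.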
3392void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3393{
3394 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3395 if (!compiled_data)
3396 return;
3397 if (compiled_data->graph)
3398 ccv_nnc_graph_cancel(compiled_data->graph);
3399 if (compiled_data->apply_gradients.graph)
3400 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3401}
3402
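// Simple accessors for the command execution flags attached to the model: the flags
// are stored verbatim in model->exec_flags and read back by ccv_cnnp_model_flags.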
3403void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3404{
3405 model->exec_flags = flags;
3406}
3407
3408int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3409{
3410 return model->exec_flags;
3411}