Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2468, column 1
1st function call argument is an uninitialized value
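For orientation, this warning class fires when a value that was never written on the analyzed path is passed as a function argument. A minimal, generic illustration of the defect class (unrelated to the ccv source below; the variable and function names are made up):

#include <stdio.h>

static void consume(const int x)
{
	printf("%d\n", x);
}

int main(void)
{
	int value; /* never initialized on the path the analyzer follows */
	if (0)
		value = 1;
	consume(value); /* <- "1st function call argument is an uninitialized value" */
	return 0;
}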

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2025-03-17-111301-58955-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7
8// MARK - Level-5 API
9
10ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
11{
12 if (!model->io)
13 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
14 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
15 model_io->param_ref = 0;
16 model_io->param_sel = 0;
17 model_io->visit = 0;
18 model_io->model = model;
19 model_io->dependencies = 0;
20 model_io->dependents = 0;
21 model_io->outgoings = 0;
22 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
23 ccv_array_push(model->io, &model_io);
24 if (input_size > 0)
25 {
26 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
27 ccv_array_resize(model_io->incomings, input_size);
28 int i;
29 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
30 for (i = 0; i < input_size; i++)
31 {
32 if (!inputs[i]->outgoings)
33 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
34 ccv_array_push(inputs[i]->outgoings, &model_io);
35 }
36 } else {
37 model_io->incomings = 0;
38 }
39 return model_io;
40}
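A minimal usage sketch for the function above. The ccv_cnnp_input() constructor and the caller-supplied submodel are assumptions from the public Level-5 API, not taken from this file; only the ccv_cnnp_model_apply call itself matches the signature shown above.

#include "ccv_nnc.h"

/* Sketch: route one IO handle through a submodel. */
static ccv_cnnp_model_io_t apply_once(ccv_cnnp_model_t* const submodel)
{
	const ccv_cnnp_model_io_t x = ccv_cnnp_input(); /* assumed public constructor */
	return ccv_cnnp_model_apply(submodel, &x, 1);
}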
41
42void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
43{
44 assert(dependency_size > 0);
45 if (!model_io->dependencies)
46 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
47 int i, j;
48 for (i = 0; i < dependency_size; i++)
49 {
50 int flag = 0;
51 // Check whether it already exists or not.
52 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
53 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
54 flag = 1;
55 if (flag)
56 continue;
57 ccv_array_push(model_io->dependencies, dependencies + i);
58 ++dependencies[i]->dependents;
59 }
60}
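A usage sketch for the dependency API above (the two handles are assumed to come from earlier ccv_cnnp_model_apply calls; this is illustrative only):

#include "ccv_nnc.h"

/* Make `node` wait on `barrier` without consuming its outputs. */
static void add_barrier_dependency(const ccv_cnnp_model_io_t node, const ccv_cnnp_model_io_t barrier)
{
	ccv_cnnp_model_add_dependencies(node, &barrier, 1);
}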
61
62int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
63{
64 return model->output_size;
65}
66
67int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
68{
69 // If the model is compiled, it defaults to 1 unless it is explicitly set otherwise.
70 if (model->compiled_data)
71 return model->is_trainable >= 0 ? model->is_trainable : 1;
72 return model->is_trainable;
73}
74
75ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
76{
77 if (!model->io)
78 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
79 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
80 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
81 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
82 model_io->visit = 0;
83 model_io->model = model;
84 model_io->outputs = 0;
85 model_io->dependencies = 0;
86 model_io->dependents = 0;
87 model_io->incomings = 0;
88 model_io->outgoings = 0;
89 ccv_array_push(model->io, &model_io);
90 return model_io;
91}
92
93void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
94{
95 model->notify_hook.func = func;
96 model->notify_hook.context = context;
97}
98
99void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
100{
101 if (model->notify_hook.func)
102 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
103 if (model->isa->notify)
104 model->isa->notify(model, tag, payload);
105}
106
107static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
108{
109 int i, j;
110 for (i = 0; i < graph_exec_symbol_size; i++)
111 {
112 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
113 // Check whether this graph exec symbol has any duplicate.
114 for (j = i + 1; j < graph_exec_symbol_size;)
115 {
116 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
117 // If there is an identical graph exec symbol, remove it.
118 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
119 {
120 if (j + 1 < graph_exec_symbol_size)
121 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
122 --graph_exec_symbol_size;
123 continue;
124 }
125 ++j;
126 }
127 }
128 return graph_exec_symbol_size;
129}
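The dedup above compacts in place by overwriting a duplicate with the current last element and shrinking the count, so order is not preserved. A self-contained sketch of the same swap-with-last technique on plain ints (illustrative only, not part of the library):

#include <assert.h>

static int dedup_ints(int* const a, int n)
{
	int i, j;
	for (i = 0; i < n; i++)
		for (j = i + 1; j < n;)
		{
			if (a[j] == a[i])
			{
				a[j] = a[n - 1]; /* overwrite the duplicate with the last element */
				--n;
				continue;
			}
			++j;
		}
	return n;
}

int main(void)
{
	int a[] = {3, 1, 3, 2, 1};
	assert(dedup_ints(a, 5) == 3); /* leaves {3, 1, 2}, order not preserved */
	return 0;
}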
130
131void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
132{
133 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
134 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
135 int i;
136 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
137 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
138 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
139 {
140 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
141 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
142 {
143 // Only add to parameter_indices if it is trainable.
144 if (add_to_array_context->add_parameter_indices)
145 ccv_array_add_unique_int(model->parameter_indices, i);
146 // Found it, return, don't add it.
147 return;
148 }
149 }
150 // Only add to parameter_indices if it is trainable.
151 if (add_to_array_context->add_parameter_indices)
152 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
153 // This is a new one, no need to add_unique_int, it is unique.
154 ccv_array_push(add_to_array_context->symbols, &symbol);
155 if (add_to_array_context->trainables)
156 ccv_array_push(add_to_array_context->trainables, &is_trainable);
157 char id[2048];
158 id[0] = add_to_array_context->prefix;
159 id[1] = '-';
160 int total_len = 2;
161 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
162 {
163 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
164 int len;
165 if (name->name && name->name[0] != '\0')
166 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
167 else
168 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
169 total_len += len;
170 if (total_len >= 2047)
171 break;
172 }
173 if (total_len < 2047)
174 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
175 assert(total_len < 2048);
176 char *heap_id = (char*)ccmalloc(total_len + 1);
177 memcpy(heap_id, id, total_len + 1);
178 ccv_array_push(add_to_array_context->ids, &heap_id);
179 ++add_to_array_context->sequence->it;
180}
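The id built above is the prefix character, then one "<name>-<sequence>-" (or "<sequence>-") segment per nesting level, then the running counter. A small standalone illustration with made-up names:

#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	char id[2048];
	int total_len = snprintf(id, sizeof(id), "%c-", 't'); /* parameter prefix */
	total_len += snprintf(id + total_len, sizeof(id) - total_len, "%s-%d-", "dense", 0); /* one nesting level */
	total_len += snprintf(id + total_len, sizeof(id) - total_len, "%d", 3); /* sequence->it */
	assert(strcmp(id, "t-dense-0-3") == 0);
	return 0;
}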
181
182static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
183{
184 compiled_data->f = compiled_data->fits + output_size;
185 compiled_data->xpu_alloc.mp_hdr = -1;
186 compiled_data->xpu_alloc.freed = kh_init(dy_str);
187 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
188 compiled_data->gradient_checkpoints = gradient_checkpoints;
189}
190
191typedef struct {
192 void* old_graph_exec_symbol_new_hook_context;
193 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
194 ccv_nnc_symbolic_graph_t* graph;
195 ccv_cnnp_model_build_data_t* build_data;
196} ccv_cnnp_model_set_exec_flags_context_t;
197
198static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
199{
200 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
201 if (flags_context->build_data->exec_flags)
202 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
203 if (flags_context->old_graph_exec_symbol_new_hook)
204 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
205}
206
207static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
208{
209 assert(model->graph);
210 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
211 int i;
212 for (i = 0; i < input_size; i++)
213 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
214 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
215 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
216 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
217 ccv_cnnp_model_sequence_t model_sequence = {
218 .bank = kh_init(ccv_cnnp_model_name_bank)
219 };
220 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
221 .add_parameter_indices = 1,
222 .prefix = 't',
223 .sequence = &model_sequence,
224 .symbols = parameters,
225 .ids = parameter_ids,
226 .trainables = parameter_trainables,
227 };
228 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
229 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
230 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
231 .add_parameter_indices = 0,
232 .prefix = 'r',
233 .sequence = &model_sequence,
234 .symbols = internals,
235 .ids = internal_ids,
236 .trainables = 0,
237 };
238 ccv_cnnp_model_build_data_t build_data = {
239 .exec_flags = 0,
240 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
241 .model_sequence = &model_sequence,
242 .add_to_array = ccv_cnnp_model_add_to_array,
243 .parameters = parameters,
244 .context = {
245 .add_to_parameter = &add_to_parameter_context,
246 .add_to_output = &add_to_output_context,
247 },
248 .gradient_checkpoints = 0,
249 };
250 model->data = &build_data;
251 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
252 .graph = model->graph,
253 .build_data = &build_data,
254 .old_graph_exec_symbol_new_hook = 0,
255 .old_graph_exec_symbol_new_hook_context = 0
256 };
257 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
258 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
259 // Reset back to previous hook.
260 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
261 for (i = 0; i < model->output_size; i++)
262 {
263 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
264 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
265 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
266 continue;
267 // If an output is an alias, insert a data transform regardless, for result correctness (we cannot bind an alias). See the ccv_nnc_tensor_bind_symbol method:
268 // we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind the alias itself. This is expected, because we cannot handle
269 // the case where the alias covers only part of the original tensor yet is bound differently.
270 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
271 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
272 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
273 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
274 }
275 model->data = 0;
276 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
277 if (model_sequence.sequences)
278 ccv_array_free(model_sequence.sequences);
279 // Check if there are parameters that are not trainable. If there are, we will allocate a uint64 bitmap to record that.
280 int not_trainables = 0;
281 // Assert no parameter is alias.
282 for (i = 0; i < parameters->rnum; i++)
283 {
284 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
285 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
286 assert(alias_to.graph == 0); // Cannot find the one alias to.
287 if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
288 not_trainables = 1;
289 }
290 assert(parameters->rnum == parameter_trainables->rnum);
291 uint64_t* parameter_flags = 0;
292 if (not_trainables)
293 {
294 parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
295 for (i = 0; i < parameter_trainables->rnum; i++)
296 if (*(int*)ccv_array_get(parameter_trainables, i))
297 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
298 }
299 ccv_array_free(parameter_trainables);
300 // Assert no internal is alias.
301 for (i = 0; i < internals->rnum; i++)
302 {
303 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
304 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
305 assert(alias_to.graph == 0); // Cannot find the one alias to.
306 }
307 const int output_size = model->output_size;
308 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
309 const int parameters_rnum = parameters->rnum;
310 if (input_size > 0)
311 {
312 ccv_array_resize(parameters, parameters_rnum + input_size);
313 memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
314 }
315 ccv_nnc_symbolic_graph_simplify(model->graph,
316 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
317 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
318 CCV_NNC_SIMPLIFY_OPS_FUSION,
319 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
320 ccv_array_get(parameters, 0), parameters_rnum + input_size,
321 model->outputs, output_size,
322 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
323 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
324 // Size it down.
325 parameters->rnum = parameters_rnum;
326 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
327 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
328 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
329 assert(evaluate_to_size > 0);
330 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
331 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
332 compiled_data->loss = loss;
333 if (loss.cmd == CCV_NNC_NOOP)
334 {
335 // If no loss function is provided, there are no fits.
336 for (i = 0; i < output_size; i++)
337 {
338 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
339 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
340 if (alias_to.d < 0)
341 compiled_data->f[i] = model->outputs[i];
342 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
343 int ofs[CCV_NNC_MAX_DIM_ALLOC];
344 int inc[CCV_NNC_MAX_DIM_ALLOC];
345 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
346 int j;
347 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
348 { assert(ofs[j] == 0); } // There is no ofs.
349 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
350 }
351 }
352 } else {
353 for (i = 0; i < output_size; i++)
354 {
355 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
356 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
357 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
358 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
359 }
360 }
361 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
362 ccv_nnc_symbolic_graph_simplify(model->graph,
363 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
364 0, 0, // No need to provide binds at this point.
365 compiled_data->f, model->output_size,
366 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
367 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
368 // If inputs are from GPU, stream type is GPU.
369 compiled_data->parameters = parameters;
370 compiled_data->parameter_flags = parameter_flags;
371 compiled_data->internals = internals;
372 compiled_data->ids.parameters = parameter_ids;
373 compiled_data->ids.internals = internal_ids;
374 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
375}
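One detail worth spelling out from the function above: the trainable/not-trainable bookkeeping at source lines 291-298 packs one flag bit per parameter into 64-bit words. A tiny standalone check of that indexing (values are illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* ((70 + 63) >> 6) == 2 words cover 70 parameters. */
	uint64_t parameter_flags[2] = {0};
	/* Parameter 69 lands in word 69 >> 6 == 1, bit 69 & 63 == 5. */
	parameter_flags[69 >> 6] |= ((uint64_t)1 << (69 & 63));
	assert(parameter_flags[1] == ((uint64_t)1 << 5));
	return 0;
}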
376
377static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
378{
379 ccv_array_t* const stack = (ccv_array_t*)context;
380 ccv_array_push(stack, &symbol.d);
381}
382
383static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
384{
385 const ccv_nnc_tensor_symbol_t src_symbol = {
386 .d = src_index,
387 .graph = src_graph
388 };
389 const ccv_nnc_tensor_symbol_t dest_symbol = {
390 .d = dest_index,
391 .graph = dest_graph
392 };
393 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
394 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
395 int ofs[CCV_NNC_MAX_DIM_ALLOC];
396 int inc[CCV_NNC_MAX_DIM_ALLOC];
397 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
398 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
399}
400
401static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
402{
403 const ccv_nnc_tensor_symbol_t src_symbol = {
404 .d = src_index,
405 .graph = src_graph
406 };
407 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
408 const ccv_nnc_tensor_symbol_t dest_symbol = {
409 .d = dest_index,
410 .graph = dest_graph
411 };
412 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
413 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
414}
415
416static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
417static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
418
419typedef struct {
420 int parallel_count;
421 ccv_nnc_symbolic_graph_t* graph;
422 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
423} ccv_nnc_graph_exec_update_t;
424
425static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
426{
427 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
428 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
429 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
430 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
431 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
432 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
433 const int parallel_count = graph_exec_update->parallel_count;
434 int i;
435 for (i = 1; i < parallel_count; i++)
436 {
437 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
438 if (!CCV_NO_GRAPH_EXEC(copy))
439 {
440 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
441 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
442 }
443 }
444}
445
446void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
447{
448 assert(model->graph);
449 assert(model->compiled_data);
450 assert(!init->graph);
451 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
452 init->graph = ccv_nnc_symbolic_graph_new();
453 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
454 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
455 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
456 init->parallel_count = model->parallel_count;
457 init->memory_compression = model->memory_compression;
458 init->memory_reduction = model->memory_reduction;
459 init->gradient_checkpointing = model->gradient_checkpointing;
460 init->compiled_data->stream_type = model->compiled_data->stream_type;
461 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
462 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
463 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
464 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
465 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
466 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
467 int i, j;
468 // Verify that parameters, internals and saved_aux in both graphs have the same dimensionality.
469 for (i = 0; i < compiled_data->parameters->rnum; i++)
470 {
471 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
472 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
473 }
474 for (i = 0; i < compiled_data->internals->rnum; i++)
475 {
476 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
477 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
478 }
479 // Update inputs.
480 assert(model->input_size == init->input_size);
481 for (i = 0; i < model->input_size; i++)
482 if (model->inputs[i].d >= 0)
483 {
484 assert(init->inputs[i].d >= 0);
485 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
486 }
487 // Update outputs.
488 assert(model->output_size == init->output_size);
489 for (i = 0; i < model->output_size; i++)
490 {
491 if (model->outputs[i].d >= 0)
492 {
493 assert(init->outputs[i].d >= 0);
494 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
495 }
496 if (model->outputs[i].d != model->compiled_data->f[i].d)
497 {
498 assert(init->outputs[i].d != init->compiled_data->f[i].d);
499 if (model->compiled_data->f[i].d >= 0)
500 {
501 assert(init->compiled_data->f[i].d >= 0);
502 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
503 }
504 }
505 }
506 // Go through the graph to set tensor on matching symbols
507 for (i = 0; i < stack->rnum; i++)
508 {
509 const int d = *(int*)ccv_array_get(stack, i);
510 // If exceed range, skip.
511 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
512 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
513 continue;
514 const ccv_nnc_graph_exec_symbol_t src_symbol = {
515 .d = d,
516 .graph = init->graph
517 };
518 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
519 .d = d,
520 .graph = model->graph
521 };
522 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
523 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
524 // If the command doesn't match, skip.
525 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
526 continue;
527 // Now get all the inputs and outputs, if matches, set them.
528 const int* src_inputs;
529 int src_input_size;
530 const int* src_outputs;
531 int src_output_size;
532 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
533 const int* dest_inputs;
534 int dest_input_size;
535 const int* dest_outputs;
536 int dest_output_size;
537 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
538 // We may have unmatched input / output size because this is the minimizer and it has
539 // different saved_aux (for example, when we shrunk with CMD_NOOP).
540 if (src_input_size != dest_input_size)
541 continue;
542 if (src_output_size != dest_output_size)
543 continue;
544 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
545 // There may be mismatches between the source tensor symbols and destination tensor symbols. The reason is that
546 // the minimizer may be passed in later; tensors for the minimizer are then allocated later in the original
547 // graph, whereas the newly created graph is streamlined (its minimizer exists from the beginning). That
548 // changes the order in which tensor symbols are created, so which tensor is which no longer lines up.
549 // However, setting a new minimizer won't change the exec symbol ordering, because we never create new exec
550 // symbols after the gradient init step. Changing the minimizer just updates that exec symbol's settings; it
551 // does not create a new exec symbol.
552 for (j = 0; j < src_input_size; j++)
553 if (src_inputs[j] >= 0)
554 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
555 for (j = 0; j < src_output_size; j++)
556 if (src_outputs[j] >= 0)
557 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
558 }
559 ccv_array_free(stack);
560 // After this, we get all tensors in the model graph resolved through tensor_auto.
561 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
562 // Verify symbols we get matches.
563 const int parameter_size = compiled_data->parameters->rnum;
564 for (i = 0; i < parameter_size; i++)
565 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
566 const int internal_size = compiled_data->internals->rnum;
567 for (i = 0; i < internal_size; i++)
568 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
569 // Go through compiled data.
570 if (compiled_data->tensor_arena)
571 {
572 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
573 if (flag == 0 && compiled_data->graph_exec_arena)
574 {
575 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
576 // Since we will reinit, if we previously set is_test, we need to set it again.
577 if (compiled_data->is_test)
578 {
579 const int parallel_count = ccv_max(model->parallel_count, 1);
580 ccv_nnc_graph_exec_update_t update = {
581 .parallel_count = parallel_count,
582 .graph = model->graph,
583 .graph_exec_arena = compiled_data->graph_exec_arena,
584 };
585 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
586 }
587 } else
588 // Free-up tensor arena & graph exec arena.
589 _ccv_cnnp_compiled_data_graph_free(compiled_data);
590 }
591 // There are other compiled graphs, for accum and apply gradients.
592 // However, the main conclusion is that these absorb operations shouldn't impact parameters.
593 // Thus, they won't impact the shape of gradients (only outgrad). Since we don't allocate outgrad
594 // ourselves, it is not a concern. For normal gradients, the shape cannot change, otherwise the
595 // parameters' shapes would be meaningless. The same goes for internals.
596 // That is why we don't update these compiled graphs at all at this point.
597 // Free the model, we've already "absorbed" it.
598 ccv_cnnp_model_free(init);
599}
600
601void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
602{
603 assert(input_size == model->input_size || model->input_size == 0);
604 if (model->input_size == 0)
605 model->input_size = input_size;
606 if (!model->graph) // The graph is not compiled yet.
607 {
608 model->graph = ccv_nnc_symbolic_graph_new();
609 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
610 assert(model->compiled_data);
611 int i, flag = 0;
612 for (i = 0; !flag && i < input_size; i++)
613 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
614 // If inputs are from GPU, stream type is GPU.
615 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
616 model->compiled_data->minimize.minimizer = minimizer;
617 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
618 } else {
619 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
620 // And then absorb the "new model" to the old one.
621 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
622 ccv_cnnp_model_absorb(model, init, inputs, input_size);
623 // Reset minimizer.
624 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
625 }
626}
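A compile-call sketch matching the signature above. CMD_NOOP() is assumed from ccv_nnc_easy.h; per the comment at source line 335, a no-op loss means no fits are created. The tensor parameters are placeholders the caller must fill in:

#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"

static void compile_for_inference(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t input_params)
{
	/* One input, no minimizer, no loss. */
	ccv_cnnp_model_compile(model, &input_params, 1, CMD_NOOP(), CMD_NOOP());
}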
627
628ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
629{
630 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
631 new_model->is_trainable = is_trainable;
632 return new_model;
633}
634
635void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
636{
637 assert(model->graph);
638 assert(output_size == model->output_size);
639 ccv_nnc_symbolic_graph_t* const graph = model->graph;
640 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
641 int i;
642 for (i = 0; i < output_size; i++)
643 {
644 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
645 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
646 }
647}
648
649void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
650{
651 if (workspace_size == model->workspace_size)
652 return;
653 model->workspace_size = workspace_size;
654 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
655 if (compiled_data && compiled_data->graph)
656 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
657}
658
659size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
660{
661 return model->workspace_size;
662}
663
664void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
665{
666 if (parallel == 0)
667 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
668 else
669 model->parallel_count = parallel;
670 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
671 if (compiled_data)
672 { assert(!compiled_data->graph); }
673}
674
675void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
676{
677 model->max_stream_count = max_stream_count;
678 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
679 if (compiled_data)
680 { assert(!compiled_data->graph); }
681}
682
683void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
684{
685 model->memory_compression = memory_compression;
686 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
687 if (compiled_data)
688 { assert(!compiled_data->graph); }
689}
690
691void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
692{
693 model->memory_reduction = memory_reduction;
694 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
695 if (compiled_data)
696 { assert(!compiled_data->graph); }
697}
698
699void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
700{
701 model->gradient_checkpointing = gradient_checkpointing;
702}
703
704int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
705{
706 return model->gradient_checkpointing;
707}
708
709typedef struct {
710 int parallel_count;
711 ccv_nnc_symbolic_graph_t* graph;
712 ccv_cnnp_compiled_data_t* compiled_data;
713 ccv_nnc_tensor_arena_t* tensor_arena;
714} ccv_nnc_tensor_init_states_t;
715
716static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
717{
718 int i;
719 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
720 for (i = 0; i < compiled_data->parameters->rnum; i++)
721 {
722 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
723 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
724 return 1;
725 }
726 for (i = 0; i < compiled_data->internals->rnum; i++)
727 {
728 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
729 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
730 return 1;
731 }
732 return 0;
733}
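Two bit tricks above are easy to miss: CCV_NNC_INIT_V masks off the low bit of tensors_init.v, which apparently carries a tag, and the per-symbol init flags live in 32-bit words indexed by d >> 5 and d & 0x1f. A standalone sketch of both (illustrative values, not library code):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	uint32_t* const v = (uint32_t*)calloc(4, sizeof(uint32_t));
	const uintptr_t tagged = (uintptr_t)v | 1; /* stash a flag in the low pointer bit */
	uint32_t* const init_v = (uint32_t*)(tagged & ~(uintptr_t)1); /* what CCV_NNC_INIT_V recovers */
	assert(init_v == v);
	init_v[70 >> 5] |= (1u << (70 & 0x1f)); /* mark symbol d == 70: word 2, bit 6 */
	assert(init_v[2] & (1u << 6));
	free(v);
	return 0;
}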
734
735static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
736{
737 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
738 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
739 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
740 if (!output_tensor)
741 return;
742 const int d = output_symbol.d;
743 assert(d < tensor_init_states->compiled_data->tensors_init.size);
744 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
745 if (init_v[d >> 5] & (1u << (d & 0x1f)))
746 return;
747 init_v[d >> 5] |= (1u << (d & 0x1f));
748 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
749 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
750 const int parallel_count = tensor_init_states->parallel_count;
751 int i;
752 for (i = 1; i < parallel_count; i++)
753 {
754 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
755 if (copy)
756 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
757 }
758}
759
760 // This method can only handle cases where we added new tensors and execs, never deleted any. This invariant holds because
761 // we set up everything (including calling the simplify method) in ccv_cnnp_model_compile, before this rewind setup.
762static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
763{
764 assert(model->graph);
765 assert(model->compiled_data);
766 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
767 assert(compiled_data->rewindables);
768 int i;
769 for (i = 0; i < compiled_data->rewindables->rnum; i++)
770 {
771 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
772 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
773 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
774 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
775 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
776 }
777 ccv_array_clear(compiled_data->rewindables);
778 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
779}
780
781static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
782{
783 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
784 .type = CCV_CNNP_REWIND_TENSOR,
785 .tensor = symbol
786 };
787 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
788 ccv_array_push(rewind_symbols, &rewind_symbol);
789}
790
791 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
792{
793 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
794 .type = CCV_CNNP_REWIND_TENSOR,
795 .tensor = symbol
796 };
797 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
798 ccv_array_push(rewind_symbols, &rewind_symbol);
799}
800
801static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
802{
803 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
804 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
805 .graph_exec = symbol
806 };
807 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
808 ccv_array_push(rewind_symbols, &rewind_symbol);
809}
810
811static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
812{
813 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
814 if (!CCV_NO_GRAPH_EXEC(update_exec))
815 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
816 int i;
817 for (i = 1; i < parallel_count; i++)
818 {
819 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
820 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
821 if (!CCV_NO_GRAPH_EXEC(copy))
822 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
823 }
824}
825
826static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
827{
828 assert(compiled_data);
829 assert(symbolic_graph);
830 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
831 int i;
832 for (i = 1; i < parallel_count; i++)
833 {
834 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
835 if (copy_symbol.graph)
836 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
837 }
838 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
839 if (graph_exec_arena)
840 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
841 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
842 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
843 if (gradient_graph_exec_arena)
844 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
845}
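/* Both helpers above follow the same fan-out pattern: apply the new command to the
 * primary symbol (or concrete exec), then to each data-parallel copy 1..parallel_count-1
 * that exists. A minimal standalone sketch of that pattern; set_one and get_copy are
 * illustrative placeholders, not ccv API: */
static void apply_to_primary_and_copies(const int primary, const int parallel_count,
	int (*get_copy)(int primary, int device), void (*set_one)(int symbol))
{
	int i;
	set_one(primary); // Update the primary symbol first.
	for (i = 1; i < parallel_count; i++)
	{
		const int copy = get_copy(primary, i);
		if (copy >= 0) // Not every symbol has a copy on every device; skip missing ones.
			set_one(copy);
	}
}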
846
847static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
848{
849 int this_parameter_flag = 0;
850 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
851 return this_parameter_flag;
852 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
853 int j, k;
854 // For no-op, we can preserve previous saved_aux_size.
855 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
856 {
857 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
858 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
859 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
860 // make sure some model parameters don't update if we don't want them to.
861 int old_saved_aux_size;
862 if (old_minimizer.cmd == CCV_NNC_NOOP)
863 {
864 int input_size;
865 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
866 if (input_size < 2) // This is not legit.
867 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
868 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
869 old_saved_aux_size = input_size - 2;
870 } else
871 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
872 if (old_saved_aux_size != saved_aux_size)
873 {
874 this_parameter_flag = 1;
875 if (saved_aux_size > old_saved_aux_size)
876 {
877 // Allocate new tensor symbols.
878 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
879 for (j = old_saved_aux_size; j < saved_aux_size; j++)
880 {
881 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
882 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
883 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
884 for (k = 1; k < parallel_count; k++)
885 {
886 ccv_nnc_tensor_param_t dev_info = info;
887 if (k != device_id)
888 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
889 else
890 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
891 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
892 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
893 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
894 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
895 }
896 }
897 } else {
898 for (j = saved_aux_size; j < old_saved_aux_size; j++)
899 {
900 for (k = 1; k < parallel_count; k++)
901 {
902 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
903 if (src_copy.d >= 0)
904 {
905 ccv_nnc_tensor_symbol_free(graph, src_copy);
906 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
907 }
908 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
909 if (dest_copy.d >= 0)
910 {
911 ccv_nnc_tensor_symbol_free(graph, dest_copy);
912 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
913 }
914 }
915 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
916 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
917 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
918 }
919 }
920 }
921 }
922 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
923 if (this_parameter_flag)
924 {
925 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
926 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
927 const int* inputs = 0;
928 int input_size = 0;
929 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
930 assert(input_size >= 1);
931 update_inputs[0].d = inputs[0];
932 update_inputs[0].graph = graph;
933 update_inputs[1].d = inputs[1];
934 update_inputs[1].graph = graph;
935 update_outputs[0] = updated_parameters[parameter_indice];
936 for (j = 0; j < saved_aux_size; j++)
937 {
938 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
939 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
940 }
941 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
942 for (k = 1; k < parallel_count; k++)
943 {
944 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
945 assert(copy.d >= 0);
946 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
947 assert(input_size >= 1);
948 update_inputs[0].d = inputs[0];
949 update_inputs[0].graph = graph;
950 update_inputs[1].d = inputs[1];
951 update_inputs[1].graph = graph;
952 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
953 for (j = 0; j < saved_aux_size; j++)
954 {
955 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
956 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
957 }
958 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
959 }
960 }
961 return this_parameter_flag;
962}
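/* saved_aux above is a flat [parameter_size x max_saved_aux_size] table: auxiliary slot j
 * of parameter p lives at p * max_saved_aux_size + j, and an update node's IO is rewritten
 * as inputs = {gradient, parameter, aux sources...} (saved_aux_size + 2 entries) and
 * outputs = {updated parameter, aux destinations...} (saved_aux_size + 1 entries). A
 * standalone sketch of that indexing, with plain ints standing in for tensor symbols: */
static int saved_aux_slot(const int parameter_indice, const int max_saved_aux_size, const int j)
{
	// Row-major: one row of max_saved_aux_size slots per parameter.
	return parameter_indice * max_saved_aux_size + j;
}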
963
964typedef struct {
965 int parameter_size;
966 ccv_nnc_cmd_t minimizer;
967 ccv_cnnp_model_io_t parameters[1];
968} ccv_cnnp_set_minimizer_for_parameter_t;
969
970static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
971{
972 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
973 assert(compiled_data);
974 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
975 // We update all parameters, at this point, we have one minimizer.
976 const int parameter_size = compiled_data->parameters->rnum;
977 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
978 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
979 assert(symbolic_graph);
980 const int parallel_count = ccv_max(model->parallel_count, 1);
981 ccv_array_t* const parameters = compiled_data->minimize.parameters;
982 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
983 int i, j, flag = 0;
984 for (i = 0; i < parameters->rnum; i++)
985 {
986 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
987 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
988 {
989 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
990 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
991 const int old_rnum = parameter_indices->rnum;
992 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
993 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
994 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
995 if (param_ref >= 0)
996 {
997 assert(param_ref + old_rnum < parameter_indices->rnum);
998 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
999 parameter_indices->rnum = old_rnum + 1;
1000 }
1001 }
1002 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1003 // We may have duplicated indices, but that is OK, we will set it twice.
1004 for (j = 0; j < parameter_indices->rnum; j++)
1005 {
1006 const int d = *(int*)ccv_array_get(parameter_indices, j);
1007 assert(d <= parameter_size);
1008 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1009 flag = 1;
1010 }
1011 ccv_array_clear(parameter_indices);
1012 }
1013 ccv_array_free(parameter_indices);
1014 return flag;
1015}
1016
1017static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1018{
1019 if (new_saved_aux_size == old_saved_aux_size)
1020 return;
1021 assert(new_saved_aux_size > old_saved_aux_size);
1022 int i, j;
1023 for (i = parameter_size - 1; i >= 0; i--)
1024 {
1025 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1026 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
1027 for (j = old_saved_aux_size - 1; j >= 0; j--)
1028 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1029 }
1030}
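/* _ccv_cnnp_scatter_saved_aux re-strides the flat saved_aux table in place from a row
 * width of old_saved_aux_size to new_saved_aux_size, walking rows and columns backwards
 * so each entry is read before its slot can be overwritten, and filling the newly exposed
 * columns with NO_TENSOR_SYMBOL. The same idea on plain ints, as a sketch: */
static void scatter_rows(int* const v, const int rows, const int old_w, const int new_w, const int fill)
{
	int i, j;
	if (new_w == old_w)
		return;
	for (i = rows - 1; i >= 0; i--)
	{
		for (j = new_w - 1; j >= old_w; j--)
			v[i * new_w + j] = fill; // New columns get the sentinel.
		for (j = old_w - 1; j >= 0; j--)
			v[i * new_w + j] = v[i * old_w + j]; // Move existing columns to the wider stride.
	}
}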
1031
1032static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1033{
1034 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1035 assert(compiled_data);
1036 if (!compiled_data->rewindables)
1037 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1038 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1039 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1040 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1041}
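/* The three *_new_hook callbacks registered above (defined earlier in this file) append a
 * tagged record for every tensor symbol, alias, and graph exec symbol created from this
 * point on into compiled_data->rewindables, so the graph edits made while building the
 * gradient graph can later be undone by _ccv_cnnp_model_rewind_graph. */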
1042
1043static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1044{
1045 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1046 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1047 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1048 const int evaluate_to_size = compiled_data->evaluate.to_size;
1049 assert(evaluate_to_size > 0);
1050 const int parallel_count = ccv_max(model->parallel_count, 1);
1051 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1052 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1053 int i, j;
1054 const int output_size = model->output_size;
1055 assert(!fits || fit_size == output_size * parallel_count);
1056 if (fits)
1057 for (i = 0; i < output_size; i++)
1058 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1059 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1060 const int parameter_size = compiled_data->parameters->rnum;
1061 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1062 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1063 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1064 int parameter_size_maybe_more = parameter_size;
1065 compiled_data->disable_outgrad = disable_outgrad;
1066 int outgrad_size;
1067 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1068 outgrad_size = 0;
1069 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1070 outgrad_size = model->input_size;
1071 else {
1072 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1073 outgrad_size = 0;
1074 for (i = 0; i < model->input_size; i++)
1075 if (!(disable_outgrad & ((uint64_t)1 << i)))
1076 ++outgrad_size;
1077 }
1078 compiled_data->outgrad_size = outgrad_size;
1079 parameter_size_maybe_more += outgrad_size;
1080 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1081 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1082 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1083 compiled_data->backward.to_size = parameter_size_maybe_more;
1084 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1085 if (compiled_data->parameter_flags)
1086 {
1087 parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1088 for (i = 0; i < parameter_size; i++)
1089 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1090 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1091 else
1092 parameters[i] = NO_TENSOR_SYMBOL;
1093 }
1094 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1095 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1096 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1097 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1098 else { // Compute minimize with gradients including selected inputs.
1099 assert(model->input_size > 0);
1100 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1101 assert(outgrad_size > 0);
1102 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1103 j = 0;
1104 for (i = 0; i < model->input_size; i++)
1105 if (!(disable_outgrad & ((uint64_t)1 << i)))
1106 outgrads[j++] = model->inputs[i];
1107 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1108 }
1109 if (compiled_data->parameter_flags)
1110 ccfree(parameters);
1111 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1112 if (compiled_data->minimize.parameters)
1113 _ccv_cnnp_apply_parameters_with_minimizer(model);
1114 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1115 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1116 for (i = 0; i < output_size; i++)
1117 {
1118 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1119 // Init this to 1 so we can backprop.
1120 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1121 }
1122 compiled_data->backward.to_size = 0;
1123 for (i = 0; i < parameter_size_maybe_more; i++)
1124 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1125 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1126 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1127 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1128 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1129 {
1130 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1131 continue;
1132 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1133 const int* tos;
1134 int to_size;
1135 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1136 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1137 {
1138 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1139 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1140 int flag = 0;
1141 const int outgrad_destination_start = ccv_max(0, destination_count - i);
1142 for (j = i - 1; !flag && j >= 0; j--)
1143 if (j + outgrad_destination_start < destination_count)
1144 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1145 if (!flag) // Only if we cannot find it, we add it.
1146 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1147 }
1148 }
1149 if (parallel_count > 1)
1150 {
1151 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1152 0, 0,
1153 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1154 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1155 0, 0, 0,
1156 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1157 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1158 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1159 for (i = 0; i < evaluate_to_size; i++)
1160 for (j = 1; j < parallel_count; j++)
1161 {
1162 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1163 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1164 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1165 }
1166 const int backward_to_size = compiled_data->backward.to_size;
1167 for (i = 0; i < backward_to_size; i++)
1168 for (j = 1; j < parallel_count; j++)
1169 {
1170 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1171 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1172 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1173 }
1174 }
1175 // Only use memory compression if we are in gradient parameter mode.
1176 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1177 {
1178 if (model->memory_compression)
1179 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1180 if (model->memory_reduction)
1181 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1182 }
1183 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1184 compiled_data->gradient_mode = gradient_mode;
1185}
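/* disable_outgrad above is a per-input bitmask: input i gets an output gradient only when
 * bit i is clear, and CCV_CNNP_DISABLE_OUTGRAD_NONE means every input does. A standalone
 * sketch of the counting / collection logic used for outgrad_size and outgrads[]: */
static int collect_outgrad_inputs(const uint64_t disable_outgrad, const int input_size, int* const indices)
{
	int i, count = 0;
	for (i = 0; i < input_size; i++)
		if (!(disable_outgrad & ((uint64_t)1 << i)))
			indices[count++] = i; // The real code collects the input tensor symbol rather than the index.
	return count;
}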
1186
1187void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1188{
1189 assert(!compiled_data->tensors.parameters);
1190 const int parameter_size = compiled_data->parameters->rnum;
1191 const int parallel_count = ccv_max(model->parallel_count, 1);
1192 const int internal_size = compiled_data->internals->rnum;
1193 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1194 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1195 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1196 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1197}
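/* tensors_init.v is a bitmap with one bit per tensor symbol in the graph, packed into
 * 32-bit words, hence the (count + 31) >> 5 sizing above. Sketch of that sizing and of
 * the bit test used elsewhere in this file to see whether a tensor was already initialized: */
static size_t init_bitmap_words(const int symbol_count)
{
	return (size_t)((symbol_count + 31) >> 5); // Round up to whole uint32_t words.
}

static int init_bit_is_set(const uint32_t* const v, const int d)
{
	return !!(v[d >> 5] & (1u << (d & 0x1f))); // Word d / 32, bit d % 32.
}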
1198
1199int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1200{
1201 int i, j;
1202 const int parameter_size = compiled_data->parameters->rnum;
1203 const int parallel_count = ccv_max(model->parallel_count, 1);
1204 const int internal_size = compiled_data->internals->rnum;
1205 for (i = 0; i < parameter_size; i++)
1206 {
1207 // parameters has to be allocated all together.
1208 if (compiled_data->tensors.parameters[i])
1209 {
1210 for (j = 1; j < parallel_count; j++)
1211 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1212 continue;
1213 }
1214 return 1;
1215 }
1216 for (i = 0; i < internal_size; i++)
1217 {
1218 if (!compiled_data->tensors.internals[i])
1219 return 1;
1220 for (j = 1; j < parallel_count; j++)
1221 if (!compiled_data->tensors.internals[i + j * internal_size])
1222 return 1;
1223 }
1224 return 0;
1225}
1226
1227void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1228{
1229 int i, j;
1230 const int parameter_size = compiled_data->parameters->rnum;
1231 const int parallel_count = ccv_max(model->parallel_count, 1);
1232 const int internal_size = compiled_data->internals->rnum;
1233 for (i = 0; i < parameter_size; i++)
1234 {
1235 // parameters has to be allocated all together.
1236 if (compiled_data->tensors.parameters[i])
1237 {
1238 for (j = 1; j < parallel_count; j++)
1239 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1240 continue;
1241 }
1242 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1243 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1244 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1245 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1246 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1247 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1248 for (j = 1; j < parallel_count; j++)
1249 {
1250 if (j != device_id)
1251 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1252 else
1253 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1254 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1255 }
1256 }
1257 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1258 for (i = 0; i < internal_size; i++)
1259 {
1260 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1261 const int d = retained.d;
1262 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1263 continue;
1264 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1265 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1266 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1267 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1268 if (!compiled_data->tensors.internals[i])
1269 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1270 for (j = 1; j < parallel_count; j++)
1271 {
1272 if (j != device_id)
1273 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1274 else
1275 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1276 if (!compiled_data->tensors.internals[i + j * internal_size])
1277 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1278 }
1279 }
1280 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1281}
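/* When allocating the per-device copies above, copy j normally lands on device j, except
 * that the copy whose index matches the primary tensor's device id is placed on device 0,
 * so each of the parallel_count devices ends up holding exactly one copy. Sketch of that
 * device assignment: */
static int device_for_copy(const int primary_device_id, const int j)
{
	return j != primary_device_id ? j : 0; // Swap: the copy at the primary's own index goes to device 0.
}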
1282
1283static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1284{
1285 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1286 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1287}
1288
1289static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1290{
1291 assert(parallel_count > 0);
1292 int i, j;
1293 for (i = 0; i < tensor_size; i++)
1294 {
1295 if (!tensors[i])
1296 continue;
1297 const int d = tensor_symbols[i].d;
1298 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1299 continue;
1300 for (j = 1; j < parallel_count; j++)
1301 if (tensors[i + j * tensor_size])
1302 {
1303 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1304 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1305 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1306 }
1307 }
1308}
1309
1310static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1311{
1312 assert(parallel_count > 0);
;
1313 int i, j;
1314 for (i = 0; i < tensor_size; i++)
1315 {
1316 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1317 for (j = 1; j < parallel_count; j++)
1318 {
1319 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1320 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1321 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1322 { // We shouldn't allocate this, free it up.
1323 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1324 tensors[i + j * tensor_size] = 0;
1325 }
1326 }
1327 }
1328}
1329
1330static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1331{
1332 assert(parallel_count > 0);
;
1333 int i, j;
1334 for (i = 0; i < tensor_size; i++)
1335 {
1336 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1337 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1338 continue;
1339 if (graph)
1340 {
1341 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1342 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1343 tensor_symbol = alias_to;
1344 }
1345 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1346 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1347 {
1348 const ccv_nnc_tensor_bind_t retained_bind = {
1349 .symbol = tensor_symbol,
1350 .tensor = tensor
1351 };
1352 ccv_array_push(tensor_binds, &retained_bind);
1353 }
1354 for (j = 1; j < parallel_count; j++)
1355 {
1356 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1357 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1358 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1359 {
1360 const ccv_nnc_tensor_bind_t bind = {
1361 .symbol = copy,
1362 .tensor = tensors[i + j * tensor_size]
1363 };
1364 ccv_array_push(tensor_binds, &bind);
1365 }
1366 }
1367 }
1368}
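/* _ccv_cnnp_model_bind_tensors collects {symbol, tensor} pairs into tensor_binds, skipping
 * NO_TENSOR_SYMBOL entries, resolving aliases to the symbol they alias, and repeating the
 * pairing for each data-parallel copy. The resulting array is handed to
 * ccv_nnc_symbolic_graph_compile in the JIT routines below. */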
1369
1370static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1371{
1372 if (compiled_data->graph)
1373 ccv_nnc_graph_free(compiled_data->graph);
1374 compiled_data->graph = 0;
1375 compiled_data->is_test = 0;
1376 if (compiled_data->tensor_arena)
1377 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1378 compiled_data->tensor_arena = 0;
1379 if (compiled_data->graph_exec_arena)
1380 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1381 compiled_data->graph_exec_arena = 0;
1382 if (compiled_data->backward.from_ops)
1383 ccfree(compiled_data->backward.from_ops);
1384 compiled_data->backward.from_ops = 0;
1385 if (compiled_data->evaluate.schedule)
1386 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1387 compiled_data->evaluate.schedule = 0;
1388 if (compiled_data->backward.schedule)
1389 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1390 compiled_data->backward.schedule = 0;
1391}
1392
1393static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1394{
1395 if (compiled_data->gradients)
1396 ccfree(compiled_data->gradients);
1397 compiled_data->gradients = 0;
1398 if (compiled_data->updated_parameters)
1399 ccfree(compiled_data->updated_parameters);
1400 compiled_data->updated_parameters = 0;
1401 compiled_data->update_nodes = 0;
1402 compiled_data->saved_aux = 0;
1403}
1404
1405static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1406{
1407 if (compiled_data->backward.gradients)
1408 ccfree(compiled_data->backward.gradients);
1409 compiled_data->backward.gradients = 0;
1410 if (compiled_data->backward.accum)
1411 ccv_nnc_graph_free(compiled_data->backward.accum);
1412 compiled_data->backward.accum = 0;
1413 if (compiled_data->backward.tensor_arena)
1414 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1415 compiled_data->backward.tensor_arena = 0;
1416 if (compiled_data->backward.graph_exec_arena)
1417 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1418 compiled_data->backward.graph_exec_arena = 0;
1419}
1420
1421static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1422{
1423 if (compiled_data->apply_gradients.graph)
1424 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1425 compiled_data->apply_gradients.graph = 0;
1426 if (compiled_data->apply_gradients.tensor_arena)
1427 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1428 compiled_data->apply_gradients.tensor_arena = 0;
1429 if (compiled_data->apply_gradients.graph_exec_arena)
1430 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1431 compiled_data->apply_gradients.graph_exec_arena = 0;
1432}
1433
1434// Compile the graph to run ccv_cnnp_model_fit
1435static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1436{
1437 int i, j;
1438 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1439 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1440 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1441 const int parallel_count = ccv_max(model->parallel_count, 1);
1442 assert(output_size == model->output_size * parallel_count);
1443 assert(!fits || output_size == fit_size);
1444 assert(output_size > 0);
1445 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1446 {
1447 _ccv_cnnp_model_set_rewindables(model);
1448 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1449 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1450 _ccv_cnnp_model_rewind_graph(model);
1451 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1452 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1453 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1454 }
1455 const int tensors_init = !!compiled_data->tensors_init.v;
1456 if (!tensors_init)
1457 _ccv_cnnp_model_tensors_init(model, compiled_data);
1458 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1459 // Check if it is not fully allocated, if it is not, init_1.
1460 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1461 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1462 assert((input_size % parallel_count) == 0);
1463 assert((output_size % parallel_count) == 0);
1464 assert((fit_size % parallel_count) == 0);
1465 const int input_size_per_p = input_size / parallel_count;
1466 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1467 const int output_size_per_p = output_size / parallel_count;
1468 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1469 const int fit_size_per_p = fit_size / parallel_count;
1470 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1471 const int parameter_size = compiled_data->parameters->rnum;
1472 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1473 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1474 const int internal_size = compiled_data->internals->rnum;
1475 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1476 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1477 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1478 ccv_array_free(tensor_binds);
1479 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1480 if (tensors_init && parallel_count > 1)
1481 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1482 // If tensor is not init'ed, we need to init states first.
1483 if (_ccv_cnnp_any_to_init(compiled_data))
1484 {
1485 ccv_nnc_tensor_init_states_t tensor_init_states = {
1486 .parallel_count = parallel_count,
1487 .graph = model->graph,
1488 .compiled_data = compiled_data,
1489 .tensor_arena = compiled_data->tensor_arena
1490 };
1491 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1492 }
1493 compiled_data->is_test = 0;
1494 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1495 // No need to set because it is default to training mode.
1496 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1497 for (i = 0; i < saved_aux_size * parameter_size; i++)
1498 {
1499 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1500 continue;
1501 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1502 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1503 for (j = 1; j < parallel_count; j++)
1504 {
1505 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1506 if (copy)
1507 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1508 }
1509 }
1510 const int evaluate_to_size = compiled_data->evaluate.to_size;
1511 compiled_data->evaluate.to_op_size = 0;
1512 for (i = 0; i < evaluate_to_size; i++)
1513 {
1514 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1515 if (to.graph)
1516 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1517 }
1518 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1519 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1520}
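/* _ccv_cnnp_model_fit_jit only runs lazily: ccv_cnnp_model_fit below rebuilds the concrete
 * graph when none exists or the cached one was compiled for a different mode, and otherwise
 * just rebinds the caller's tensors into the existing arena. Sketch of that decision, with
 * plain parameters standing in for the compiled_data fields: */
static int fit_needs_recompile(const void* const graph, const int graph_mode, const int fit_mode)
{
	return !graph || graph_mode != fit_mode; // Recompile on first use or after a mode switch.
}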
1521
1522ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1523{
1524 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1525 if (!compiled_data || !compiled_data->graph)
1526 return 0;
1527 return ccv_nnc_graph_default_stream(compiled_data->graph);
1528}
1529
1530uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1531{
1532 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1533 if (!compiled_data || !compiled_data->tensor_arena)
1534 return 0;
1535 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1536}
1537
1538static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1539{
1540 int i, j;
1541 for (i = 0; i < tensor_size; i++)
1542 {
1543 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1544 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1545 continue;
1546 if (graph)
1547 {
1548 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1549 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1550 tensor_symbol = alias_to;
1551 }
1552 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1553 for (j = 1; j < parallel_count; j++)
1554 {
1555 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1556 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1557 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1558 }
1559 }
1560}
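/* The bind helpers in this file assume the flat tensor arrays are laid out copy-major:
 * entry i of data-parallel copy j lives at i + tensor_size * j. Sketch of that indexing: */
static int copy_major_index(const int i, const int tensor_size, const int j)
{
	return i + tensor_size * j;
}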
1561
1562void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1563{
1564 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1565 assert(compiled_data);
1566 const int parallel_count = ccv_max(model->parallel_count, 1);
1567 assert(output_size == model->output_size * parallel_count);
1568 assert(input_size == model->input_size * parallel_count);
1569 assert(!fits || fit_size == output_size);
1570 assert(model->graph);
1571 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1572 {
1573 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1574 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1575 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1576 // Compile the symbolic graph down only when needed.
1577 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1578 } else {
1579 assert((input_size % parallel_count) == 0);
1580 assert((output_size % parallel_count) == 0);
1581 assert((fit_size % parallel_count) == 0);
1582 const int input_size_per_p = input_size / parallel_count;
1583 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1584 const int output_size_per_p = output_size / parallel_count;
1585 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1586 const int fit_size_per_p = fit_size / parallel_count;
1587 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1588 }
1589 if (compiled_data->is_test)
1590 {
1591 compiled_data->is_test = 0;
1592 ccv_nnc_graph_exec_update_t update = {
1593 .parallel_count = parallel_count,
1594 .graph = model->graph,
1595 .graph_exec_arena = compiled_data->graph_exec_arena,
1596 };
1597 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1598 }
1599 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1600}
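// Illustrative sketch, not part of ccv_cnnp_model.c: one plausible way a caller could drive a
// single fit step with parallel_count == 1. Only ccv_cnnp_model_fit's signature above comes from
// this file; the helper name and the assumption that `input`, `fit` and `output` are already
// allocated tensors of the right shapes and devices are hypothetical.
static void example_single_fit_step(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const input, ccv_nnc_tensor_t* const fit, ccv_nnc_tensor_t* const output)
{
	ccv_nnc_tensor_t* const inputs[] = { input };
	ccv_nnc_tensor_t* const fits[] = { fit };
	ccv_nnc_tensor_t* const outputs[] = { output };
	// With parallel_count == 1, the array sizes must equal model->input_size / output_size (asserted above).
	ccv_cnnp_model_fit(model, inputs, 1, fits, 1, outputs, 1, 0 /* tensor_tape */, 0 /* stream_context */);
}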
1601
1602// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = false (MULTISTAGE_MODE_NO_GRAD).
1603static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1604{
1605 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1606 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1607 const int parallel_count = ccv_max(model->parallel_count, 1);
1608 assert(output_size == model->output_size * parallel_count);
1609 assert(output_size > 0);
1610 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here; rather,
1611 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1612 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1613 {
1614 const int evaluate_to_size = compiled_data->evaluate.to_size;
1615 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1616 _ccv_cnnp_model_set_rewindables(model);
1617 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1618 0, 0,
1619 0, 0, 0,
1620 0, 0, 0,
1621 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1622 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1623 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1624 int i, j;
1625 for (i = 0; i < evaluate_to_size; i++)
1626 for (j = 1; j < parallel_count; j++)
1627 {
1628 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1629 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1630 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1631 }
1632 }
1633 const int tensors_init = !!compiled_data->tensors_init.v;
1634 if (!tensors_init)
1635 _ccv_cnnp_model_tensors_init(model, compiled_data);
1636 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1637 // Check if it is not fully allocated; if it is not, run init_1.
1638 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1639 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1640 assert((input_size % parallel_count) == 0);
1641 assert((output_size % parallel_count) == 0);
1642 const int input_size_per_p = input_size / parallel_count;
1643 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1644 const int output_size_per_p = output_size / parallel_count;
1645 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1646 const int parameter_size = compiled_data->parameters->rnum;
1647 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1648 const int internal_size = compiled_data->internals->rnum;
1649 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1650 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1651 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1652 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1653 ccv_array_free(tensor_binds);
1654 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1655 // If tensor is not init'ed, we need to init states first.
1656 if (tensors_init && parallel_count > 1)
1657 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1658 if (_ccv_cnnp_any_to_init(compiled_data))
1659 {
1660 ccv_nnc_tensor_init_states_t tensor_init_states = {
1661 .parallel_count = parallel_count,
1662 .graph = model->graph,
1663 .compiled_data = compiled_data,
1664 .tensor_arena = compiled_data->tensor_arena
1665 };
1666 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1667 }
1668 compiled_data->is_test = 1;
1669 ccv_nnc_graph_exec_update_t update = {
1670 .parallel_count = parallel_count,
1671 .graph = model->graph,
1672 .graph_exec_arena = compiled_data->graph_exec_arena,
1673 };
1674 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1675 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1676 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1677}
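// Illustrative sketch (a hypothetical helper, not code from this file): the tensors_init.v pointer
// read above doubles as a tagged pointer. Bit 0 marks "not fully allocated" (the init_1 branch),
// and CCV_NNC_INIT_V masks that bit off before the uint32_t init bitmap is dereferenced.
static const uint32_t* example_untagged_init_v(const ccv_cnnp_compiled_data_t* const compiled_data)
{
	// Bit 0 set means partially allocated; masking it off recovers the real bitmap pointer.
	const int partially_allocated = !!((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1);
	(void)partially_allocated;
	return CCV_NNC_INIT_V(compiled_data->tensors_init.v);
}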
1678
1679static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1680{
1681 assert(!compiled_data->tensors.gradients);
1682 const int parameter_size = compiled_data->parameters->rnum;
1683 const int parallel_count = ccv_max(model->parallel_count, 1);
1684 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1685 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1686 int i, j;
1687 for (i = 0; i < parameter_size; i++)
1688 {
1689 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1690 {
1691 compiled_data->tensors.gradients[i] = 0;
1692 compiled_data->tensors.accum_gradients[i] = 0;
1693 for (j = 1; j < parallel_count; j++)
1694 {
1695 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1696 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1697 }
1698 continue;
1699 }
1700 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1701 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1702 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1703 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1704 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1705 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1706 compiled_data->tensors.accum_gradients[i] = 0; // Delay the accumulated gradient allocation until we need it.
1707 for (j = 1; j < parallel_count; j++)
1708 {
1709 if (j != device_id)
1710 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1711 else
1712 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1713 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1714 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1715 }
1716 }
1717}
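// Illustrative sketch (hypothetical helper, not in this file): the buffers allocated above are laid
// out parameter-major, so the gradient copy for parameter i on parallel device j lives at index
// i + j * parameter_size; frozen parameters (parameter_flags bit cleared) keep a 0 entry.
static ccv_nnc_tensor_t* example_gradient_at(const ccv_cnnp_compiled_data_t* const compiled_data, const int parameter_size, const int i, const int j)
{
	return compiled_data->tensors.gradients[i + j * parameter_size];
}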
1718
1719static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1720{
1721 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1722 return 1;
1723 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1724 return 0;
1725 int i;
1726 for (i = 0; i < input_size; i++)
1727 if (!(disable_outgrad & ((uint64_t)1 << i)))
1728 return 0;
1729 return 1;
1730}
1731
1732// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1733// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1734static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1735{
1736 int i, j;
1737 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1738 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1739 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode);
1740 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1741 const int parallel_count = ccv_max(model->parallel_count, 1);
1742 assert(output_size == model->output_size * parallel_count);
1743 assert(output_size > 0);
1744 // There shouldn't be a loss function if we evaluate with multistage jit.
1745 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1746 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1747 {
1748 _ccv_cnnp_model_set_rewindables(model);
1749 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1750 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1751 _ccv_cnnp_model_rewind_graph(model);
1752 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1753 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1754 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1755 }
1756 const int tensors_init = !!compiled_data->tensors_init.v;
1757 if (!tensors_init)
1758 _ccv_cnnp_model_tensors_init(model, compiled_data);
1759 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1760 // Check if it is not fully allocated; if it is not, run init_1.
1761 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1762 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1763 assert((input_size % parallel_count) == 0);
1764 assert((output_size % parallel_count) == 0);
1765 const int input_size_per_p = input_size / parallel_count;
1766 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1767 const int output_size_per_p = output_size / parallel_count;
1768 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1769 const int parameter_size = compiled_data->parameters->rnum;
1770 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1771 const int internal_size = compiled_data->internals->rnum;
1772 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1773 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1774 if (!compiled_data->tensors.gradients)
1775 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1776 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1777 if (compiled_data->backward.to_size > 0)
1778 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1779 else
1780 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1781 ccv_array_free(tensor_binds);
1782 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1783 if (tensors_init && parallel_count > 1)
1784 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1785 // If tensor is not init'ed, we need to init states first.
1786 if (_ccv_cnnp_any_to_init(compiled_data))
1787 {
1788 ccv_nnc_tensor_init_states_t tensor_init_states = {
1789 .parallel_count = parallel_count,
1790 .graph = model->graph,
1791 .compiled_data = compiled_data,
1792 .tensor_arena = compiled_data->tensor_arena
1793 };
1794 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1795 }
1796 compiled_data->is_test = is_test;
1797 ccv_nnc_graph_exec_update_t update = {
1798 .parallel_count = parallel_count,
1799 .graph = model->graph,
1800 .graph_exec_arena = compiled_data->graph_exec_arena,
1801 };
1802 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1803 const int evaluate_to_size = compiled_data->evaluate.to_size;
1804 compiled_data->evaluate.to_op_size = 0;
1805 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1806 for (i = 0; i < evaluate_to_size; i++)
1807 {
1808 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1809 if (to_op.graph)
1810 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1811 const int* tos;
1812 int to_size;
1813 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1814 for (j = 0; j < to_size; j++)
1815 {
1816 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1817 .d = tos[j],
1818 .graph = model->graph
1819 });
1820 if (to_op.graph)
1821 ccv_array_add_unique_int(backward_from, to_op.d);
1822 }
1823 }
1824 assert(backward_from->rnum > 0);
1825 compiled_data->backward.from_op_size = backward_from->rnum;
1826 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1827 for (i = 0; i < backward_from->rnum; i++)
1828 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1829 .d = *(int*)ccv_array_get(backward_from, i),
1830 .graph = compiled_data->graph,
1831 };
1832 // If there are any set nodes (to set some tensors to 0) inserted through the backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find these nodes and explicitly add them to backward.from_ops.
1833 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0);
1834 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1835 uint32_t* const visited = cccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1836 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0);
1837 const int source_size = compiled_data->graph->sources->rnum;
1838 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0);
1839 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1840 visited[(idx >> 5)] |= (1u << (idx & 31));
1841 } ccv_nnc_graph_visit_endfor
1842 ccv_nnc_graph_visit_free(visit);
1843 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0);
1844 const int destination_size = compiled_data->graph->destinations->rnum;
1845 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0);
1846 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1847 visited[(idx >> 5)] |= (1u << (idx & 31));
1848 } ccv_nnc_graph_visit_endfor
1849 ccv_nnc_graph_visit_free(visit);
1850 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0);
1851 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1852 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1853 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1854 {
1855 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD);
1856 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-case only the set commands that zero out a tensor, not the ones that set the gradient to 1.
1857 ccv_array_add_unique_int(backward_from, idx);
1858 }
1859 } ccv_nnc_graph_visit_endfor
1860 ccv_nnc_graph_visit_free(visit);
1861 ccfree(visited);
1862 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1863 {
1864 compiled_data->backward.from_op_size = backward_from->rnum;
1865 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1866 for (i = 0; i < backward_from->rnum; i++)
1867 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1868 .d = *(int*)ccv_array_get(backward_from, i),
1869 .graph = compiled_data->graph,
1870 };
1871 }
1872 ccv_array_free(backward_from);
1873 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1874 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1875}
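// Illustrative sketch (hypothetical helper): the `visited` array built above packs one bit per exec
// node, set with visited[idx >> 5] |= 1u << (idx & 31); this is the matching membership test used
// when scanning for set nodes that would otherwise be skipped between the two sub-schedules.
static int example_is_visited(const uint32_t* const visited, const int idx)
{
	return !!(visited[idx >> 5] & (1u << (idx & 31)));
}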
1876
1877void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1878{
1879 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1880 assert(compiled_data);
1881 const int parallel_count = ccv_max(model->parallel_count, 1);
1882 assert(output_size == model->output_size * parallel_count);
1883 assert(input_size == model->input_size * parallel_count);
1884 assert(model->graph);
1885 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1886 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1887 if (!compiled_data->graph || mode_mismatch)
1888 {
1889 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1890 if (mode_mismatch) // If there is a mode mismatch, we need to redo the backward pass as well (no need to redo apply_gradients; it doesn't depend on target_gradient_mode or disable_outgrad).
1891 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1892 if (params.requires_grad)
1893 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1894 else
1895 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1896 } else {
1897 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1898 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1898, __extension__ __PRETTY_FUNCTION__); }))
;
1899 const int input_size_per_p = input_size / parallel_count;
1900 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1901 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1901, __extension__ __PRETTY_FUNCTION__); }))
;
1902 const int output_size_per_p = output_size / parallel_count;
1903 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1904 }
1905 if (compiled_data->is_test != params.is_test)
1906 {
1907 compiled_data->is_test = params.is_test;
1908 ccv_nnc_graph_exec_update_t update = {
1909 .parallel_count = parallel_count,
1910 .graph = model->graph,
1911 .graph_exec_arena = compiled_data->graph_exec_arena,
1912 };
1913 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1914 }
1915}
1916
1917void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1918{
1919 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1920 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1920, __extension__ __PRETTY_FUNCTION__); }))
;
1921 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1922 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1923 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1924 else {
1925 if (!compiled_data->evaluate.schedule)
1926 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1927 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1928 }
1929}
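// A minimal inference sketch, not part of the analyzed ccv_cnnp_model.c: it drives the evaluate
// path above with requires_grad = 0, which takes the _ccv_cnnp_model_multistage_no_grad_jit branch.
// The compiled `model` and the host tensors `input`/`output` are assumptions of this example, and
// treating .disable_outgrad = 0 as "leave outgoing gradients at their default" is also an assumption.
static void example_inference_once(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const input, ccv_nnc_tensor_t* const output)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 0, // no gradients needed, so the no-grad graph is compiled and reused
		.is_test = 1,       // inference-mode behavior for layers that distinguish train/test
		.disable_outgrad = 0,
	};
	ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(input), TENSOR_LIST(output), 0, 0);
}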
1930
1931// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1933 // In particular, this method compiles the accumulator graph.
1933static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1934{
1935 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1936 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1936, __extension__ __PRETTY_FUNCTION__); }))
;
1937 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1937, __extension__ __PRETTY_FUNCTION__
); }))
;
1938 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1939 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1940 const int parameter_size = compiled_data->parameters->rnum;
1941 int i, j;
1942 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1943 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1944 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1945 for (i = 0; i < parameter_size; i++)
1946 for (j = 0; j < parallel_count; j++)
1947 if (compiled_data->tensors.gradients[i + j * parameter_size])
1948 {
1949 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1950 // Now the old gradient becomes the accumulated gradient; set up a new gradient tensor so we can collect fresh gradients into it.
1951 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1952 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1953 ccv_nnc_tensor_symbol_t inputs[2];
1954 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1955 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1956 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1957 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1958 } else {
1959 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1960 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1961 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1962 }
1963 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1964 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1965 {
1966 ccv_nnc_symbolic_graph_free(accum);
1967 // Create empty graph.
1968 compiled_data->backward.accum = ccv_nnc_graph_new();
1969 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1970 return;
1971 }
1972 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1973 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1974 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1975 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1976 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1977 ccv_nnc_symbolic_graph_free(accum);
1978 ccv_array_free(tensor_binds);
1979 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1980}
1981
1982void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1983{
1984 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1985 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1985, __extension__ __PRETTY_FUNCTION__); }))
;
1986 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__
); }))
;
1987 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1988 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1988, __extension__ __PRETTY_FUNCTION__
); }))
;
1989 if (outgrad_size > 0)
1990 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1990, __extension__ __PRETTY_FUNCTION__
); }))
; }
1991 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1991, __extension__ __PRETTY_FUNCTION__); }))
;
1992 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1992, __extension__ __PRETTY_FUNCTION__
); }))
;
1993 const int parameter_size = compiled_data->parameters->rnum;
1994 // If we need to accumulate the gradients now, jit the accumulator graph.
1995 if (compiled_data->backward.count > 0)
1996 {
1997 if (!compiled_data->backward.accum)
1998 _ccv_cnnp_model_multistage_jit_1(model);
1999 else if (compiled_data->backward.count == 1) {
2000 // On this round, we need to swap the accumulated gradients with the gradients (so we can do the accumulation properly).
2001 int i;
2002 for (i = 0; i < parameter_size * parallel_count; i++)
2003 {
2004 ccv_nnc_tensor_t* tensor;
2005 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
2006 }
2007 if (compiled_data->backward.tensor_arena)
2008 {
2009 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2010 // Rebind in case the bindings went stale (we swapped accum_gradients and gradients).
2011 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2012 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2013 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2014 }
2015 }
2016 }
2017 const int ingrad_size_per_p = model->output_size;
2018 const int outgrad_size_per_p = compiled_data->outgrad_size;
2019 int i, j;
2020 for (i = 0; i < ingrad_size_per_p; i++)
2021 {
2022 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
2023 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2024 {
2025 // Set it to 1 if it is not specified.
2026 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2027 if (ingrad_tensor)
2028 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2029 for (j = 1; j < parallel_count; j++)
2030 {
2031 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2032 if (ingrad_tensor)
2033 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2034 }
2035 } else {
2036 // Make sure the length matches, in case it is an alias.
2037 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 2037, __extension__ __PRETTY_FUNCTION__
); }))
;
2038 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2039 for (j = 1; j < parallel_count; j++)
2040 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2041 }
2042 }
2043 if (outgrad_size > 0)
2044 {
2045 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 2045, __extension__ __PRETTY_FUNCTION__
); }))
;
2046 for (i = 0; i < outgrad_size_per_p; i++)
2047 if (outgrads[i])
2048 {
2049 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2050 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2051 for (j = 1; j < parallel_count; j++)
2052 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2053 }
2054 } else {
2055 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2056, __extension__ __PRETTY_FUNCTION__
); }))
2056 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2056, __extension__ __PRETTY_FUNCTION__
); }))
;
2057 }
2058 // We need to rebind here because ccv_cnnp_model_evaluate clears bindings, and that resets all bindings for the gradients.
2059 // For parameters and internals this is fine: clearing bindings restores the original bindings, which are exactly these
2060 // parameters and internals. The same cannot be said for gradients because of the accum_gradients swapping.
2061 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2062 if (!compiled_data->backward.schedule)
2063 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2064 // Run the backward pass.
2065 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2066 // If we need to run an accumulation round, do that now.
2067 if (compiled_data->backward.count > 0)
2068 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2069 // Update the count; it determines whether we need to accumulate next time.
2070 ++compiled_data->backward.count;
2071}
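// A gradient-accumulation sketch (hypothetical driver code, not part of this file): every
// ccv_cnnp_model_backward call after the first runs the accumulator graph compiled by
// _ccv_cnnp_model_multistage_jit_1, and backward.count is only reset by ccv_cnnp_model_apply_gradients.
static void example_accumulate_two_microbatches(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const inputs[2], ccv_nnc_tensor_t* const outputs[2])
{
	const ccv_cnnp_evaluate_param_t params = { .requires_grad = 1 };
	int i;
	for (i = 0; i < 2; i++)
	{
		ccv_cnnp_model_evaluate(model, params, TENSOR_LIST(inputs[i]), TENSOR_LIST(outputs[i]), 0, 0);
		// Passing ingrad_size == 0 lets the loop above default the incoming gradients to 1.
		ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0);
	}
	// Applies the minimizer against the accumulated gradients, then resets backward.count to 0.
	ccv_cnnp_model_apply_gradients(model, 0);
}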
2072
2073// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2074 // In particular, this method compiles the parameter-update graph.
2075static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2076{
2077 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2078 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2078, __extension__ __PRETTY_FUNCTION__
); }))
;
2079 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2080 const int parameter_size = compiled_data->parameters->rnum;
2081 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2082 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2083 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2084 // Bind accumulated gradients.
2085 if (compiled_data->backward.count > 1)
2086 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2087 else
2088 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2089 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2090 int i, j;
2091 for (i = 0; i < compiled_data->backward.to_size; i++)
2092 {
2093 const int* tos;
2094 int to_size;
2095 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2096 for (j = 0; j < to_size; j++)
2097 {
2098 // Check whether this already shows up in the backward graph; if it does, it won't be in the apply-
2099 // gradients graph.
2100 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2101 .d = tos[j],
2102 .graph = model->graph,
2103 });
2104 if (!exec.graph)
2105 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2106 }
2107 }
2108 const int from_size = apply_gradients_from->rnum;
2109 if (from_size == 0)
2110 {
2111 ccv_array_free(apply_gradients_from);
2112 ccv_array_free(tensor_binds);
2113 return;
2114 }
2115 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2116 for (i = 0; i < from_size; i++)
2117 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2118 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2119 .graph = model->graph
2120 };
2121 ccv_array_free(apply_gradients_from);
2122 // It can only end with updates to the parameters.
2123 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2124 for (i = 0; i < parameter_size; i++)
2125 {
2126 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2127 continue;
2128 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2129 for (j = 1; j < parallel_count; j++)
2130 {
2131 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2132 ccv_array_push(tos, &copy);
2133 }
2134 }
2135 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2136 ccv_array_free(tos);
2137 ccv_array_free(tensor_binds);
2138 ccfreefree(froms);
2139 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2140 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2141 {
2142 // Skip if there is no tensor.
2143 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2144 continue;
2145 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2146 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2147 for (j = 1; j < parallel_count; j++)
2148 {
2149 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2150 if (copy)
2151 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2152 }
2153 }
2154 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2155}
2156
2157void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2158{
2159 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2160 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2160, __extension__ __PRETTY_FUNCTION__); }))
;
2161 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2161, __extension__ __PRETTY_FUNCTION__
); }))
;
2162 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2163 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2163, __extension__ __PRETTY_FUNCTION__); }))
;
2164 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2164, __extension__ __PRETTY_FUNCTION__
); }))
;
2165 // Skip if there is no backward pass.
2166 if (compiled_data->backward.count <= 0)
2167 return;
2168 // Skip if there are no parameters.
2169 if (compiled_data->parameters->rnum == 0)
2170 {
2171 compiled_data->backward.count = 0;
2172 return;
2173 }
2174 if (!compiled_data->apply_gradients.graph)
2175 _ccv_cnnp_model_multistage_jit_2(model);
2176 else {
2177 const int parameter_size = compiled_data->parameters->rnum;
2178 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2179 // Bind accum_gradients instead if we did gradient accumulation (ran backward more than once).
2180 if (compiled_data->backward.count > 1)
2181 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2182 else
2183 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2184 }
2185 if (compiled_data->apply_gradients.graph)
2186 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2187 // Reset backward count to 0.
2188 compiled_data->backward.count = 0;
2189}
2190
2191void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2192{
2193 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2194 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2195 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2195, __extension__ __PRETTY_FUNCTION__
); }))
;
2196 const int tensors_init = !!compiled_data->tensors_init.v;
2197 if (!tensors_init)
2198 _ccv_cnnp_model_tensors_init(model, compiled_data);
2199 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2200 // Check whether it is fully allocated; if it is not, run init_1.
2201 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2202 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2203 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2204 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2205 if (param_ref < 0)
2206 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2206
, __extension__ __PRETTY_FUNCTION__); }))
; }
2207 else
2208 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2208, __extension__ __PRETTY_FUNCTION__
); }))
; }
2209 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2210 ccv_array_free(parameter_indices);
2211 const int parameter_size = compiled_data->parameters->rnum;
2212 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2212
, __extension__ __PRETTY_FUNCTION__); }))
;
2213 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2213, __extension__ __PRETTY_FUNCTION__
); }))
;
2214 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2215 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2216 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2216, __extension__
__PRETTY_FUNCTION__); }))
;
2217 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2218 int i;
2219 for (i = 1; i < parallel_count; i++)
2220 {
2221 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2222 if (copy_tensor)
2223 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2224 }
2225 // Mark this symbol as init'ed.
2226 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2227 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2228 init_v[s >> 5] |= (1u << (s & 0x1f));
2229}
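// A sketch of pushing host data into a single parameter, assuming a compiled model whose
// parameters live in CPU memory and a `values` buffer with at least as many floats as the
// parameter holds (all assumptions of this example, not guarantees of the API above).
// ccv_cnnp_model_parameters(model, -1, 0) mirrors the ccv_cnnp_model_parameters(model, -1, i)
// usage elsewhere in this file to select parameter index 0.
static void example_set_first_parameter(ccv_cnnp_model_t* const model, const float* const values)
{
	const ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, 0);
	const ccv_nnc_tensor_param_t info = ccv_cnnp_model_parameter_tensor_params(model, parameter);
	ccv_nnc_tensor_t* const host = ccv_nnc_tensor_new(0, info, 0); // staging tensor with the parameter's shape
	memcpy(host->data.f32, values, sizeof(float) * ccv_nnc_tensor_count(info));
	ccv_cnnp_model_set_parameter(model, parameter, host); // transfers into every device replica above
	ccv_nnc_tensor_free(host);
}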
2230
2231void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2232{
2233 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2234 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2235 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2235, __extension__ __PRETTY_FUNCTION__
); }))
;
2236 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2236, __extension__ __PRETTY_FUNCTION__
); }))
;
2237 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2238 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2239 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2240 if (param_ref < 0)
2241 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2241
, __extension__ __PRETTY_FUNCTION__); }))
; }
2242 else
2243 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2243, __extension__ __PRETTY_FUNCTION__
); }))
; }
2244 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2245 ccv_array_free(parameter_indices);
2246 const int parameter_size = compiled_data->parameters->rnum;
2247 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2247
, __extension__ __PRETTY_FUNCTION__); }))
;
2248 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2248, __extension__ __PRETTY_FUNCTION__
); }))
;
2249 // We don't need to consider parallel_count; every parameter on each device is identical.
2250 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2251 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2251, __extension__
__PRETTY_FUNCTION__); }))
;
2252 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2253}
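// The reverse direction, sketched under the same assumptions (compiled model, CPU-resident
// parameters): copy a parameter out into a freshly allocated tensor for inspection.
static ccv_nnc_tensor_t* example_read_first_parameter(ccv_cnnp_model_t* const model)
{
	const ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, 0);
	ccv_nnc_tensor_t* const host = ccv_nnc_tensor_new(0, ccv_cnnp_model_parameter_tensor_params(model, parameter), 0);
	ccv_cnnp_model_parameter_copy(model, parameter, host); // any replica is fine, per the comment above
	return host; // caller frees with ccv_nnc_tensor_free
}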
2254
2255ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2256{
2257 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2258 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2259 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2259, __extension__ __PRETTY_FUNCTION__
); }))
;
2260 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2260, __extension__ __PRETTY_FUNCTION__
); }))
;
2261 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2262 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2263 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2264 if (param_ref < 0)
2265 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2265
, __extension__ __PRETTY_FUNCTION__); }))
; }
2266 else
2267 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2267, __extension__ __PRETTY_FUNCTION__
); }))
; }
2268 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2269 ccv_array_free(parameter_indices);
2270 const int parameter_size = compiled_data->parameters->rnum;
2271 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2271
, __extension__ __PRETTY_FUNCTION__); }))
;
2272 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2272, __extension__ __PRETTY_FUNCTION__
); }))
;
2273 // We don't need to consider parallel_count; every parameter on each device is identical.
2274 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2275 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2275, __extension__
__PRETTY_FUNCTION__); }))
;
2276 return tensor->info;
2277}
2278
2279const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2280{
2281 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2282 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2283 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2283, __extension__ __PRETTY_FUNCTION__
); }))
;
2284 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2285 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2286 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2287 if (param_ref < 0)
2288 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2288
, __extension__ __PRETTY_FUNCTION__); }))
; }
2289 else
2290 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2290, __extension__ __PRETTY_FUNCTION__
); }))
; }
2291 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2292 ccv_array_free(parameter_indices);
2293 const int parameter_size = compiled_data->parameters->rnum;
2294 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2294
, __extension__ __PRETTY_FUNCTION__); }))
;
2295 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2295, __extension__ __PRETTY_FUNCTION__
); }))
;
2296 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2297}
2298
2299int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2300{
2301 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2301, __extension__ __PRETTY_FUNCTION__
); }))
;
2302 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2303 return compiled_data->parameters->rnum;
2304}
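// A small sketch, assuming a compiled model, that combines ccv_cnnp_model_parameter_count,
// ccv_cnnp_model_parameters and ccv_cnnp_model_parameter_tensor_params above to total up the
// number of trainable elements; the helper name is illustrative only.
static size_t example_total_trainable_elements(ccv_cnnp_model_t* const model)
{
	const int parameter_count = ccv_cnnp_model_parameter_count(model);
	size_t total = 0;
	int i;
	for (i = 0; i < parameter_count; i++)
	{
		const ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
		total += ccv_nnc_tensor_count(ccv_cnnp_model_parameter_tensor_params(model, parameter));
	}
	return total;
}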
2305
2306ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2307{
2308 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2309 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2309, __extension__ __PRETTY_FUNCTION__); }))
;
2310 const int parameter_size = compiled_data->parameters->rnum;
2311 int i;
2312 for (i = 0; i < parameter_size; i++)
2313 {
2314 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2315 if (first(model, name, context))
2316 return ccv_cnnp_model_parameters(model, -1, i);
2317 }
2318 return 0;
2319}
2320
2321ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2322{
2323 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2324 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2324, __extension__ __PRETTY_FUNCTION__); }))
;
2325 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2326 const int parameter_size = compiled_data->parameters->rnum;
2327 int i;
2328 for (i = 0; i < parameter_size; i++)
2329 {
2330 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2331 if (filter(model, name, context))
2332 {
2333 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2334 ccv_array_push(parameters, &parameter);
2335 }
2336 }
2337 return parameters;
2338
2339}
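// A sketch of a filter callback for the two functions above, matching parameters whose id
// contains a substring passed through `context`. The exact const-qualification of
// ccv_cnnp_model_parameters_filter_f is assumed here; the "bias" substring is purely illustrative.
static int example_name_contains(const ccv_cnnp_model_t* const model, const char* const name, void* const context)
{
	return strstr(name, (const char*)context) != 0;
}
// Usage sketch: ccv_array_t* const matches = ccv_cnnp_model_parameters_filter(model, example_name_contains, "bias");
// The returned array of ccv_cnnp_model_io_t handles should be freed with ccv_array_free.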
2340
2341CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2342{
2343 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2344 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2344, __extension__ __PRETTY_FUNCTION__); }))
;
2345 const int tensors_init = !!compiled_data->tensors_init.v;
2346 if (!tensors_init) // If nothing is initialized, return parameter 0.
2347 return ccv_cnnp_model_parameters(model, -1, 0);
2348 const int parameter_size = compiled_data->parameters->rnum;
2349 int i;
2350 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2351 for (i = 0; i < parameter_size; i++)
2352 {
2353 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2354 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2355 return ccv_cnnp_model_parameters(model, -1, i);
2356 }
2357 return 0;
2358}
2359
2360static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2361{
2362 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2363 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2363, __extension__
__PRETTY_FUNCTION__); }))
;
2364 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2365 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2366 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2367 return to_parameter_indices;
2368}
2369
2370static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2371{
2372 // If the model is not compiled yet, compile it now.
2373 if (!model->graph)
2374 {
2375 model->graph = ccv_nnc_symbolic_graph_new();
2376 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2376, __extension__ __PRETTY_FUNCTION__
); }))
;
2377 const int input_size = from_model->input_size;
2378 ccv_nnc_tensor_param_t input_params[input_size];
2379 int i;
2380 for (i = 0; i < input_size; i++)
2381 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2382 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2383 model->parallel_count = from_model->parallel_count;
2384 model->memory_compression = from_model->memory_compression;
2385 model->memory_reduction = from_model->memory_reduction;
2386 model->gradient_checkpointing = from_model->gradient_checkpointing;
2387 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2388 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2389 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2390 }
2391 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2392 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2392, __extension__ __PRETTY_FUNCTION__
); }))
;
2393 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2394 if (!to_tensors_init)
2395 {
2396 if (only_init_0)
2397 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2398 else
2399 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2400 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2401 // Check whether it is fully allocated; if it is not, run init_1.
2402 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2403 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2403, __extension__ __PRETTY_FUNCTION__
); }))
;
2404 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2405 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2406 if (*from_param_ref < 0 && *param_ref >= 0)
2407 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2407, __extension__ __PRETTY_FUNCTION__
); }))
; }
2408 else if (*from_param_ref >= 0)
2409 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2409, __extension__ __PRETTY_FUNCTION__
); }))
; }
2410 if (*param_ref < 0 && *from_param_ref >= 0)
2411 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2411, __extension__ __PRETTY_FUNCTION__); }))
; }
2412 else if (*param_ref >= 0)
2413 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2413, __extension__ __PRETTY_FUNCTION__
); }))
; }
2414}
2415
2416void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2417{
2418 ccv_array_t* to_parameter_indices;
2419 int to_param_ref;
2420 ccv_array_t* from_parameter_indices;
2421 int from_param_ref;
2422 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2423 // The two selections should cover exactly the same set of tensors.
2424 if (to_param_ref < 0 && from_param_ref < 0)
2425 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2425, __extension__ __PRETTY_FUNCTION__
); }))
; }
2426 // To models.
2427 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2428 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2428, __extension__ __PRETTY_FUNCTION__
); }))
;
2429 // From models.
2430 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2431 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2432 const int to_parameter_size = to_compiled_data->parameters->rnum;
2433 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2434 int i, j;
2435 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2436 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2437 for (i = 0; i < rnum; i++)
2438 {
2439 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2440 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2440, __extension__ __PRETTY_FUNCTION__); }))
;
2441 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2441, __extension__ __PRETTY_FUNCTION__
); }))
;
2442 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2444 // If the original is not init'ed, we cannot copy from it.
2444 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2445 continue;
2446 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2447 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2447, __extension__ __PRETTY_FUNCTION__); }))
;
2448 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2448, __extension__ __PRETTY_FUNCTION__
); }))
;
2449 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2450 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2450, __extension__
__PRETTY_FUNCTION__); }))
;
2451 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2452 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2452, __extension__
__PRETTY_FUNCTION__); }))
;
2453 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2454 for (j = 1; j < parallel_count; j++)
2455 {
2456 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2457 if (copy_tensor)
2458 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2459 }
2460 // Mark this symbol as init'ed.
2461 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2462 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2463 }
2464 ccv_array_free(to_parameter_indices);
2465 ccv_array_free(from_parameter_indices);
2466}
2467
2468KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27 Taking true branch
28 Taking false branch
29 Calling 'kh_resize_ccv_cnnp_parameter_id'
30 Taking true branch
31 Assuming the condition is false
32 Taking false branch
33 '?' condition is true
34 Assuming 'new_flags' is non-null
35 Taking false branch
36 '?' condition is true
37 Taking true branch
38 Storing uninitialized value
39 Assuming 'new_keys' is non-null
40 Taking false branch
41 Taking true branch
42 Assuming 'new_vals' is non-null
43 Taking false branch
44 Taking true branch
45 Loop condition is false. Execution continues on line 2468
46 Taking false branch
47 Returning from 'kh_resize_ccv_cnnp_parameter_id'
48 Taking false branch
49 The value 0 is assigned to 'i'
50 Assuming the condition is false
51 Taking false branch
52 Assuming the condition is false
53 1st function call argument is an uninitialized value
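The path above ends inside the khash probe of kh_put_ccv_cnnp_parameter_id: after kh_resize reallocates h->keys (step 38 stores the uninitialized realloc contents) and memsets the fresh flags bitmap to 0xaa (every bucket empty), the analyzer assumes at steps 50 and 52 that the probed bucket's empty/deleted bits are clear, so it reports the strcmp argument h->keys[i] as uninitialized. Below is a minimal sketch of that guarded-read pattern; it is not ccv code, the names are made up, and it only illustrates the shape of the report. If khash's flags invariant holds, the strcmp cannot see an uninitialized key right after a resize, which would make this a false positive the analyzer cannot prove away.

/* Toy sketch of the khash pattern the analyzer is reasoning about. */
#include <stdlib.h>
#include <string.h>

typedef struct {
	unsigned n_buckets;
	unsigned char* flags;  /* 1 == bucket empty, standing in for khash's 0xaa bitmap */
	const char** keys;     /* realloc'ed on resize; new slots left uninitialized */
} toy_map;

static int toy_put(toy_map* const h, const char* const key)
{
	if (h->n_buckets == 0)
	{
		const unsigned n = 4;
		h->keys = (const char**)realloc((void*)h->keys, n * sizeof(const char*));
		h->flags = (unsigned char*)realloc(h->flags, n);
		memset(h->flags, 1, n); /* every bucket marked empty, mirroring memset(new_flags, 0xaa, ...) */
		h->n_buckets = n;
	}
	unsigned i = 0;
	/* keys[i] is only read when flags[i] says the bucket is occupied; right after the
	 * resize above no bucket is occupied, so the strcmp is unreachable. The analyzer
	 * does not track that invariant and flags the read instead. */
	while (!h->flags[i] && strcmp(h->keys[i], key) != 0)
		i = (i + 1) % h->n_buckets;
	h->flags[i] = 0;
	h->keys[i] = key;
	return (int)i;
}

int main(void)
{
	toy_map h = { 0, 0, 0 };
	toy_put(&h, "ccv_cnnp_parameter_id");
	free((void*)h.keys);
	free(h.flags);
	return 0;
}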
2469
2470void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2471{
2472 ccv_array_t* to_parameter_indices;
2473 int to_param_ref;
2474 ccv_array_t* from_parameter_indices;
2475 int from_param_ref;
2476 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2477 // Should be exactly the same tensor.
2478 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2479 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2479, __extension__ __PRETTY_FUNCTION__
); }))
; }
2480 // To models.
2481 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2482 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2482, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2483 // From models.
2484 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2485 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2486 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2486, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count to share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2487 const int from_parameter_size = from_compiled_data->parameters->rnum;
2488 const int to_parameter_size = to_compiled_data->parameters->rnum;
2489 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2490 int i, j;
2491 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2492 char* updated_name = 0;
2493 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2494 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2495 for (i = 0; i < rnum; i++)
2496 {
2497 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2498 // Need to figure out how to use the renamer here.
2499 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2500 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2500, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2501 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2501, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2502 if (renamer
18.1
'renamer' is non-null
)
2503 {
2504 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2505 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2506 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2507 updated_name = (char*)ccmallocmalloc(1024);
2508 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2509 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2510 memcpy(updated_name, src_name, src_name_len);
2511 updated_name[src_name_len] = 0;
2512 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2513 continue; // Skip this.
2514 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2515 {
2516 // Nothing changed.
2517 } else {
2518 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2519 {
2520 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2521 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
2522 {
2523 int ret;
2524 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
2525 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2525
, __extension__ __PRETTY_FUNCTION__); }))
;
2526 kh_val(id_map, k)((id_map)->vals[k]) = j;
2527 }
2528 }
2529 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2530 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2531 continue;
2532 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2533 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2533, __extension__ __PRETTY_FUNCTION__); }))
;
2534 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2534, __extension__
__PRETTY_FUNCTION__); }))
;
2535 }
2536 }
2537 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2537, __extension__ __PRETTY_FUNCTION__); }))
;
2538 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2538, __extension__
__PRETTY_FUNCTION__); }))
;
2539 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2540 // If the original is not init'ed, we cannot share from it.
2541 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2542 continue;
2543 for (j = 0; j < parallel_count; j++)
2544 {
2545 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2546 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2546, __extension__
__PRETTY_FUNCTION__); }))
;
2547 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2548 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2549 ccv_nnc_tensor_free(dest);
2550 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2551 }
2552 // Mark this symbol as init'ed.
2553 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2554 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2555 }
2556 ccv_array_free(to_parameter_indices);
2557 ccv_array_free(from_parameter_indices);
2558 if (id_map)
2559 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2560 if (updated_name)
2561 ccfreefree(updated_name);
2562 // Mark it as incomplete so we will call init_1.
2563 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2564 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2565 else // Remove the flag.
2566 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2567}
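For reference, the renamer argument of ccv_cnnp_model_share_parameters above is invoked as renamer(context, dest_name, updated_name, 1024): updated_name arrives pre-filled with the positionally matched source parameter id (possibly empty), and on a zero return it must contain the id of the from-model parameter to share; a non-zero return skips the parameter. A hypothetical sketch follows, with made-up prefixes and a signature inferred only from that call site.

/* Hypothetical renamer: share "__dec__..." parameters from their "__enc__..."
 * counterparts, skipping everything else. Requires <stdio.h> and <string.h>. */
static int _share_by_prefix(void* const context, const char* const dest_name, char* const updated_name, const size_t provided_size)
{
	(void)context;
	if (strncmp(dest_name, "__dec__", 7) != 0)
		return -1; /* Non-zero: leave this parameter alone. */
	snprintf(updated_name, provided_size, "__enc__%s", dest_name + 7);
	return 0; /* updated_name now holds the from-model id to look up. */
}
/* Passed as the renamer argument:
 * ccv_cnnp_model_share_parameters(model, parameters, from_model, from_parameters, _share_by_prefix, 0); */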
2568
2569ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2570{
2571 if (!compiled_data->stream_map)
2572 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2573 int ret = 0;
2574 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2575 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2575, __extension__ __PRETTY_FUNCTION__); }))
;
2576 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2577 // If ret == 0, the key already exists and we can return it directly; otherwise, create the stream and return it.
2578 if (ret != 0)
2579 {
2580 stream = ccv_nnc_stream_context_new(type);
2581 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2582 }
2583 return stream;
2584}
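A small usage sketch of the function above (compiled_data and type are assumed to come from surrounding code): because the stream map caches by type, repeated calls with the same type return the same context instead of allocating a new one.

/* Sketch: the second lookup hits the cached entry created by the first. */
ccv_nnc_stream_context_t* const s0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
ccv_nnc_stream_context_t* const s1 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
assert(s0 == s1);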
2585
2586void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2587{
2588 ccv_array_t* to_parameter_indices;
2589 int to_param_ref;
2590 ccv_array_t* from_parameter_indices;
2591 int from_param_ref;
2592 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2593 // Should be exactly the same tensor.
2594 if (to_param_ref < 0 && from_param_ref < 0)
2595 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2595, __extension__ __PRETTY_FUNCTION__
); }))
; }
2596 // To models.
2597 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2598 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2598, __extension__ __PRETTY_FUNCTION__
); }))
;
2599 // From models.
2600 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2601 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2602 const int to_parameter_size = to_compiled_data->parameters->rnum;
2603 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2604 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2604, __extension__ __PRETTY_FUNCTION__
); }))
;
2605 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2605, __extension__ __PRETTY_FUNCTION__
); }))
;
2606 int i, j;
2607 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2608 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2609 for (i = 0; i < aux_in_size; i++)
2610 inputs[i + 2] = aux_ins[i];
2611 for (i = 0; i < aux_out_size; i++)
2612 outputs[i + 1] = aux_outs[i];
2613 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2614 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2615 for (i = 0; i < rnum; i++)
2616 {
2617 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2618 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2618, __extension__ __PRETTY_FUNCTION__); }))
;
2619 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2619, __extension__ __PRETTY_FUNCTION__
); }))
;
2620 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2621 // If the original is not init'ed, we cannot copy from it.
2622 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2623 continue;
2624 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2625 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2625, __extension__ __PRETTY_FUNCTION__); }))
;
2626 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2626, __extension__ __PRETTY_FUNCTION__
); }))
;
2627 if (parallel_count > 1)
2628 {
2629 ccv_nnc_stream_context_t* streams[parallel_count];
2630 ccv_nnc_stream_signal_t* signal;
2631 if (stream_context)
2632 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2633 for (j = 0; j < parallel_count; j++)
2634 {
2635 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2636 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2637 if (!dest || !src)
2638 {
2639 streams[j] = 0;
2640 continue;
2641 }
2642 // At the moment, we can only handle them on the same device.
2643 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2643, __extension__ __PRETTY_FUNCTION__
); }))
;
2644 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2644, __extension__ __PRETTY_FUNCTION__
); }))
;
2645 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2646 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2647 int type = stream_type;
2648 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2649 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2650 // Wait for the signal before running on this stream.
2651 if (stream_context)
2652 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2653 inputs[0] = outputs[0] = dest;
2654 inputs[1] = src;
2655 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2656 if (stream_context)
2657 {
2658 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2659 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2660 }
2661 streams[j] = stream_0;
2662 }
2663 // If this should be blocking, block on it.
2664 if (!stream_context)
2665 for (j = 0; j < parallel_count; j++)
2666 if (streams[j])
2667 ccv_nnc_stream_context_wait(streams[j]);
2668 } else {
2669 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2670 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2670, __extension__
__PRETTY_FUNCTION__); }))
;
2671 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2672 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2672, __extension__
__PRETTY_FUNCTION__); }))
;
2673 inputs[0] = outputs[0] = dest;
2674 inputs[1] = src;
2675 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2676 }
2677 // Mark this symbol as init'ed.
2678 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2679 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2680 }
2681 ccv_array_free(to_parameter_indices);
2682 ccv_array_free(from_parameter_indices);
2683}
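The zip variant above feeds inputs[0] = dest (this model's parameter) and inputs[1] = src (the other model's parameter) and writes outputs[0] = dest, so a two-input command becomes an in-place combine. Below is a hedged sketch of the common exponential-moving-average use; it assumes CMD_ADD_FORWARD(a, b) from ccv_nnc_easy.h builds y = a * x0 + b * x1, and that ema_model / model and their parameter selections (ema_parameters, model_parameters) are set up elsewhere.

/* Sketch: ema_param = 0.9 * ema_param + 0.1 * model_param, for every matched pair. */
ccv_cnnp_model_parameters_zip_map(ema_model, ema_parameters, CMD_ADD_FORWARD(0.9, 0.1), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, model, model_parameters);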
2684
2685void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2686{
2687 int to_param_ref;
2688 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2689 // To models.
2690 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2691 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2691, __extension__ __PRETTY_FUNCTION__
); }))
;
2692 // Tensor has to be inited already.
2693 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2693, __extension__ __PRETTY_FUNCTION__
); }))
;
2694 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2694, __extension__ __PRETTY_FUNCTION__
); }))
;
2695 // From models.
2696 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2697 const int to_parameter_size = to_compiled_data->parameters->rnum;
2698 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2699 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2699, __extension__ __PRETTY_FUNCTION__
); }))
;
2700 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2700, __extension__ __PRETTY_FUNCTION__
); }))
;
2701 int i, j;
2702 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2703 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2704 for (i = 0; i < aux_in_size; i++)
2705 inputs[i + 1] = aux_ins[i];
2706 for (i = 0; i < aux_out_size; i++)
2707 outputs[i + 1] = aux_outs[i];
2708 for (i = 0; i < rnum; i++)
2709 {
2710 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2711 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2711, __extension__ __PRETTY_FUNCTION__); }))
;
2712 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2712, __extension__ __PRETTY_FUNCTION__
); }))
;
2713 if (parallel_count > 1)
2714 {
2715 ccv_nnc_stream_context_t* streams[parallel_count];
2716 ccv_nnc_stream_signal_t* signal;
2717 if (stream_context)
2718 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2719 for (j = 0; j < parallel_count; j++)
2720 {
2721 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2722 if (!dest)
2723 {
2724 streams[j] = 0;
2725 continue;
2726 }
2727 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2728 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2729 int type = stream_type;
2730 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2731 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2732 // Wait for the signal before running on this stream.
2733 if (stream_context)
2734 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2735 inputs[0] = outputs[0] = dest;
2736 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2737 if (stream_context)
2738 {
2739 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2740 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2741 }
2742 streams[j] = stream_0;
2743 }
2744 // If this should be blocking, block on it.
2745 if (!stream_context)
2746 for (j = 0; j < parallel_count; j++)
2747 if (streams[j])
2748 ccv_nnc_stream_context_wait(streams[j]);
2749 } else {
2750 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2751 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2751, __extension__
__PRETTY_FUNCTION__); }))
;
2752 inputs[0] = outputs[0] = dest;
2753 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2754 }
2755 // No need to mark this symbol as init'ed, it is already.
2756 }
2757 ccv_array_free(to_parameter_indices);
2758}
2759
2760void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2761{
2762 int to_param_ref;
2763 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2764 // To models.
2765 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2766 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2766, __extension__ __PRETTY_FUNCTION__
); }))
;
2767 // Tensor has to be inited already.
2768 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2768, __extension__ __PRETTY_FUNCTION__
); }))
;
2769 ccv_nnc_tensor_t** tensor_gradients;
2770 if (to_compiled_data->backward.count > 1)
2771 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2772 else
2773 tensor_gradients = to_compiled_data->tensors.gradients;
2774 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 2774, __extension__ __PRETTY_FUNCTION__
); }))
;
2775 // From models.
2776 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2777 const int to_parameter_size = to_compiled_data->parameters->rnum;
2778 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2779 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2779, __extension__ __PRETTY_FUNCTION__
); }))
;
2780 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2780, __extension__ __PRETTY_FUNCTION__
); }))
;
2781 int i, j;
2782 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2783 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2784 for (i = 0; i < aux_in_size; i++)
2785 inputs[i + 1] = aux_ins[i];
2786 for (i = 0; i < aux_out_size; i++)
2787 outputs[i + 1] = aux_outs[i];
2788 for (i = 0; i < rnum; i++)
2789 {
2790 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2791 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2791, __extension__ __PRETTY_FUNCTION__); }))
;
2792 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2792, __extension__ __PRETTY_FUNCTION__
); }))
;
2793 if (parallel_count > 1)
2794 {
2795 ccv_nnc_stream_context_t* streams[parallel_count];
2796 ccv_nnc_stream_signal_t* signal;
2797 if (stream_context)
2798 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2799 for (j = 0; j < parallel_count; j++)
2800 {
2801 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2802 if (!dest)
2803 {
2804 streams[j] = 0;
2805 continue;
2806 }
2807 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2808 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2809 int type = stream_type;
2810 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2811 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2812 // Wait for the signal before running on this stream.
2813 if (stream_context)
2814 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2815 inputs[0] = outputs[0] = dest;
2816 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2817 if (stream_context)
2818 {
2819 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2820 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2821 }
2822 streams[j] = stream_0;
2823 }
2824 // If this should be blocking, block on it.
2825 if (!stream_context)
2826 for (j = 0; j < parallel_count; j++)
2827 if (streams[j])
2828 ccv_nnc_stream_context_wait(streams[j]);
2829 } else {
2830 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2831 if (!dest)
2832 continue;
2833 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2833, __extension__
__PRETTY_FUNCTION__); }))
;
2834 inputs[0] = outputs[0] = dest;
2835 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2836 }
2837 // No need to mark this symbol as init'ed, it is already.
2838 }
2839 ccv_array_free(to_parameter_indices);
2840}
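A hedged sketch of the usual use of the gradients variant above: zeroing accumulated gradients between accumulation rounds. It assumes CMD_SET_FORWARD(0) is ccv_nnc's easy macro for the SET command that fills a tensor with the given scalar, and that parameters is a ccv_cnnp_model_io_t parameter selection obtained elsewhere.

/* Sketch: set every selected gradient tensor to 0 on the default (blocking) path. */
ccv_cnnp_model_parameter_gradients_map(model, parameters, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);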
2841
2842ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2843{
2844 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2845 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2845, __extension__ __PRETTY_FUNCTION__); }))
;
2846 return compiled_data->minimize.minimizer;
2847}
2848
2849void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2850{
2851 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2852 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2852, __extension__ __PRETTY_FUNCTION__); }))
;
2853 const int parameter_size = compiled_data->parameters->rnum;
2854 if (parameter_size == 0)
2855 return;
2856 if (reset)
2857 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 2857, __extension__ __PRETTY_FUNCTION__
); }))
; }
2858 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2859 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2860 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2861 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2862 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2863 // We update all parameters; at this point, we have one minimizer.
2864 if (set_parameters == 0 || set_parameter_size == 0)
2865 compiled_data->minimize.minimizer = minimizer;
2866 int i;
2867 if (set_parameters && set_parameter_size)
2868 {
2869 // We need to record which minimizer goes with these parameters.
2870 if (!compiled_data->minimize.parameters)
2871 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2872 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2873 set_minimizer_for_parameter->minimizer = minimizer;
2874 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2875 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2876 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2877 }
2878 // If reset is true, clear the parameters array.
2879 if (reset && compiled_data->minimize.parameters)
2880 {
2881 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2882 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2883 ccv_array_clear(compiled_data->minimize.parameters);
2884 }
2885 if (!compiled_data->update_nodes)
2886 return;
2887 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2888 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 2888, __extension__ __PRETTY_FUNCTION__); }))
;
2889 if (saved_aux_size > old_max_saved_aux_size)
2890 {
2891 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 2891, __extension__ __PRETTY_FUNCTION__
); }))
;
2892 // Reallocate first, move them around later.
2893 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2894 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2895 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2896 // We need to do this from back to front: because saved_aux_size > old_saved_aux_size, the regions could overlap.
2897 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2898 }
2899 int flag = 0;
2900 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2901 if (set_parameters && set_parameter_size)
2902 {
2903 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2904 for (i = 0; i < set_parameter_size; i++)
2905 {
2906 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2907 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 2907, __extension__ __PRETTY_FUNCTION__
); }))
;
2908 const int old_rnum = parameter_indices->rnum;
2909 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2910 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2911 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 2911, __extension__ __PRETTY_FUNCTION__
); }))
;
2912 if (param_ref >= 0)
2913 {
2914 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2914, __extension__ __PRETTY_FUNCTION__
); }))
;
2915 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
2916 parameter_indices->rnum = old_rnum + 1;
2917 }
2918 }
2919 // We may have duplicated indices, but that is OK; we will simply set them twice.
2920 for (i = 0; i < parameter_indices->rnum; i++)
2921 {
2922 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
2923 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2924 flag = 1;
2925 }
2926 ccv_array_free(parameter_indices);
2927 } else {
2928 for (i = 0; i < parameter_size; i++)
2929 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2930 flag = 1;
2931 if (compiled_data->minimize.parameters)
2932 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2933 flag = 1;
2934 }
2935 if (flag)
2936 {
2937 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up the apply-gradients graph.
2938 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2939 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2940 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2941 }
2942}
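A small sketch using only the two functions above: re-applying the model-wide minimizer with reset == 1 clears any per-parameter overrides previously registered through set_parameters (note that the assert above requires set_parameters == 0 and set_parameter_size == 0 when resetting).

/* Sketch: drop all per-parameter minimizer overrides and go back to one minimizer. */
const ccv_nnc_cmd_t minimizer = ccv_cnnp_model_minimizer(model);
ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);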
2943
2944void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2945{
2946 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2947 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2947, __extension__ __PRETTY_FUNCTION__); }))
;
2948 compiled_data->compile_params = compile_params;
2949}
2950
2951void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2952{
2953 if (model->graph && out_size > 0)
2954 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2955 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2956 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2957 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2958 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2959 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2960 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2961}
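A minimal sketch of dumping the graphs above to Graphviz files; out_size gates which graphs are written, so passing two streams emits the symbolic graph and, if compiled, the concrete graph (error handling for fopen is omitted, and flags == 0 requests the default rendering).

/* Sketch: write symbolic.dot and graph.dot for inspection with dot(1). */
FILE* outs[2] = { fopen("symbolic.dot", "w+"), fopen("graph.dot", "w+") };
ccv_cnnp_model_dot(model, 0, outs, 2);
fclose(outs[0]);
fclose(outs[1]);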
2962
2963void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2964{
2965 if (model->graph)
2966 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2967}
2968
2969static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2970{
2971 int i;
2972 const int parameter_size = compiled_data->parameters->rnum;
2973 ccv_array_free(compiled_data->parameters);
2974 if (compiled_data->parameter_flags)
2975 ccfreefree(compiled_data->parameter_flags);
2976 const int internal_size = compiled_data->internals->rnum;
2977 ccv_array_free(compiled_data->internals);
2978 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 2978, __extension__ __PRETTY_FUNCTION__
); }))
;
2979 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 2979, __extension__ __PRETTY_FUNCTION__
); }))
;
2980 for (i = 0; i < parameter_size; i++)
2981 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
2982 ccv_array_free(compiled_data->ids.parameters);
2983 for (i = 0; i < internal_size; i++)
2984 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
2985 ccv_array_free(compiled_data->ids.internals);
2986 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2987 if (compiled_data->tensors.parameters)
2988 {
2989 for (i = 0; i < parameter_size * parallel_count; i++)
2990 // If it is not tagged as borrowed from another model (low bit set), we can free it.
2991 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2992 if (compiled_data->tensors.parameters[i])
2993 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2994 for (i = 0; i < internal_size * parallel_count; i++)
2995 if (compiled_data->tensors.internals[i])
2996 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2997 ccfreefree(compiled_data->tensors.parameters);
2998 }
2999 if (compiled_data->tensors.gradients)
3000 {
3001 for (i = 0; i < parameter_size * parallel_count; i++)
3002 {
3003 if (compiled_data->tensors.gradients[i])
3004 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3005 if (compiled_data->tensors.accum_gradients[i])
3006 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3007 }
3008 ccfreefree(compiled_data->tensors.gradients);
3009 }
3010 if (compiled_data->minimize.parameters)
3011 {
3012 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3013 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3014 ccv_array_free(compiled_data->minimize.parameters);
3015 }
3016 if (compiled_data->rewindables)
3017 ccv_array_free(compiled_data->rewindables);
3018 if (compiled_data->tensors_init.v)
3019 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
3020 if (compiled_data->evaluate.tos)
3021 ccfreefree(compiled_data->evaluate.tos);
3022 compiled_data->evaluate.tos = 0;
3023 if (compiled_data->stream_map)
3024 {
3025 khiter_t k;
3026 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
3027 {
3028 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
3029 continue;
3030 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
3031 ccv_nnc_stream_context_free(stream);
3032 }
3033 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
3034 }
3035 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3036 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3037 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3038 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3039 if (compiled_data->gradient_checkpoints)
3040 {
3041 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3042 {
3043 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
3044 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 3044, __extension__ __PRETTY_FUNCTION__
); }))
;
3045 ccfreefree(checkpoint->inputs);
3046 ccv_array_free(checkpoint->tensor_symbols);
3047 }
3048 ccv_array_free(compiled_data->gradient_checkpoints);
3049 }
3050 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3051 ccfreefree(compiled_data);
3052}
3053
3054void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3055{
3056 if (model->isa->deinit)
3057 model->isa->deinit(model);
3058 if (model->io)
3059 {
3060 int i;
3061 for (i = 0; i < model->io->rnum; i++)
3062 {
3063 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
3064 if (model_io->outgoings)
3065 ccv_array_free(model_io->outgoings);
3066 if (model_io->incomings)
3067 ccv_array_free(model_io->incomings);
3068 if (model_io->dependencies)
3069 ccv_array_free(model_io->dependencies);
3070 ccfreefree(model_io);
3071 }
3072 ccv_array_free(model->io);
3073 }
3074 if (model->parameter_indices)
3075 ccv_array_free(model->parameter_indices);
3076 if (model->inputs)
3077 ccfreefree(model->inputs);
3078 if (model->graph)
3079 ccv_nnc_symbolic_graph_free(model->graph);
3080 if (model->compiled_data)
3081 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3082 if (model->name)
3083 ccfreefree(model->name);
3084 ccfreefree(model);
3085}
3086
3087void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3088{
3089 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3090 if (!compiled_data)
3091 return;
3092 if (compiled_data->graph)
3093 ccv_nnc_graph_cancel(compiled_data->graph);
3094 if (compiled_data->apply_gradients.graph)
3095 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3096}
3097
3098void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3099{
3100 model->exec_flags = flags;
3101}
3102
3103int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3104{
3105 return model->exec_flags;
3106}