Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2394, column 1
1st function call argument is an uninitialized value
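This message comes from the clang static analyzer (the core.CallAndMessage checker): on at least one path, a variable is passed as the first argument of a function call before it has ever been assigned. The flagged call site is at line 2394 of ccv_cnnp_model.c, which falls outside the excerpt reproduced below. The following is only a minimal, hypothetical sketch of the warning class (it is not code from ccv_cnnp_model.c), together with the usual fix of initializing the variable on every path:

#include <stdio.h>

static void consume(int value)
{
	printf("%d\n", value);
}

static void report_example(int flag)
{
	int value; // only assigned when flag is non-zero
	if (flag)
		value = 1;
	// On the flag == 0 path 'value' is still uninitialized here, so the analyzer
	// reports "1st function call argument is an uninitialized value".
	consume(value);
}

static void fixed_example(int flag)
{
	int value = 0; // initialized on every path; the report goes away
	if (flag)
		value = 1;
	consume(value);
}

int main(void)
{
	report_example(1);
	fixed_example(0);
	return 0;
}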

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2025-12-12-091359-782764-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7#ifdef HAVE_CUDA
8#include "gpu/ccv_nnc_compat.h"
9#endif
10
11// MARK - Level-5 API
12
13ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
14{
15 if (!model->io)
16 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
17 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
18 model_io->param_ref = 0;
19 model_io->param_sel = 0;
20 model_io->visit = 0;
21 model_io->model = model;
22 model_io->dependencies = 0;
23 model_io->dependents = 0;
24 model_io->outgoings = 0;
25 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
26 ccv_array_push(model->io, &model_io);
27 if (input_size > 0)
28 {
29 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
30 ccv_array_resize(model_io->incomings, input_size);
31 int i;
32 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
33 for (i = 0; i < input_size; i++)
34 {
35 if (!inputs[i]->outgoings)
36 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
37 ccv_array_push(inputs[i]->outgoings, &model_io);
38 }
39 } else {
40 model_io->incomings = 0;
41 }
42 return model_io;
43}
44
45void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
46{
47 assert(dependency_size > 0);
48 if (!model_io->dependencies)
49 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
50 int i, j;
51 for (i = 0; i < dependency_size; i++)
52 {
53 int flag = 0;
54 // Check if it already exists or not.
55 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
56 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
57 flag = 1;
58 if (flag)
59 continue;
60 ccv_array_push(model_io->dependencies, dependencies + i);
61 ++dependencies[i]->dependents;
62 }
63}
64
65int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
66{
67 return model->output_size;
68}
69
70int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
71{
72 // If the model is compiled, trainability defaults to 1 unless it has been explicitly set.
73 if (model->compiled_data)
74 return model->is_trainable >= 0 ? model->is_trainable : 1;
75 return model->is_trainable;
76}
77
78ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
79{
80 if (!model->io)
81 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
82 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
83 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
84 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
85 model_io->visit = 0;
86 model_io->model = model;
87 model_io->outputs = 0;
88 model_io->dependencies = 0;
89 model_io->dependents = 0;
90 model_io->incomings = 0;
91 model_io->outgoings = 0;
92 ccv_array_push(model->io, &model_io);
93 return model_io;
94}
95
96void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
97{
98 model->notify_hook.func = func;
99 model->notify_hook.context = context;
100}
101
102void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
103{
104 if (model->notify_hook.func)
105 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
106 if (model->isa->notify)
107 model->isa->notify(model, tag, payload);
108}
109
110static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
111{
112 int i, j;
113 for (i = 0; i < graph_exec_symbol_size; i++)
114 {
115 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
116 // Check whether this graph exec symbol has any duplicate.
117 for (j = i + 1; j < graph_exec_symbol_size;)
118 {
119 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
120 // If there is an identical graph exec symbol, remove it.
121 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
122 {
123 if (j + 1 < graph_exec_symbol_size)
124 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
125 --graph_exec_symbol_size;
126 continue;
127 }
128 ++j;
129 }
130 }
131 return graph_exec_symbol_size;
132}
133
134void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
135{
136 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
137 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
138 int i;
139 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
140 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
141 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
142 {
143 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
144 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
145 {
146 // Only add to parameter_indices if it is trainable.
147 if (add_to_array_context->add_parameter_indices)
148 ccv_array_add_unique_int(model->parameter_indices, i);
149 // Found it, return, don't add it.
150 return;
151 }
152 }
153 // Only add to parameter_indices if it is trainable.
154 if (add_to_array_context->add_parameter_indices)
155 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
156 // This is a new one, no need to add_unique_int, it is unique.
157 ccv_array_push(add_to_array_context->symbols, &symbol);
158 if (add_to_array_context->trainables)
159 ccv_array_push(add_to_array_context->trainables, &is_trainable);
160 char id[2048];
161 id[0] = add_to_array_context->prefix;
162 id[1] = '-';
163 int total_len = 2;
164 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
165 {
166 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
167 int len;
168 if (name->name && name->name[0] != '\0')
169 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
170 else
171 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
172 total_len += len;
173 if (total_len >= 2047)
174 break;
175 }
176 if (total_len < 2047)
177 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
178 assert(total_len < 2048);
179 char *heap_id = (char*)ccmalloc(total_len + 1);
180 memcpy(heap_id, id, total_len + 1);
181 ccv_array_push(add_to_array_context->ids, &heap_id);
182 ++add_to_array_context->sequence->it;
183}
184
185static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
186{
187 compiled_data->f = compiled_data->fits + output_size;
188 compiled_data->xpu_alloc.mp_hdr = -1;
189 compiled_data->xpu_alloc.freed = kh_init(dy_str);
190 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
191 compiled_data->gradient_checkpoints = gradient_checkpoints;
192}
193
194typedef struct {
195 void* old_graph_exec_symbol_new_hook_context;
196 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
197 ccv_nnc_symbolic_graph_t* graph;
198 ccv_cnnp_model_build_data_t* build_data;
199} ccv_cnnp_model_set_exec_flags_context_t;
200
201static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
202{
203 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
204 if (flags_context->build_data->exec_flags)
205 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
206 if (flags_context->old_graph_exec_symbol_new_hook)
207 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
208}
209
210static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
211{
212 assert(model->graph);
213 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
214 int i;
215 for (i = 0; i < input_size; i++)
216 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
217 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
218 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
219 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
220 ccv_cnnp_model_sequence_t model_sequence = {
221 .bank = kh_init(ccv_cnnp_model_name_bank)
222 };
223 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
224 .add_parameter_indices = 1,
225 .prefix = 't',
226 .sequence = &model_sequence,
227 .symbols = parameters,
228 .ids = parameter_ids,
229 .trainables = parameter_trainables,
230 };
231 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
232 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
233 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
234 .add_parameter_indices = 0,
235 .prefix = 'r',
236 .sequence = &model_sequence,
237 .symbols = internals,
238 .ids = internal_ids,
239 .trainables = 0,
240 };
241 ccv_cnnp_model_build_data_t build_data = {
242 .exec_flags = 0,
243 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
244 .model_sequence = &model_sequence,
245 .add_to_array = ccv_cnnp_model_add_to_array,
246 .parameters = parameters,
247 .context = {
248 .add_to_parameter = &add_to_parameter_context,
249 .add_to_output = &add_to_output_context,
250 },
251 .gradient_checkpoints = 0,
252 };
253 model->data = &build_data;
254 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
255 .graph = model->graph,
256 .build_data = &build_data,
257 .old_graph_exec_symbol_new_hook = 0,
258 .old_graph_exec_symbol_new_hook_context = 0
259 };
260 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
261 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
262 // Reset back to previous hook.
263 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
264 for (i = 0; i < model->output_size; i++)
265 {
266 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
267 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
268 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
269 continue;
270 // If the output is an alias, insert a data transform regardless, for result correctness (we cannot bind an alias). You can check the ccv_nnc_tensor_bind_symbol method
271 // to see that we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind an alias tensor itself (this is expected, sort of, to be
272 // honest, because we cannot handle the case where the alias is part of the original tensor but is bound differently).
273 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
274 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
275 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
276 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
277 }
278 model->data = 0;
279 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
280 if (model_sequence.sequences)
281 ccv_array_free(model_sequence.sequences);
282 // Check if there are parameters that are not trainable. If there are, we will allocate a uint64 bitmap to record that.
283 int not_trainables = 0;
284 // Assert no parameter is alias.
285 for (i = 0; i < parameters->rnum; i++)
286 {
287 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
288 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
289 assert(alias_to.graph == 0); // Cannot find the one alias to.
290 if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
291 not_trainables = 1;
292 }
293 assert(parameters->rnum == parameter_trainables->rnum);
294 uint64_t* parameter_flags = 0;
295 if (not_trainables)
296 {
297 parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
298 for (i = 0; i < parameter_trainables->rnum; i++)
299 if (*(int*)ccv_array_get(parameter_trainables, i))
300 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
301 }
302 ccv_array_free(parameter_trainables);
303 // Assert no internal is alias.
304 for (i = 0; i < internals->rnum; i++)
305 {
306 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
307 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
308 assert(alias_to.graph == 0); // Cannot find the one alias to.
309 }
310 const int output_size = model->output_size;
311 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
312 const int parameters_rnum = parameters->rnum;
313 if (input_size > 0)
314 {
315 ccv_array_resize(parameters, parameters_rnum + input_size);
316 memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
317 }
318 ccv_nnc_symbolic_graph_simplify(model->graph,
319 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
320 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
321 CCV_NNC_SIMPLIFY_OPS_FUSION,
322 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
323 ccv_array_get(parameters, 0), parameters_rnum + input_size,
324 model->outputs, output_size,
325 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
326 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
327 // Size it down.
328 parameters->rnum = parameters_rnum;
329 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
330 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
331 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
332 assert(evaluate_to_size > 0);
333 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
334 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
335 compiled_data->loss = loss;
336 if (loss.cmd == CCV_NNC_NOOP)
337 {
338 // If no loss function is provided, there are no fits.
339 for (i = 0; i < output_size; i++)
340 {
341 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
342 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
343 if (alias_to.d < 0)
344 compiled_data->f[i] = model->outputs[i];
345 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
346 int ofs[CCV_NNC_MAX_DIM_ALLOC];
347 int inc[CCV_NNC_MAX_DIM_ALLOC];
348 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
349 int j;
350 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
351 { assert(ofs[j] == 0); } // There is no ofs.
352 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
353 }
354 }
355 } else {
356 for (i = 0; i < output_size; i++)
357 {
358 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
359 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
360 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
361 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
362 }
363 }
364 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
365 ccv_nnc_symbolic_graph_simplify(model->graph,
366 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
367 0, 0, // No need to provide binds at this point.
368 compiled_data->f, model->output_size,
369 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
370 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
371 // If inputs are from GPU, stream type is GPU.
372 compiled_data->parameters = parameters;
373 compiled_data->parameter_flags = parameter_flags;
374 compiled_data->internals = internals;
375 compiled_data->ids.parameters = parameter_ids;
376 compiled_data->ids.internals = internal_ids;
377 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
378}
379
380static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
381{
382 ccv_array_t* const stack = (ccv_array_t*)context;
383 ccv_array_push(stack, &symbol.d);
384}
385
386static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
387{
388 const ccv_nnc_tensor_symbol_t src_symbol = {
389 .d = src_index,
390 .graph = src_graph
391 };
392 const ccv_nnc_tensor_symbol_t dest_symbol = {
393 .d = dest_index,
394 .graph = dest_graph
395 };
396 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
397 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
398 int ofs[CCV_NNC_MAX_DIM_ALLOC];
399 int inc[CCV_NNC_MAX_DIM_ALLOC];
400 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
401 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
402}
403
404static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
405{
406 const ccv_nnc_tensor_symbol_t src_symbol = {
407 .d = src_index,
408 .graph = src_graph
409 };
410 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
411 const ccv_nnc_tensor_symbol_t dest_symbol = {
412 .d = dest_index,
413 .graph = dest_graph
414 };
415 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
416 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
417}
418
419static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
420static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
421
422typedef struct {
423 int parallel_count;
424 ccv_nnc_symbolic_graph_t* graph;
425 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
426} ccv_nnc_graph_exec_update_t;
427
428static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
429{
430 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
431 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
432 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
433 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
434 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
435 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
436 const int parallel_count = graph_exec_update->parallel_count;
437 int i;
438 for (i = 1; i < parallel_count; i++)
439 {
440 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
441 if (!CCV_NO_GRAPH_EXEC(copy))
442 {
443 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
444 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
445 }
446 }
447}
448
449void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
450{
451 assert(model->graph);
452 assert(model->compiled_data);
453 assert(!init->graph);
454 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
455 init->graph = ccv_nnc_symbolic_graph_new();
456 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
457 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
458 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
459 init->parallel_count = model->parallel_count;
460 init->memory_compression = model->memory_compression;
461 init->memory_reduction = model->memory_reduction;
462 init->gradient_checkpointing = model->gradient_checkpointing;
463 init->compiled_data->stream_type = model->compiled_data->stream_type;
464 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
465 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
466 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
467 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
468 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
469 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
470 int i, j;
471 // Verify that parameters, internals and saved_aux in both graphs have the same dimensionality.
472 for (i = 0; i < compiled_data->parameters->rnum; i++)
473 {
474 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
475 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
476 }
477 for (i = 0; i < compiled_data->internals->rnum; i++)
478 {
479 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
480 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
481 }
482 // Update inputs.
483 assert(model->input_size == init->input_size);
484 for (i = 0; i < model->input_size; i++)
485 if (model->inputs[i].d >= 0)
486 {
487 assert(init->inputs[i].d >= 0);
488 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
489 }
490 // Update outputs.
491 assert(model->output_size == init->output_size);
492 for (i = 0; i < model->output_size; i++)
493 {
494 if (model->outputs[i].d >= 0)
495 {
496 assert(init->outputs[i].d >= 0);
497 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
498 }
499 if (model->outputs[i].d != model->compiled_data->f[i].d)
500 {
501 assert(init->outputs[i].d != init->compiled_data->f[i].d);
502 if (model->compiled_data->f[i].d >= 0)
503 {
504 assert(init->compiled_data->f[i].d >= 0);
505 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
506 }
507 }
508 }
509 // Go through the graph to set tensor on matching symbols
510 for (i = 0; i < stack->rnum; i++)
511 {
512 const int d = *(int*)ccv_array_get(stack, i);
513 // If exceed range, skip.
514 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
515 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
516 continue;
517 const ccv_nnc_graph_exec_symbol_t src_symbol = {
518 .d = d,
519 .graph = init->graph
520 };
521 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
522 .d = d,
523 .graph = model->graph
524 };
525 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
526 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
527 // If the command doesn't match, skip.
528 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
529 continue;
530 // Now get all the inputs and outputs; if they match, set them.
531 const int* src_inputs;
532 int src_input_size;
533 const int* src_outputs;
534 int src_output_size;
535 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
536 const int* dest_inputs;
537 int dest_input_size;
538 const int* dest_outputs;
539 int dest_output_size;
540 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
541 // We may have unmatched input / output size because this is the minimizer and it has
542 // different saved_aux (for example, when we shrunk with CMD_NOOP).
543 if (src_input_size != dest_input_size)
544 continue;
545 if (src_output_size != dest_output_size)
546 continue;
547 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
548 // There may be mismatches between the source tensor symbols and destination tensor symbols. The reason is that
549 // we may pass in the minimizer later, therefore, we may allocate tensors for the minimizer later in the original
550 // graph, whereas in the newly created graph it is streamlined (the minimizer exists from the beginning). That
551 // will make the order of tensor symbol creation different, and therefore the mapping of exactly which tensor is which wrong as
552 // well. However, setting a new minimizer won't change the exec symbol ordering, because we never create new exec
553 // symbols after the gradient init step. Setting a new minimizer just updates that exec symbol's settings; it is not
554 // a new exec symbol.
555 for (j = 0; j < src_input_size; j++)
556 if (src_inputs[j] >= 0)
557 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
558 for (j = 0; j < src_output_size; j++)
559 if (src_outputs[j] >= 0)
560 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
561 }
562 ccv_array_free(stack);
563 // After this, we get all tensors in the model graph resolved through tensor_auto.
564 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
565 // Verify symbols we get matches.
566 const int parameter_size = compiled_data->parameters->rnum;
567 for (i = 0; i < parameter_size; i++)
568 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
569 const int internal_size = compiled_data->internals->rnum;
570 for (i = 0; i < internal_size; i++)
571 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
572 // Go through compiled data.
573 if (compiled_data->tensor_arena)
574 {
575 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
576 if (flag == 0 && compiled_data->graph_exec_arena)
577 {
578 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
579 // Since we will reinit, if we previously set is_test, we need to set it again.
580 if (compiled_data->is_test)
581 {
582 const int parallel_count = ccv_max(model->parallel_count, 1);
583 ccv_nnc_graph_exec_update_t update = {
584 .parallel_count = parallel_count,
585 .graph = model->graph,
586 .graph_exec_arena = compiled_data->graph_exec_arena,
587 };
588 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
589 }
590 } else
591 // Free-up tensor arena & graph exec arena.
592 _ccv_cnnp_compiled_data_graph_free(compiled_data);
593 }
594 // There are other compiled graphs, for accum and apply gradients.
595 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
596 // Thus, it won't impact the shape of gradients (only outgrad). Since we don't allocate
597 // outgrad ourselves, it is not a concern. For normal gradients, the shape cannot
598 // be changed, otherwise the parameters' shape would be meaningless. The same goes for internals.
599 // That is why we don't update these compiled graphs at all at this point.
600 // Free the model, we've already "absorbed" it.
601 ccv_cnnp_model_free(init);
602}
603
604void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
605{
606 assert(input_size == model->input_size || model->input_size == 0);
607 if (model->input_size == 0)
608 model->input_size = input_size;
609 if (!model->graph) // The graph is not compiled yet.
610 {
611 model->graph = ccv_nnc_symbolic_graph_new();
612 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
613 assert(model->compiled_data);
614 int i, flag = 0;
615 for (i = 0; !flag && i < input_size; i++)
616 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
617 // If inputs are from GPU, stream type is GPU.
618 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
619 model->compiled_data->minimize.minimizer = minimizer;
620 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
621 } else {
622 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
623 // And then absorb the "new model" to the old one.
624 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
625 ccv_cnnp_model_absorb(model, init, inputs, input_size);
626 // Reset minimizer.
627 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
628 }
629}
630
631ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
632{
633 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
634 new_model->is_trainable = is_trainable;
635 return new_model;
636}
637
638void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
639{
640 assert(model->graph);
641 assert(output_size == model->output_size);
642 ccv_nnc_symbolic_graph_t* const graph = model->graph;
643 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
644 int i;
645 for (i = 0; i < output_size; i++)
646 {
647 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
648 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
649 }
650}
651
652void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
653{
654 if (workspace_size == model->workspace_size)
655 return;
656 model->workspace_size = workspace_size;
657 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
658 if (compiled_data && compiled_data->graph)
659 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
660}
661
662size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
663{
664 return model->workspace_size;
665}
666
667void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
668{
669 if (parallel == 0)
670 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
671 else
672 model->parallel_count = parallel;
673 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
674 if (compiled_data)
675 { assert(!compiled_data->graph); }
676}
677
678void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
679{
680 model->max_stream_count = max_stream_count;
681 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
682 if (compiled_data)
683 { assert(!compiled_data->graph); }
684}
685
686void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
687{
688 model->memory_compression = memory_compression;
689 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
690 if (compiled_data)
691 { assert(!compiled_data->graph); }
692}
693
694void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
695{
696 model->memory_reduction = memory_reduction;
697 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
698 if (compiled_data)
699 { assert(!compiled_data->graph); }
700}
701
702void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
703{
704 model->gradient_checkpointing = gradient_checkpointing;
705}
706
707int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
708{
709 return model->gradient_checkpointing;
710}
711
712typedef struct {
713 int parallel_count;
714 ccv_nnc_symbolic_graph_t* graph;
715 ccv_cnnp_compiled_data_t* compiled_data;
716 ccv_nnc_tensor_arena_t* tensor_arena;
717} ccv_nnc_tensor_init_states_t;
718
719static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
720{
721 int i;
722 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
723 for (i = 0; i < compiled_data->parameters->rnum; i++)
724 {
725 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
726 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
727 return 1;
728 }
729 for (i = 0; i < compiled_data->internals->rnum; i++)
730 {
731 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
732 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
733 return 1;
734 }
735 return 0;
736}
737
738static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
739{
740 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
741 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
742 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
743 if (!output_tensor)
744 return;
745 const int d = output_symbol.d;
746 assert(d < tensor_init_states->compiled_data->tensors_init.size);
747 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
748 if (init_v[d >> 5] & (1u << (d & 0x1f)))
749 return;
750 init_v[d >> 5] |= (1u << (d & 0x1f));
751 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
752 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
753 const int parallel_count = tensor_init_states->parallel_count;
754 int i;
755 for (i = 1; i < parallel_count; i++)
756 {
757 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
758 if (copy)
759 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
760 }
761}
762
763 // This method can only handle cases where we added new tensors and execs, never deleted any. This invariant holds because
764 // we set up everything (including calling the simplify method) in the ccv_cnnp_model_compile method, before this rewind setup.
765static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
766{
767 assert(model->graph);
768 assert(model->compiled_data);
769 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
770 assert(compiled_data->rewindables);
771 int i;
772 for (i = 0; i < compiled_data->rewindables->rnum; i++)
773 {
774 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
775 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
776 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
777 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
778 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
779 }
780 ccv_array_clear(compiled_data->rewindables);
781 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
782}
783
784static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
785{
786 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
787 .type = CCV_CNNP_REWIND_TENSOR,
788 .tensor = symbol
789 };
790 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
791 ccv_array_push(rewind_symbols, &rewind_symbol);
792}
793
794 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
795{
796 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
797 .type = CCV_CNNP_REWIND_TENSOR,
798 .tensor = symbol
799 };
800 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
801 ccv_array_push(rewind_symbols, &rewind_symbol);
802}
803
804static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
805{
806 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
807 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
808 .graph_exec = symbol
809 };
810 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
811 ccv_array_push(rewind_symbols, &rewind_symbol);
812}
813
814static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
815{
816 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
817 if (!CCV_NO_GRAPH_EXEC(update_exec))
818 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
819 int i;
820 for (i = 1; i < parallel_count; i++)
821 {
822 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
823 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
824 if (!CCV_NO_GRAPH_EXEC(copy))
825 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
826 }
827}
828
829static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
830{
831 assert(compiled_data);
832 assert(symbolic_graph);
833 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
834 int i;
835 for (i = 1; i < parallel_count; i++)
836 {
837 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
838 if (copy_symbol.graph)
839 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
840 }
841 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
842 if (graph_exec_arena)
843 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
844 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
845 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
846 if (gradient_graph_exec_arena)
847 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
848}
849
850static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
851{
852 int this_parameter_flag = 0;
853 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
854 return this_parameter_flag;
855 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
856 int j, k;
857 // For no-op, we can preserve previous saved_aux_size.
858 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
859 {
860 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
861 // saved_aux_size was; otherwise we would reinit the saved_aux repeatedly whenever we switch between
862 // noop and a real minimizer. We don't want that, because high-level frameworks switch to the noop
863 // minimizer precisely to keep selected model parameters from updating.
864 int old_saved_aux_size;
865 if (old_minimizer.cmd == CCV_NNC_NOOP)
866 {
867 int input_size;
868 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
869 if (input_size < 2) // This is not legit.
870 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
871 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
872 old_saved_aux_size = input_size - 2;
873 } else
874 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
875 if (old_saved_aux_size != saved_aux_size)
876 {
877 this_parameter_flag = 1;
878 if (saved_aux_size > old_saved_aux_size)
879 {
880 // Allocate new tensor symbols.
881 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
882 for (j = old_saved_aux_size; j < saved_aux_size; j++)
883 {
884 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
885 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
886 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
887 for (k = 1; k < parallel_count; k++)
888 {
889 ccv_nnc_tensor_param_t dev_info = info;
890 if (k != device_id)
891 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
892 else
893 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
894 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
895 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
896 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
897 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
898 }
899 }
900 } else {
901 for (j = saved_aux_size; j < old_saved_aux_size; j++)
902 {
903 for (k = 1; k < parallel_count; k++)
904 {
905 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
906 if (src_copy.d >= 0)
907 {
908 ccv_nnc_tensor_symbol_free(graph, src_copy);
909 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
910 }
911 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
912 if (dest_copy.d >= 0)
913 {
914 ccv_nnc_tensor_symbol_free(graph, dest_copy);
915 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
916 }
917 }
918 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
919 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
920 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
921 }
922 }
923 }
924 }
925 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
926 if (this_parameter_flag)
927 {
928 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
929 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
930 const int* inputs = 0;
931 int input_size = 0;
932 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
933 assert(input_size >= 1);
934 update_inputs[0].d = inputs[0];
935 update_inputs[0].graph = graph;
936 update_inputs[1].d = inputs[1];
937 update_inputs[1].graph = graph;
938 update_outputs[0] = updated_parameters[parameter_indice];
939 for (j = 0; j < saved_aux_size; j++)
940 {
941 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
942 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
943 }
944 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
945 for (k = 1; k < parallel_count; k++)
946 {
947 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
948 assert(copy.d >= 0);
949 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
950 assert(input_size >= 1);
951 update_inputs[0].d = inputs[0];
952 update_inputs[0].graph = graph;
953 update_inputs[1].d = inputs[1];
954 update_inputs[1].graph = graph;
955 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
956 for (j = 0; j < saved_aux_size; j++)
957 {
958 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
959 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
960 }
961 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
962 }
963 }
964 return this_parameter_flag;
965}
966
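// A minimal sketch (not from ccv_cnnp_model.c) of the saved_aux addressing used above: a flat
// [parameter_size x max_saved_aux_size] table, entry (p, j) at p * max_saved_aux_size + j. The
// update node is then rewired as (gradient, parameter, aux sources...) -> (updated parameter,
// aux destinations...), which is why update_inputs holds saved_aux_size + 2 symbols and
// update_outputs holds saved_aux_size + 1. The sizes below are hypothetical.
#include <stdio.h>

int main(void)
{
	const int max_saved_aux_size = 2; // e.g. a minimizer that keeps two aux tensors per parameter
	const int parameter_indice = 3;
	int j;
	for (j = 0; j < max_saved_aux_size; j++)
		printf("parameter %d, aux %d -> flat index %d\n", parameter_indice, j, parameter_indice * max_saved_aux_size + j);
	return 0;
}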
967typedef struct {
968 int parameter_size;
969 ccv_nnc_cmd_t minimizer;
970 ccv_cnnp_model_io_t parameters[1];
971} ccv_cnnp_set_minimizer_for_parameter_t;
972
973static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
974{
975 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
976 assert(compiled_data);
977 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
978 // We update all parameters; at this point, we have one minimizer.
979 const int parameter_size = compiled_data->parameters->rnum;
980 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
981 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
982 assert(symbolic_graph);
983 const int parallel_count = ccv_max(model->parallel_count, 1);
984 ccv_array_t* const parameters = compiled_data->minimize.parameters;
985 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
986 int i, j, flag = 0;
987 for (i = 0; i < parameters->rnum; i++)
988 {
989 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
990 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
991 {
992 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
993 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
994 const int old_rnum = parameter_indices->rnum;
995 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
996 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
997 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
998 if (param_ref >= 0)
999 {
1000 assert(param_ref + old_rnum < parameter_indices->rnum);
1001 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
1002 parameter_indices->rnum = old_rnum + 1;
1003 }
1004 }
1005 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1006 // We may have duplicated indices, but that is OK, we will set it twice.
1007 for (j = 0; j < parameter_indices->rnum; j++)
1008 {
1009 const int d = *(int*)ccv_array_get(parameter_indices, j);
1010 assert(d <= parameter_size);
1011 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1012 flag = 1;
1013 }
1014 ccv_array_clear(parameter_indices);
1015 }
1016 ccv_array_free(parameter_indices);
1017 return flag;
1018}
1019
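// A minimal sketch of the param_sel / param_ref decoding used in the function above, under the
// assumption that zero is reserved (hence the asserts), positive values are 1-based indices, and
// negative values pass through unchanged as a "select everything" sentinel. The helper name is
// hypothetical and not part of ccv_cnnp_model.c.
static int decode_param_index(const int encoded)
{
	return encoded > 0 ? encoded - 1 : encoded; // 1-based -> 0-based; negatives pass through.
}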
1020static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1021{
1022 if (new_saved_aux_size == old_saved_aux_size)
1023 return;
1024 assert(new_saved_aux_size > old_saved_aux_size);
1025 int i, j;
1026 for (i = parameter_size - 1; i >= 0; i--)
1027 {
1028 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1029 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
1030 for (j = old_saved_aux_size - 1; j >= 0; j--)
1031 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1032 }
1033}
1034
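// A minimal sketch of the in-place scatter above, with plain ints standing in for
// ccv_nnc_tensor_symbol_map_t entries and -1 standing in for NO_TENSOR_SYMBOL. Both loops run
// backwards so that widening the per-parameter stride from old_size to new_size never
// overwrites entries that still have to be moved.
#include <stdio.h>

static void scatter(int* const aux, const int parameter_size, const int old_size, const int new_size)
{
	int i, j;
	for (i = parameter_size - 1; i >= 0; i--)
	{
		for (j = new_size - 1; j >= old_size; j--)
			aux[i * new_size + j] = -1; // New slots start out empty.
		for (j = old_size - 1; j >= 0; j--)
			aux[i * new_size + j] = aux[i * old_size + j];
	}
}

int main(void)
{
	int aux[4] = { 10, 20, 0, 0 }; // Two parameters, stride grows from 1 to 2.
	scatter(aux, 2, 1, 2);
	printf("%d %d %d %d\n", aux[0], aux[1], aux[2], aux[3]); // Prints "10 -1 20 -1".
	return 0;
}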
1035static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1036{
1037 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1038 assert(compiled_data);
1039 if (!compiled_data->rewindables)
1040 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1041 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1042 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1043 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1044}
1045
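// A minimal sketch of the journaling pattern installed above, assuming a standalone symbolic
// graph instead of a model: the hook records every tensor symbol created after it is installed,
// so the caller can later "rewind" by freeing them in reverse creation order.
#include "ccv_nnc.h"

static void record_tensor_symbol(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
{
	ccv_array_push((ccv_array_t*)context, &symbol);
}

static void build_then_rewind(ccv_nnc_symbolic_graph_t* const graph)
{
	ccv_array_t* const journal = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
	ccv_nnc_tensor_symbol_new_hook(graph, record_tensor_symbol, journal, 0);
	// ... extend the graph here; every new tensor symbol lands in the journal ...
	int i;
	for (i = journal->rnum - 1; i >= 0; i--) // Rewind: free in reverse creation order.
		ccv_nnc_tensor_symbol_free(graph, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(journal, i));
	ccv_array_free(journal);
}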
1046static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1047{
1048 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1049 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1050 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1051 const int evaluate_to_size = compiled_data->evaluate.to_size;
1052 assert(evaluate_to_size > 0);
1053 const int parallel_count = ccv_max(model->parallel_count, 1);
1054 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1055 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1056 int i, j;
1057 const int output_size = model->output_size;
1058 assert(!fits || fit_size == output_size * parallel_count);
1059 if (fits)
1060 for (i = 0; i < output_size; i++)
1061 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1062 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1063 const int parameter_size = compiled_data->parameters->rnum;
1064 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1065 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1066 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1067 int parameter_size_maybe_more = parameter_size;
1068 compiled_data->disable_outgrad = disable_outgrad;
1069 int outgrad_size;
1070 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1071 outgrad_size = 0;
1072 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1073 outgrad_size = model->input_size;
1074 else {
1075 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1076 outgrad_size = 0;
1077 for (i = 0; i < model->input_size; i++)
1078 if (!(disable_outgrad & ((uint64_t)1 << i)))
1079 ++outgrad_size;
1080 }
1081 compiled_data->outgrad_size = outgrad_size;
1082 parameter_size_maybe_more += outgrad_size;
1083 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1084 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1085 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1086 compiled_data->backward.to_size = parameter_size_maybe_more;
1087 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1088 if (compiled_data->parameter_flags)
1089 {
1090 parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1091 for (i = 0; i < parameter_size; i++)
1092 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1093 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1094 else
1095 parameters[i] = NO_TENSOR_SYMBOL;
1096 }
1097 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1098 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1099 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1100 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1101 else { // Compute minimize with gradients including selected inputs.
1102 assert(model->input_size > 0);
1103 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1104 assert(outgrad_size > 0);
1105 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1106 j = 0;
1107 for (i = 0; i < model->input_size; i++)
1108 if (!(disable_outgrad & ((uint64_t)1 << i)))
1109 outgrads[j++] = model->inputs[i];
1110 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1111 }
1112 if (compiled_data->parameter_flags)
1113 ccfree(parameters);
1114 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1115 if (compiled_data->minimize.parameters)
1116 _ccv_cnnp_apply_parameters_with_minimizer(model);
1117 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1118 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1119 for (i = 0; i < output_size; i++)
1120 {
1121 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1122 // Init this to 1 so we can backprop.
1123 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1124 }
1125 compiled_data->backward.to_size = 0;
1126 for (i = 0; i < parameter_size_maybe_more; i++)
1127 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1128 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1129 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1130 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1131 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1132 {
1133 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1134 continue;
1135 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1136 const int* tos;
1137 int to_size;
1138 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1139 if (to_size == 0) // If this is the end (no minimizers afterwards), we need to attach this as a destination. Otherwise it is covered by update_nodes.
1140 {
1141 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1142 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1143 int flag = 0;
1144 const int outgrad_destination_start = ccv_max(0, destination_count - i);
1145 for (j = i - 1; !flag && j >= 0; j--)
1146 if (j + outgrad_destination_start < destination_count)
1147 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1148 if (!flag) // Only if we cannot find it, we add it.
1149 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1150 }
1151 }
1152 if (parallel_count > 1)
1153 {
1154 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1155 0, 0,
1156 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1157 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1158 0, 0, 0,
1159 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1160 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1161 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1162 for (i = 0; i < evaluate_to_size; i++)
1163 for (j = 1; j < parallel_count; j++)
1164 {
1165 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1166 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1167 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1168 }
1169 const int backward_to_size = compiled_data->backward.to_size;
1170 for (i = 0; i < backward_to_size; i++)
1171 for (j = 1; j < parallel_count; j++)
1172 {
1173 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1174 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1175 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1176 }
1177 }
1178 // Only use memory compression if we are in gradient parameter mode.
1179 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1180 {
1181 if (model->memory_compression)
1182 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1183 if (model->memory_reduction)
1184 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1185 }
1186 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1187 compiled_data->gradient_mode = gradient_mode;
1188}
1189
1190void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1191{
1192 assert(!compiled_data->tensors.parameters);
1193 const int parameter_size = compiled_data->parameters->rnum;
1194 const int parallel_count = ccv_max(model->parallel_count, 1);
1195 const int internal_size = compiled_data->internals->rnum;
1196 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1197 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1198 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1199 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1200}
1201
1202int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1203{
1204 int i, j;
1205 const int parameter_size = compiled_data->parameters->rnum;
1206 const int parallel_count = ccv_max(model->parallel_count, 1);
1207 const int internal_size = compiled_data->internals->rnum;
1208 for (i = 0; i < parameter_size; i++)
1209 {
1210 // Parameters have to be allocated all together.
1211 if (compiled_data->tensors.parameters[i])
1212 {
1213 for (j = 1; j < parallel_count; j++)
1214 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1215 continue;
1216 }
1217 return 1;
1218 }
1219 for (i = 0; i < internal_size; i++)
1220 {
1221 if (!compiled_data->tensors.internals[i])
1222 return 1;
1223 for (j = 1; j < parallel_count; j++)
1224 if (!compiled_data->tensors.internals[i + j * internal_size])
1225 return 1;
1226 }
1227 return 0;
1228}
1229
1230void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1231{
1232 int i, j;
1233 const int parameter_size = compiled_data->parameters->rnum;
1234 const int parallel_count = ccv_max(model->parallel_count, 1);
1235 const int internal_size = compiled_data->internals->rnum;
1236 for (i = 0; i < parameter_size; i++)
1237 {
1238 // Parameters have to be allocated all together.
1239 if (compiled_data->tensors.parameters[i])
1240 {
1241 for (j = 1; j < parallel_count; j++)
1242 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1243 continue;
1244 }
1245 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1246 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1247 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1248 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1249 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1250 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1251 for (j = 1; j < parallel_count; j++)
1252 {
1253 if (j != device_id)
1254 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1255 else
1256 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1257 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1258 }
1259 }
1260 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1261 for (i = 0; i < internal_size; i++)
1262 {
1263 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1264 const int d = retained.d;
1265 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1266 continue;
1267 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1268 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1269 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1270 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1271 if (!compiled_data->tensors.internals[i])
1272 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1273 for (j = 1; j < parallel_count; j++)
1274 {
1275 if (j != device_id)
1276 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1277 else
1278 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1279 if (!compiled_data->tensors.internals[i + j * internal_size])
1280 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1281 }
1282 }
1283 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1284}
1285
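// A minimal sketch of the tensors_init bitmap convention used above, assuming bit d of the
// uint32_t array marks tensor symbol d as already initialized (CCV_NNC_INIT_V strips the low
// pointer bit that flags "not fully allocated yet"). Helper names are hypothetical.
#include <stdint.h>
#include <stdio.h>

static int is_marked_init(const uint32_t* const v, const int d)
{
	return !!(v[d >> 5] & (1u << (d & 0x1f)));
}

static void mark_init(uint32_t* const v, const int d)
{
	v[d >> 5] |= (1u << (d & 0x1f));
}

int main(void)
{
	uint32_t v[2] = { 0, 0 }; // Room for 64 tensor symbols.
	mark_init(v, 37);
	printf("%d %d\n", is_marked_init(v, 37), is_marked_init(v, 3)); // Prints "1 0".
	return 0;
}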
1286static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1287{
1288 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1289 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1290}
1291
1292static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1293{
1294 assert(parallel_count > 0);
1295 int i, j;
1296 for (i = 0; i < tensor_size; i++)
1297 {
1298 if (!tensors[i])
1299 continue;
1300 const int d = tensor_symbols[i].d;
1301 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1302 continue;
1303 for (j = 1; j < parallel_count; j++)
1304 if (tensors[i + j * tensor_size])
1305 {
1306 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1307 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1308 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1309 }
1310 }
1311}
1312
1313static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1314{
1315 assert(parallel_count > 0);
1316 int i, j;
1317 for (i = 0; i < tensor_size; i++)
1318 {
1319 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1320 for (j = 1; j < parallel_count; j++)
1321 {
1322 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1323 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1324 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1325 { // We shouldn't allocate this, free it up.
1326 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1327 tensors[i + j * tensor_size] = 0;
1328 }
1329 }
1330 }
1331}
1332
1333static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1334{
1335 assert(parallel_count > 0);
1336 int i, j;
1337 for (i = 0; i < tensor_size; i++)
1338 {
1339 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1340 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1341 continue;
1342 if (graph)
1343 {
1344 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1345 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1346 tensor_symbol = alias_to;
1347 }
1348 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1349 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1350 {
1351 const ccv_nnc_tensor_bind_t retained_bind = {
1352 .symbol = tensor_symbol,
1353 .tensor = tensor
1354 };
1355 ccv_array_push(tensor_binds, &retained_bind);
1356 }
1357 for (j = 1; j < parallel_count; j++)
1358 {
1359 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1360 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1361 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1362 {
1363 const ccv_nnc_tensor_bind_t bind = {
1364 .symbol = copy,
1365 .tensor = tensors[i + j * tensor_size]
1366 };
1367 ccv_array_push(tensor_binds, &bind);
1368 }
1369 }
1370 }
1371}
1372
1373static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1374{
1375 if (compiled_data->graph)
1376 ccv_nnc_graph_free(compiled_data->graph);
1377 compiled_data->graph = 0;
1378 compiled_data->is_test = 0;
1379 if (compiled_data->tensor_arena)
1380 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1381 compiled_data->tensor_arena = 0;
1382 if (compiled_data->graph_exec_arena)
1383 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1384 compiled_data->graph_exec_arena = 0;
1385 if (compiled_data->backward.from_ops)
1386 ccfree(compiled_data->backward.from_ops);
1387 compiled_data->backward.from_ops = 0;
1388 if (compiled_data->evaluate.schedule)
1389 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1390 compiled_data->evaluate.schedule = 0;
1391 if (compiled_data->backward.schedule)
1392 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1393 compiled_data->backward.schedule = 0;
1394}
1395
1396static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1397{
1398 if (compiled_data->gradients)
1399 ccfree(compiled_data->gradients);
1400 compiled_data->gradients = 0;
1401 if (compiled_data->updated_parameters)
1402 ccfree(compiled_data->updated_parameters);
1403 compiled_data->updated_parameters = 0;
1404 compiled_data->update_nodes = 0;
1405 compiled_data->saved_aux = 0;
1406}
1407
1408static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1409{
1410 if (compiled_data->backward.gradients)
1411 ccfree(compiled_data->backward.gradients);
1412 compiled_data->backward.gradients = 0;
1413 if (compiled_data->backward.accum)
1414 ccv_nnc_graph_free(compiled_data->backward.accum);
1415 compiled_data->backward.accum = 0;
1416 if (compiled_data->backward.tensor_arena)
1417 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1418 compiled_data->backward.tensor_arena = 0;
1419 if (compiled_data->backward.graph_exec_arena)
1420 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1421 compiled_data->backward.graph_exec_arena = 0;
1422}
1423
1424static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1425{
1426 if (compiled_data->apply_gradients.graph)
1427 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1428 compiled_data->apply_gradients.graph = 0;
1429 if (compiled_data->apply_gradients.tensor_arena)
1430 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1431 compiled_data->apply_gradients.tensor_arena = 0;
1432 if (compiled_data->apply_gradients.graph_exec_arena)
1433 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1434 compiled_data->apply_gradients.graph_exec_arena = 0;
1435}
1436
1437// Compile the graph to run ccv_cnnp_model_fit
1438static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1439{
1440 int i, j;
1441 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1442 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1443 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1444 const int parallel_count = ccv_max(model->parallel_count, 1);
1445 assert(output_size == model->output_size * parallel_count);
1446 assert(!fits || output_size == fit_size);
1447 assert(output_size > 0);
1448 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1449 {
1450 _ccv_cnnp_model_set_rewindables(model);
1451 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1452 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1453 _ccv_cnnp_model_rewind_graph(model);
1454 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1455 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1456 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1457 }
1458 const int tensors_init = !!compiled_data->tensors_init.v;
1459 if (!tensors_init)
1460 _ccv_cnnp_model_tensors_init(model, compiled_data);
1461 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1462 // Check if it is not fully allocated; if it is not, run init_1.
1463 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1464 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1465 assert((input_size % parallel_count) == 0);
1466 assert((output_size % parallel_count) == 0);
1467 assert((fit_size % parallel_count) == 0);
1468 const int input_size_per_p = input_size / parallel_count;
1469 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1470 const int output_size_per_p = output_size / parallel_count;
1471 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1472 const int fit_size_per_p = fit_size / parallel_count;
1473 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1474 const int parameter_size = compiled_data->parameters->rnum;
1475 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1476 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1477 const int internal_size = compiled_data->internals->rnum;
1478 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1479 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1480 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1481 ccv_array_free(tensor_binds);
1482 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1483 if (tensors_init && parallel_count > 1)
1484 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1485 // If tensor is not init'ed, we need to init states first.
1486 if (_ccv_cnnp_any_to_init(compiled_data))
1487 {
1488 ccv_nnc_tensor_init_states_t tensor_init_states = {
1489 .parallel_count = parallel_count,
1490 .graph = model->graph,
1491 .compiled_data = compiled_data,
1492 .tensor_arena = compiled_data->tensor_arena
1493 };
1494 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1495 }
1496 compiled_data->is_test = 0;
1497 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1498 // No need to set because it defaults to training mode.
1499 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1500 for (i = 0; i < saved_aux_size * parameter_size; i++)
1501 {
1502 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1503 continue;
1504 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1505 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1506 for (j = 1; j < parallel_count; j++)
1507 {
1508 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1509 if (copy)
1510 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1511 }
1512 }
1513 const int evaluate_to_size = compiled_data->evaluate.to_size;
1514 compiled_data->evaluate.to_op_size = 0;
1515 for (i = 0; i < evaluate_to_size; i++)
1516 {
1517 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1518 if (to.graph)
1519 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1520 }
1521 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1522 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1523}
1524
1525ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1526{
1527 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1528 if (!compiled_data || !compiled_data->graph)
1529 return 0;
1530 return ccv_nnc_graph_default_stream(compiled_data->graph);
1531}
1532
1533uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1534{
1535 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1536 if (!compiled_data || !compiled_data->tensor_arena)
1537 return 0;
1538 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1539}
1540
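// A minimal sketch of probing the two accessors above; both return 0 until the model has been
// compiled down to a concrete graph (for example by a fit or evaluate call), so this is safe to
// call at any point. The function name is hypothetical.
#include <stdio.h>
#include "ccv_nnc.h"

static void report_model_runtime_state(const ccv_cnnp_model_t* const model)
{
	const uint64_t bytes = ccv_cnnp_model_memory_size(model); // 0 if no tensor arena yet.
	ccv_nnc_stream_context_t* const stream = ccv_cnnp_model_default_stream(model); // 0 if no graph yet.
	printf("arena bytes: %llu, default stream: %s\n", (unsigned long long)bytes, stream ? "ready" : "not compiled");
}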
1541static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1542{
1543 int i, j;
1544 for (i = 0; i < tensor_size; i++)
1545 {
1546 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1547 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1548 continue;
1549 if (graph)
1550 {
1551 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1552 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1553 tensor_symbol = alias_to;
1554 }
1555 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1556 for (j = 1; j < parallel_count; j++)
1557 {
1558 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1559 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1560 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1561 }
1562 }
1563}
1564
1565void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1566{
1567 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1568 assert(compiled_data);
1569 const int parallel_count = ccv_max(model->parallel_count, 1);
1570 assert(output_size == model->output_size * parallel_count);
1571 assert(input_size == model->input_size * parallel_count);
1572 assert(!fits || fit_size == output_size);
1573 assert(model->graph);
1574 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1575 {
1576 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1577 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1578 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1579 // Compile the symbolic graph down only when needed.
1580 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1581 } else {
1582 assert((input_size % parallel_count) == 0);
1583 assert((output_size % parallel_count) == 0);
1584 assert((fit_size % parallel_count) == 0);
1585 const int input_size_per_p = input_size / parallel_count;
1586 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1587 const int output_size_per_p = output_size / parallel_count;
1588 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1589 const int fit_size_per_p = fit_size / parallel_count;
1590 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1591 }
1592 if (compiled_data->is_test)
1593 {
1594 compiled_data->is_test = 0;
1595 ccv_nnc_graph_exec_update_t update = {
1596 .parallel_count = parallel_count,
1597 .graph = model->graph,
1598 .graph_exec_arena = compiled_data->graph_exec_arena,
1599 };
1600 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1601 }
1602 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1603}
1604
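For reference, the _ccv_cnnp_bind_tensors_to_arena calls in the fit path above expect a flat, replica-major tensor array: tensor i of data-parallel replica j sits at index i + size_per_p * j. A minimal sketch of that layout (hypothetical helper, not part of ccv_cnnp_model.c; it assumes the ccv_nnc.h header this file already includes):

// Hypothetical helper: fetch tensor i of replica j from the flat array that
// ccv_cnnp_model_fit expects for its inputs, outputs and fits arguments.
static inline ccv_nnc_tensor_t* replica_tensor(ccv_nnc_tensor_t* const* const tensors, const int size_per_p, const int i, const int j)
{
	return tensors[i + size_per_p * j]; // same indexing as the bind loop near the top of this section
}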
1605 // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = false (MULTISTAGE_MODE_NO_GRAD).
1606static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1607{
1608 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1609 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1610 const int parallel_count = ccv_max(model->parallel_count, 1);
1611 assert(output_size == model->output_size * parallel_count);
1612 assert(output_size > 0);
1613 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here; rather,
1614 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1615 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1616 {
1617 const int evaluate_to_size = compiled_data->evaluate.to_size;
1618 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1619 _ccv_cnnp_model_set_rewindables(model);
1620 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1621 0, 0,
1622 0, 0, 0,
1623 0, 0, 0,
1624 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1625 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1626 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1627 int i, j;
1628 for (i = 0; i < evaluate_to_size; i++)
1629 for (j = 1; j < parallel_count; j++)
1630 {
1631 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1632 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1633 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1634 }
1635 }
1636 const int tensors_init = !!compiled_data->tensors_init.v;
1637 if (!tensors_init)
1638 _ccv_cnnp_model_tensors_init(model, compiled_data);
1639 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1640 // If the tensors are not fully allocated yet, finish the allocation with init_1.
1641 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1642 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1643 assert((input_size % parallel_count) == 0);
1644 assert((output_size % parallel_count) == 0);
1645 const int input_size_per_p = input_size / parallel_count;
1646 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1647 const int output_size_per_p = output_size / parallel_count;
1648 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1649 const int parameter_size = compiled_data->parameters->rnum;
1650 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1651 const int internal_size = compiled_data->internals->rnum;
1652 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1653 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1654 // If we generated gradients for the graph, only compile part of it because the rest is irrelevant for evaluation.
1655 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1656 ccv_array_free(tensor_binds);
1657 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1658 // If the tensors are not init'ed, we need to init their states first.
1659 if (tensors_init && parallel_count > 1)
1660 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1661 if (_ccv_cnnp_any_to_init(compiled_data))
1662 {
1663 ccv_nnc_tensor_init_states_t tensor_init_states = {
1664 .parallel_count = parallel_count,
1665 .graph = model->graph,
1666 .compiled_data = compiled_data,
1667 .tensor_arena = compiled_data->tensor_arena
1668 };
1669 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1670 }
1671 compiled_data->is_test = 1;
1672 ccv_nnc_graph_exec_update_t update = {
1673 .parallel_count = parallel_count,
1674 .graph = model->graph,
1675 .graph_exec_arena = compiled_data->graph_exec_arena,
1676 };
1677 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1678 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1679 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1680}
1681
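The tensors_init.v checks above rely on a tagged-pointer convention: the low bit marks an init bitmap that is only partially allocated, and CCV_NNC_INIT_V masks that bit off before the bitmap is read. A minimal sketch of that convention (hypothetical helper names, standard C only):

#include <stdint.h>

// Low bit set: the init bitmap exists but not every tensor has been allocated yet,
// which is why the code above falls through to ccv_cnnp_model_tensors_init_1.
static inline int init_is_partial(const void* const v)
{
	return ((uintptr_t)v & (uintptr_t)1) != 0;
}

// Strip the tag bit to recover the actual uint32_t* bitmap (what CCV_NNC_INIT_V does above).
static inline uint32_t* init_bitmap(void* const v)
{
	return (uint32_t*)((uintptr_t)v & ~(uintptr_t)1);
}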
1682static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1683{
1684 assert(!compiled_data->tensors.gradients);
1685 const int parameter_size = compiled_data->parameters->rnum;
1686 const int parallel_count = ccv_max(model->parallel_count, 1);
1687 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1688 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1689 int i, j;
1690 for (i = 0; i < parameter_size; i++)
1691 {
1692 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1693 {
1694 compiled_data->tensors.gradients[i] = 0;
1695 compiled_data->tensors.accum_gradients[i] = 0;
1696 for (j = 1; j < parallel_count; j++)
1697 {
1698 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1699 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1700 }
1701 continue;
1702 }
1703 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1704 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1705 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1706 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1707 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1708 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1709 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1710 for (j = 1; j < parallel_count; j++)
1711 {
1712 if (j != device_id)
1713 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1714 else
1715 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1716 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1717 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1718 }
1719 }
1720}
1721
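_ccv_cnnp_model_gradient_tensors_init above skips parameters whose bit is cleared in parameter_flags, a packed array of 64-bit words indexed as flags[i >> 6] with bit (i & 63). A minimal sketch of that test (hypothetical helper, standard C only):

#include <stdint.h>

// Returns non-zero when parameter i should receive gradient tensors, mirroring the
// parameter_flags check in the loop above. A null flags array means all parameters are trainable.
static inline int parameter_is_trainable(const uint64_t* const flags, const int i)
{
	return !flags || (flags[i >> 6] & ((uint64_t)1 << (i & 63))) != 0;
}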
1722static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1723{
1724 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1725 return 1;
1726 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1727 return 0;
1728 int i;
1729 for (i = 0; i < input_size; i++)
1730 if (!(disable_outgrad & ((uint64_t)1 << i)))
1731 return 0;
1732 return 1;
1733}
1734
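_ccv_cnnp_is_disable_outgrad_all treats disable_outgrad as either one of the two sentinels (CCV_CNNP_DISABLE_OUTGRAD_ALL, CCV_CNNP_DISABLE_OUTGRAD_NONE) or a per-input bitmask in which bit i disables the gradient with respect to input i. A minimal sketch of building such a mask (hypothetical values for illustration, standard C only):

#include <stdint.h>

// Disable outgrads for inputs 0 and 2 of a three-input model; _ccv_cnnp_is_disable_outgrad_all
// returns 0 for this mask because input 1 still wants its gradient.
static uint64_t example_disable_outgrad(void)
{
	return ((uint64_t)1 << 0) | ((uint64_t)1 << 2);
}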
1735// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1736// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1737static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1738{
1739 int i, j;
1740 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1741 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1742 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode);
1743 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1744 const int parallel_count = ccv_max(model->parallel_count, 1);
1745 assert(output_size == model->output_size * parallel_count);
1746 assert(output_size > 0);
1747 // There shouldn't be a loss function if we evaluate with multistage jit.
1748 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1749 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1750 {
1751 _ccv_cnnp_model_set_rewindables(model);
1752 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1753 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1754 _ccv_cnnp_model_rewind_graph(model);
1755 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1756 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1757 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1758 }
1759 const int tensors_init = !!compiled_data->tensors_init.v;
1760 if (!tensors_init)
1761 _ccv_cnnp_model_tensors_init(model, compiled_data);
1762 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1763 // If the tensors are not fully allocated yet, finish the allocation with init_1.
1764 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1765 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1766 assert((input_size % parallel_count) == 0);
1767 assert((output_size % parallel_count) == 0);
1768 const int input_size_per_p = input_size / parallel_count;
1769 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1770 const int output_size_per_p = output_size / parallel_count;
1771 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1772 const int parameter_size = compiled_data->parameters->rnum;
1773 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1774 const int internal_size = compiled_data->internals->rnum;
1775 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1776 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1777 if (!compiled_data->tensors.gradients)
1778 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1779 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1780 if (compiled_data->backward.to_size > 0)
1781 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1782 else
1783 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1784 ccv_array_free(tensor_binds);
1785 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1786 if (tensors_init && parallel_count > 1)
1787 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1788 // If the tensors are not init'ed, we need to init their states first.
1789 if (_ccv_cnnp_any_to_init(compiled_data))
1790 {
1791 ccv_nnc_tensor_init_states_t tensor_init_states = {
1792 .parallel_count = parallel_count,
1793 .graph = model->graph,
1794 .compiled_data = compiled_data,
1795 .tensor_arena = compiled_data->tensor_arena
1796 };
1797 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1798 }
1799 compiled_data->is_test = is_test;
1800 ccv_nnc_graph_exec_update_t update = {
1801 .parallel_count = parallel_count,
1802 .graph = model->graph,
1803 .graph_exec_arena = compiled_data->graph_exec_arena,
1804 };
1805 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1806 const int evaluate_to_size = compiled_data->evaluate.to_size;
1807 compiled_data->evaluate.to_op_size = 0;
1808 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1809 for (i = 0; i < evaluate_to_size; i++)
1810 {
1811 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1812 if (to_op.graph)
1813 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1814 const int* tos;
1815 int to_size;
1816 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1817 for (j = 0; j < to_size; j++)
1818 {
1819 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1820 .d = tos[j],
1821 .graph = model->graph
1822 });
1823 if (to_op.graph)
1824 ccv_array_add_unique_int(backward_from, to_op.d);
1825 }
1826 }
1827 assert(backward_from->rnum > 0);
1828 compiled_data->backward.from_op_size = backward_from->rnum;
1829 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1830 for (i = 0; i < backward_from->rnum; i++)
1831 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1832 .d = *(int*)ccv_array_get(backward_from, i),
1833 .graph = compiled_data->graph,
1834 };
1835 // If any set nodes (to set some tensors to 0) were inserted by the backward pass, they won't be executed if we just run sources -> evaluate.to_ops and backward.from_ops -> destinations. The logic below finds these nodes and explicitly adds them to backward.from_ops.
1836 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0);
1837 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1838 uint32_t* const visited = cccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1839 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0);
1840 const int source_size = compiled_data->graph->sources->rnum;
1841 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0);
1842 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1843 visited[(idx >> 5)] |= (1u << (idx & 31));
1844 } ccv_nnc_graph_visit_endfor
1845 ccv_nnc_graph_visit_free(visit);
1846 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0);
1847 const int destination_size = compiled_data->graph->destinations->rnum;
1848 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0);
1849 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1850 visited[(idx >> 5)] |= (1u << (idx & 31));
1851 } ccv_nnc_graph_visit_endfor
1852 ccv_nnc_graph_visit_free(visit);
1853 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0);
1854 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1855 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1856 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1857 {
1858 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD);
1859 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-case only the set command that zeroes out a tensor, not the one that sets the gradient to 1.
1860 ccv_array_add_unique_int(backward_from, idx);
1861 }
1862 } ccv_nnc_graph_visit_endfor
1863 ccv_nnc_graph_visit_free(visit);
1864 ccfree(visited);
1865 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1866 {
1867 compiled_data->backward.from_op_size = backward_from->rnum;
1868 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1869 for (i = 0; i < backward_from->rnum; i++)
1870 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1871 .d = *(int*)ccv_array_get(backward_from, i),
1872 .graph = compiled_data->graph,
1873 };
1874 }
1875 ccv_array_free(backward_from);
1876 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1877 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1878}
1879
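The visit passes above record reachable nodes in visited, a plain bitset of uint32_t words: node idx maps to word idx >> 5 and bit idx & 31, so the final pass can spot set nodes that neither traversal reached. A minimal sketch of that bitset idiom (hypothetical helpers, standard C only):

#include <stdint.h>

// Same update as the ccv_nnc_graph_visit_for bodies above.
static inline void bitset_mark(uint32_t* const visited, const int idx)
{
	visited[idx >> 5] |= (1u << (idx & 31));
}

// Same test as the "if (!(visited[(idx >> 5)] & ..." check above.
static inline int bitset_test(const uint32_t* const visited, const int idx)
{
	return (visited[idx >> 5] & (1u << (idx & 31))) != 0;
}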
1880void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1881{
1882 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1883 assert(compiled_data);
1884 const int parallel_count = ccv_max(model->parallel_count, 1);
1885 assert(output_size == model->output_size * parallel_count);
1886 assert(input_size == model->input_size * parallel_count);
1887 assert(model->graph);
1888 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1889 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1890 if (!compiled_data->graph || mode_mismatch)
1891 {
1892 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1893 if (mode_mismatch) // If the mode mismatches, we need to redo the backward pass as well (no need to redo apply_gradients; it doesn't depend on target_gradient_mode or disable_outgrad).
1894 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1895 if (params.requires_grad)
1896 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1897 else
1898 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1899 } else {
1900 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1901		assert((input_size % parallel_count) == 0);
1902 const int input_size_per_p = input_size / parallel_count;
1903 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1904		assert((output_size % parallel_count) == 0);
1905 const int output_size_per_p = output_size / parallel_count;
1906 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1907 }
1908 if (compiled_data->is_test != params.is_test)
1909 {
1910 compiled_data->is_test = params.is_test;
1911 ccv_nnc_graph_exec_update_t update = {
1912 .parallel_count = parallel_count,
1913 .graph = model->graph,
1914 .graph_exec_arena = compiled_data->graph_exec_arena,
1915 };
1916 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1917 }
1918}
1919
1920void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1921{
1922 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1923	assert(compiled_data);
1924 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1925 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1926 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1927 else {
1928 if (!compiled_data->evaluate.schedule)
1929 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1930 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1931 }
1932}
1933
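Taken together, ccv_cnnp_model_dry_run / ccv_cnnp_model_evaluate above and ccv_cnnp_model_backward / ccv_cnnp_model_apply_gradients below form one training step. A minimal call-sequence sketch follows; the model m and the tensors x and y are hypothetical placeholders for a compiled model with one input and one output, and the evaluate-parameter values are assumptions, not prescribed defaults.

	/* Sketch of one training step with the multistage API (hypothetical setup:
	 * m is a compiled ccv_cnnp_model_t*, x and y are pre-filled ccv_nnc_tensor_t*). */
	ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 1, /* keep the gradient graph so ccv_cnnp_model_backward can run */
		.is_test = 0,
		.disable_outgrad = 0,
	};
	ccv_cnnp_model_evaluate(m, params, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
	ccv_cnnp_model_backward(m, 0, 0, 0, 0, 0, 0); /* no ingrads: backward seeds d(loss)/d(output) with 1 */
	ccv_cnnp_model_apply_gradients(m, 0); /* folds gradients into the parameters and resets backward.count */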
1934// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1935// Particularly, this method compiles the accumulator graph.
1936static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1937{
1938 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1939	assert(compiled_data);
1940	assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1941	ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1942	const int parallel_count = ccv_max(model->parallel_count, 1);
1943 const int parameter_size = compiled_data->parameters->rnum;
1944 int i, j;
1945	compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1946 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1947 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1948 for (i = 0; i < parameter_size; i++)
1949 for (j = 0; j < parallel_count; j++)
1950 if (compiled_data->tensors.gradients[i + j * parameter_size])
1951 {
1952 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1953				// Now the old gradient becomes the accumulated gradient; set up a new gradient tensor so we can collect into it.
1954 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1955 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1956 ccv_nnc_tensor_symbol_t inputs[2];
1957 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1958 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1959 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1960				ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD(), inputs, 2, &output, 1, 0);
1961 } else {
1962				compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
1963				compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
1964				compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL;
1965 }
1966 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1967 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1968 {
1969 ccv_nnc_symbolic_graph_free(accum);
1970 // Create empty graph.
1971 compiled_data->backward.accum = ccv_nnc_graph_new();
1972 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1973 return;
1974 }
1975 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1976 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1977 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1978 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1979	ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum), SYMBOLIC_GRAPH_DESTINATIONS(accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1980 ccv_nnc_symbolic_graph_free(accum);
1981 ccv_array_free(tensor_binds);
1982 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1983}
1984
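The accumulator graph built by _ccv_cnnp_model_multistage_jit_1 above reduces to accum += grad per parameter: the previously computed gradient tensor is repurposed as the accumulator, a fresh tensor receives the next gradient, and an EWSUM node folds the two together. A plain-C analogue of that arithmetic (hypothetical, outside the nnc graph machinery):

	#include <stddef.h>
	#include <string.h>

	/* First backward pass: the fresh gradient is the accumulator as-is.
	 * Later passes: element-wise sum, which is what the EWSUM node does. */
	static void ewsum(float* const out, const float* const a, const float* const b, const size_t n)
	{
		size_t i;
		for (i = 0; i < n; i++)
			out[i] = a[i] + b[i];
	}

	static void accumulate_gradient(float* const accum, const float* const grad, const size_t n, const int backward_count)
	{
		if (backward_count == 0)
			memcpy(accum, grad, sizeof(float) * n);
		else
			ewsum(accum, accum, grad, n); /* accum += grad */
	}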
1985void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1986{
1987 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1988	assert(compiled_data);
1989	assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1990	const int parallel_count = ccv_max(model->parallel_count, 1);
1991	assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count);
1992	if (outgrad_size > 0)
1993		{ assert(outgrad_size == compiled_data->outgrad_size * parallel_count); }
1994	assert(model->graph);
1995	assert(compiled_data->graph);
1996 const int parameter_size = compiled_data->parameters->rnum;
1997 // If we need to accumulate the gradients now, do jit on accumulator.
1998 if (compiled_data->backward.count > 0)
1999 {
2000 if (!compiled_data->backward.accum)
2001 _ccv_cnnp_model_multistage_jit_1(model);
2002 else if (compiled_data->backward.count == 1) {
2003 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2004 int i;
2005 for (i = 0; i < parameter_size * parallel_count; i++)
2006 {
2007 ccv_nnc_tensor_t* tensor;
2008				CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor);
2009 }
2010 if (compiled_data->backward.tensor_arena)
2011 {
2012 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2013 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
2014 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2015 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2016 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2017 }
2018 }
2019 }
2020 const int ingrad_size_per_p = model->output_size;
2021 const int outgrad_size_per_p = compiled_data->outgrad_size;
2022 int i, j;
2023 for (i = 0; i < ingrad_size_per_p; i++)
2024 {
2025 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
2026 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2027 {
2028 // Set it to 1 if it is not specified.
2029 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2030 if (ingrad_tensor)
2031				ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2032 for (j = 1; j < parallel_count; j++)
2033 {
2034 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2035 if (ingrad_tensor)
2036					ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
2037 }
2038 } else {
2039 // Make sure the length matches, in case it is an alias.
2040			assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)));
2041 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2042 for (j = 1; j < parallel_count; j++)
2043 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2044 }
2045 }
2046 if (outgrad_size > 0)
2047 {
2048		assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad");
2049 for (i = 0; i < outgrad_size_per_p; i++)
2050 if (outgrads[i])
2051 {
2052 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2053 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2054 for (j = 1; j < parallel_count; j++)
2055 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2056 }
2057 } else {
2058		assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||
2059			compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS);
2060 }
2061	// We need to rebind here because ccv_cnnp_evaluate clears bindings, which resets all bindings for the gradients.
2062	// Parameters and internals are fine: clearing bindings restores the original bindings, which are exactly those
2063	// parameters and internals. The same cannot be said for gradients because of the accum_gradients switching.
2064 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2065 if (!compiled_data->backward.schedule)
2066 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2067 // Run the backward pass.
2068 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2069 // If we need to run accumulation round, do that now.
2070 if (compiled_data->backward.count > 0)
2071 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2072 // Update the count, this determines whether we need to accumulate or not.
2073 ++compiled_data->backward.count;
2074}
2075
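When ccv_cnnp_model_backward above is called without ingrads, CMD_SET_FORWARD(1) fills the gradient of each output with ones, which is the correct seed when the output is the scalar loss itself (df/df = 1). A scalar sketch of the chain rule with that seed (hypothetical, standalone):

	#include <stdio.h>

	int main(void)
	{
		const float x = 3.f, w = 2.f;
		const float f = w * x;          /* forward pass; treat f as the scalar loss */
		const float df_df = 1.f;        /* what CMD_SET_FORWARD(1) writes into the ingrad tensor */
		const float df_dw = df_df * x;  /* chain rule through the multiply */
		printf("f = %g, df/dw = %g\n", f, df_dw);
		return 0;
	}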
2076// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2077// Particularly, this method compiles the parameter update graph.
2078static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2079{
2080 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2081	assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2082	const int parallel_count = ccv_max(model->parallel_count, 1);
2083 const int parameter_size = compiled_data->parameters->rnum;
2084 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2085	_ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2086 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2087 // Bind accumulated gradients.
2088 if (compiled_data->backward.count > 1)
2089 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2090 else
2091 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2092 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2093 int i, j;
2094 for (i = 0; i < compiled_data->backward.to_size; i++)
2095 {
2096 const int* tos;
2097 int to_size;
2098 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2099 for (j = 0; j < to_size; j++)
2100 {
2101			// Check whether this node already shows up in the backward graph; if it does, it won't be in the apply
2102			// gradients graph.
2103 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2104 .d = tos[j],
2105 .graph = model->graph,
2106 });
2107 if (!exec.graph)
2108 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2109 }
2110 }
2111 const int from_size = apply_gradients_from->rnum;
2112 if (from_size == 0)
2113 {
2114 ccv_array_free(apply_gradients_from);
2115 ccv_array_free(tensor_binds);
2116 return;
2117 }
2118	ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2119 for (i = 0; i < from_size; i++)
2120 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2121			.d = *(int*)ccv_array_get(apply_gradients_from, i),
2122 .graph = model->graph
2123 };
2124 ccv_array_free(apply_gradients_from);
2125	// It can only end with updates on the parameters.
2126 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2127 for (i = 0; i < parameter_size; i++)
2128 {
2129 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2130 continue;
2131 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2132 for (j = 1; j < parallel_count; j++)
2133 {
2134 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2135 ccv_array_push(tos, &copy);
2136 }
2137 }
2138	ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2139 ccv_array_free(tos);
2140 ccv_array_free(tensor_binds);
2141	ccfree(froms);
2142 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2143 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2144 {
2145 // Skip on no tensor.
2146 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2147 continue;
2148 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2149		ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2150 for (j = 1; j < parallel_count; j++)
2151 {
2152 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2153 if (copy)
2154				ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2155 }
2156 }
2157 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2158}
2159
2160void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2161{
2162 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2163	assert(compiled_data);
2164	assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
2165	const int parallel_count = ccv_max(model->parallel_count, 1);
2166	assert(model->graph);
2167	assert(compiled_data->graph);
2168 // Skip if there is no backward pass.
2169 if (compiled_data->backward.count <= 0)
2170 return;
2171	// Skip if there are no parameters.
2172 if (compiled_data->parameters->rnum == 0)
2173 {
2174 compiled_data->backward.count = 0;
2175 return;
2176 }
2177 if (!compiled_data->apply_gradients.graph)
2178 _ccv_cnnp_model_multistage_jit_2(model);
2179 else {
2180 const int parameter_size = compiled_data->parameters->rnum;
2181 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2182 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2183 if (compiled_data->backward.count > 1)
2184 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2185 else
2186 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2187 }
2188 if (compiled_data->apply_gradients.graph)
2189 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2190 // Reset backward count to 0.
2191 compiled_data->backward.count = 0;
2192}
2193
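ccv_cnnp_model_apply_gradients above keys off compiled_data->backward.count: 0 means there is nothing to apply, 1 means the raw gradients are bound directly, and more than 1 means the EWSUM-accumulated copies are bound instead; the counter then resets to 0. A hypothetical standalone helper expressing that selection:

	/* Hypothetical selector mirroring the backward.count protocol above. */
	typedef enum {
		APPLY_NOTHING,        /* backward.count == 0: skip */
		APPLY_GRADIENTS,      /* backward.count == 1: bind tensors.gradients */
		APPLY_ACCUM_GRADIENTS /* backward.count  > 1: bind tensors.accum_gradients */
	} apply_source_t;

	static apply_source_t apply_source(const int backward_count)
	{
		if (backward_count <= 0)
			return APPLY_NOTHING;
		return backward_count > 1 ? APPLY_ACCUM_GRADIENTS : APPLY_GRADIENTS;
	}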
2194void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2195{
2196 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2197 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2198	assert(parameter->param_sel != 0);
2199 const int tensors_init = !!compiled_data->tensors_init.v;
2200 if (!tensors_init)
2201 _ccv_cnnp_model_tensors_init(model, compiled_data);
2202 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2203		// Check whether it is fully allocated; if it is not, run init_1.
2204 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2205 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2206 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2207 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2208 if (param_ref < 0)
2209		{ assert(parameter_indices->rnum == 1); }
2210	else
2211		{ assert(param_ref < parameter_indices->rnum); }
2212	const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2213 ccv_array_free(parameter_indices);
2214 const int parameter_size = compiled_data->parameters->rnum;
2215	assert(d >= 0);
2216	assert(d < parameter_size);
2217	const int parallel_count = ccv_max(model->parallel_count, 1);
2218	ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2219	assert(dest);
2220	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor), TENSOR_LIST(dest), 0);
2221 int i;
2222 for (i = 1; i < parallel_count; i++)
2223 {
2224		ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size]);
2225		if (copy_tensor)
2226			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dest), TENSOR_LIST(copy_tensor), 0);
2227 }
2228 // Mark this symbol as init'ed.
2229	const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d))->d;
2230	uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2231 init_v[s >> 5] |= (1u << (s & 0x1f));
2232}
2233
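The last two lines of ccv_cnnp_model_set_parameter record, in the tensors_init bitmap, that this parameter's tensor symbol now holds real data: one bit per symbol index, word s >> 5, bit s & 0x1f. The same arithmetic in a self-contained form (helper names are hypothetical):

	#include <assert.h>
	#include <stdint.h>

	/* Same bit arithmetic as init_v above: one bit per tensor symbol index. */
	static void mark_init(uint32_t* const v, const int s)
	{
		v[s >> 5] |= (1u << (s & 0x1f));
	}

	static int is_init(const uint32_t* const v, const int s)
	{
		return !!(v[s >> 5] & (1u << (s & 0x1f)));
	}

	int main(void)
	{
		uint32_t v[2] = { 0, 0 }; /* room for 64 symbols */
		mark_init(v, 37);
		assert(is_init(v, 37));
		assert(!is_init(v, 36));
		return 0;
	}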
2234void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2235{
2236 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2237 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2238	assert(parameter->param_sel != 0);
2239	assert(compiled_data->tensors.parameters);
2240 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2241 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2242 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2243 if (param_ref < 0)
2244		{ assert(parameter_indices->rnum == 1); }
2245	else
2246		{ assert(param_ref < parameter_indices->rnum); }
2247	const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2248 ccv_array_free(parameter_indices);
2249 const int parameter_size = compiled_data->parameters->rnum;
2250	assert(d >= 0);
2251	assert(d < parameter_size);
2252	// We don't need to consider parallel_count, every parameter on each device is identical.
2253	ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2254	assert(src);
2255	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(tensor), 0);
2256}
2257
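The parameter accessors above and below all start by decoding param_sel and param_ref the same way: 0 is reserved (param_sel asserts against it), positive values are 1-based indices into the parameter list, and non-positive values act as sentinels (select all, or the single resolved parameter). A hypothetical helper equivalent to the inline ternaries:

	/* Hypothetical decode helper equivalent to `x > 0 ? x - 1 : x`
	 * as used for param_sel and param_ref above. */
	static int decode_param_index(const int encoded)
	{
		return encoded > 0 ? encoded - 1 : encoded; /* > 0: 1-based -> 0-based; <= 0: sentinel preserved */
	}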
2258ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2259{
2260 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2261 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2262	assert(parameter->param_sel != 0);
2263	assert(compiled_data->tensors.parameters);
2264 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2265 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2266 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2267 if (param_ref < 0)
2268		{ assert(parameter_indices->rnum == 1); }
2269	else
2270		{ assert(param_ref < parameter_indices->rnum); }
2271	const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2272 ccv_array_free(parameter_indices);
2273 const int parameter_size = compiled_data->parameters->rnum;
2274	assert(d >= 0);
2275	assert(d < parameter_size);
2276	// We don't need to consider parallel_count, every parameter on each device is identical.
2277	ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d]);
2278	assert(tensor);
2279 return tensor->info;
2280}
2281
2282const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2283{
2284 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2285 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2286	assert(parameter->param_sel != 0);
2287 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2288 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2289 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2290 if (param_ref < 0)
2291		{ assert(parameter_indices->rnum == 1); }
2292	else
2293		{ assert(param_ref < parameter_indices->rnum); }
2294	const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
2295 ccv_array_free(parameter_indices);
2296 const int parameter_size = compiled_data->parameters->rnum;
2297	assert(d >= 0);
2298	assert(d < parameter_size);
2299	return *(char**)ccv_array_get(compiled_data->ids.parameters, d);
2300}
2301
2302int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2303{
2304	assert(model->compiled_data);
2305 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2306 return compiled_data->parameters->rnum;
2307}
2308
2309uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2310{
2311	assert(model->compiled_data);
2312 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2313 const int parameter_size = compiled_data->parameters->rnum;
2314 int i;
2315 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2316 uint64_t size = 0;
2317 const int tensors_init = !!compiled_data->tensors_init.v;
2318	uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v) : 0;
2319 for (i = 0; i < parameter_size; i++)
2320 {
2321		const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
2322 if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] | (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i])
2323 {
2324 ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info;
2325 size += ccv_nnc_tensor_data_size(params);
2326 continue;
2327 }
2328 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2329 .graph = graph,
2330 .d = d
2331 });
2332 size += ccv_nnc_tensor_data_size(params);
2333 }
2334 return size;
2335}
2336
2337int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type)
2338{
2339	assert(model->compiled_data);
2340 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2341 if (count != compiled_data->parameters->rnum)
2342 return 0;
2343	if (CCV_TENSOR_GET_DEVICE(type) == CCV_COMPUTE_DEVICE_ANY)
2344		CCV_TENSOR_SET_DEVICE_ID(type, 0);
2345 int i;
2346 // We don't need to consider parallel_count, every parameter on each device is identical.
2347 for (i = 0; i < count; i++)
2348 {
2349 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2350		if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2351 {
2352 tensors[i] = 0;
2353 continue;
2354 }
2355		tensor = CCV_NNC_TENSOR(tensor);
2356 if (tensor->info.type == type)
2357 tensors[i] = tensor;
2358 else {
2359 ccv_nnc_tensor_param_t info = tensor->info;
2360 info.type = type;
2361 tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet.
2362 }
2363 }
2364 for (i = 0; i < count; i++)
2365 {
2366 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2367		if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2368 continue;
2369		tensor = CCV_NNC_TENSOR(tensor);
2370 // Now initiate transfer. We should do this one on a stream.
2371 if (tensor->info.type != type)
2372			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor), TENSOR_LIST(tensors[i]), 0);
2373 }
2374 // Copy names and remove parameters.
2375 for (i = 0; i < count; i++)
2376 {
2377 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i];
2378		if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned, we don't do anything.
2379 {
2380 names[i] = 0;
2381 continue;
2382 }
2383		const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2384		const size_t name_len = ccv_min(strnlen(name, 1023), 1023);
2385		names[i] = ccmalloc(name_len + 1);
2386 names[i][name_len] = 0;
2387 memcpy(names[i], name, name_len);
2388 if (tensor->info.type == type)
2389 compiled_data->tensors.parameters[i] = 0; // Only move when it is moved.
2390 }
2391 return 1;
2392}
2393
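ccv_cnnp_model_parameters_move above, like the CCV_NNC_TENSOR and CCV_NNC_INIT_V uses elsewhere in this file, keeps an ownership flag in the low bit of the pointer: a set bit marks a borrowed (not owned) tensor that must not be freed or moved, and the bit is masked off before the pointer is dereferenced. A self-contained sketch of that convention (helper names are hypothetical):

	#include <stdint.h>

	/* Hypothetical helpers spelling out the low-bit tag convention used by
	 * compiled_data->tensors.parameters: bit set -> borrowed, bit clear -> owned. */
	static int tensor_is_borrowed(const void* const p)
	{
		return (int)((uintptr_t)p & (uintptr_t)1);
	}

	static void* tensor_untag(const void* const p)
	{
		return (void*)((uintptr_t)p & ~(uintptr_t)1);
	}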
2394KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)
	(khash macro expansion elided: this line instantiates kh_ccv_cnnp_parameter_id_t together with its kh_init / kh_destroy / kh_clear / kh_get / kh_resize / kh_put / kh_del_ccv_cnnp_parameter_id functions. The analyzer's uninitialized-value path below runs through kh_resize_ccv_cnnp_parameter_id, where realloc grows the keys array without initializing the new slots, and ends at the strcmp(h->keys[i], key) probe inside kh_put_ccv_cnnp_parameter_id, reported at line 2394, column 1.)
Analyzer path through the khash expansion at line 2394:
27. Taking true branch
28. Taking false branch
29. Calling 'kh_resize_ccv_cnnp_parameter_id'
30. Taking true branch
31. Assuming the condition is false
32. Taking false branch
33. '?' condition is true
34. Assuming 'new_flags' is non-null
35. Taking false branch
36. '?' condition is true
37. Taking true branch
38. Storing uninitialized value
39. Assuming 'new_keys' is non-null
40. Taking false branch
41. Taking true branch
42. Assuming 'new_vals' is non-null
43. Taking false branch
44. Taking true branch
45. Loop condition is false. Execution continues on line 2394
46. Taking false branch
47. Returning from 'kh_resize_ccv_cnnp_parameter_id'
48. Taking false branch
49. Assuming the condition is true
50. Taking true branch
51. Taking true branch
57. Taking true branch
58. Assuming the condition is true
59. Assuming the condition is true
60. The value 1 is assigned to 'i'
61. Taking false branch
62. Assuming the condition is true
63. Assuming the condition is false
64. 1st function call argument is an uninitialized value
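The report bottoms out at strcmp(h->keys[i], key) inside kh_put_ccv_cnnp_parameter_id: kh_resize_ccv_cnnp_parameter_id grows the keys array with realloc (event 38, "Storing uninitialized value"), and only the flags bitmap, memset to 0xaa, records which buckets are empty, so the analyzer cannot prove that the later flag checks exclude every uninitialized slot (event 64). In khash itself the probe never reads a bucket whose empty bit is set, so this is most likely a false positive, but the invariant is invisible to the checker. The shape of the pattern, reduced to a hypothetical standalone example (not the khash code):

	#include <stdlib.h>
	#include <string.h>

	/* Key storage grown by realloc (new slots deliberately left uninitialized),
	 * occupancy tracked only in a separate flags array, strcmp guarded by a
	 * flag check the analyzer cannot always connect to the uninitialized slots. */
	typedef struct {
		unsigned n_buckets;
		unsigned char* flags; /* 1 = bucket occupied, 0 = empty */
		const char** keys;    /* only meaningful where flags[i] == 1 */
	} strset_t;

	static int strset_grow(strset_t* const h, const unsigned n_buckets)
	{
		const char** const keys = (const char**)realloc(h->keys, sizeof(const char*) * n_buckets);
		if (!keys)
			return -1;
		h->keys = keys; /* slots past the old size stay uninitialized */
		unsigned char* const flags = (unsigned char*)realloc(h->flags, n_buckets);
		if (!flags)
			return -1;
		h->flags = flags;
		memset(h->flags + h->n_buckets, 0, n_buckets - h->n_buckets); /* mark new buckets empty */
		h->n_buckets = n_buckets;
		return 0;
	}

	static int strset_contains(const strset_t* const h, const char* const key)
	{
		unsigned i;
		for (i = 0; i < h->n_buckets; i++)
			if (h->flags[i] && strcmp(h->keys[i], key) == 0) /* keys[i] read only behind the flag check */
				return 1;
		return 0;
	}

	int main(void)
	{
		strset_t h = { 0, 0, 0 };
		if (strset_grow(&h, 4) != 0)
			return 1;
		h.keys[0] = "conv-0-weight";
		h.flags[0] = 1;
		const int found = strset_contains(&h, "conv-0-weight");
		free(h.flags);
		free((void*)h.keys);
		return found ? 0 : 1;
	}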
2395
2396void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
2397{
2398	assert(model->compiled_data);
2399 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2400 int i;
2401	khash_t(ccv_cnnp_parameter_id)* id_map = 0;
2402 if (count != compiled_data->parameters->rnum)
2403 {
2404		id_map = kh_init(ccv_cnnp_parameter_id);
2405 // Build the map between name and the index.
2406 for (i = 0; i < count; i++)
2407 {
2408 int ret;
2409			const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret);
2410			assert(ret != 0);
2411			kh_val(id_map, k) = i;
2412 }
2413 }
2414 const int parameter_size = compiled_data->parameters->rnum;
2415 int* copy_back = 0;
2416 const int tensors_init = !!compiled_data->tensors_init.v;
2417 if (!tensors_init)
2418 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2419	const int parallel_count = ccv_max(model->parallel_count, 1);
2420	uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2421 for (i = 0; i < parameter_size; i++)
2422 {
2423 int j = i;
2424		const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2425 if (i >= 0 || strncmp(name, names[i], 1023) != 0)
2426 {
2427 // Build the map.
2428 if (id_map == 0)
2429 {
2430				id_map = kh_init(ccv_cnnp_parameter_id);
2431 for (j = 0; j < count; j++)
2432 {
2433 int ret;
2434					const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret);
2435					assert(ret != 0);
2436					kh_val(id_map, k) = j;
2437 }
2438 }
2439			const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name);
2440			if (k == kh_end(id_map)) // Cannot find the name, skip.
2441 continue;
2442			j = kh_val(id_map, k);
2443 }
2444 if (compiled_data->tensors.parameters[i]) // Cannot be a shared parameter to read.
2445			{ assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)); }
2446		const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
2447 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2448 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2449 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2450 const int d = parameter.d;
2451 if (info.type == tensors[j]->info.type && invalidates) // Can move.
2452 {
2453 // Deallocate it if needed.
2454 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2455 if (compiled_data->tensors.parameters[i])
2456 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2457 compiled_data->tensors.parameters[i] = tensors[j];
2458 tensors[j] = 0;
2459 } else {
2460 if (!compiled_data->tensors.parameters[i])
2461 { // Not allocated, to allocate first.
2462 // Create new one, make sure we create this by having the right parameters.
2463 const int type = info.type;
2464 info = tensors[j]->info;
2465 info.type = type; // Revert back the type.
2466 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
2467 }
2468 if (!copy_back)
2469 copy_back = (int*)cccalloccalloc(parameter_size, sizeof(int));
2470 copy_back[i] = j + 1;
2471 }
2472 init_v[d >> 5] |= (1u << (d & 0x1f));
2473 // Create this tensor for other data parallel allocations.
2474 info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
2475 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2476 for (j = 1; j < parallel_count; j++)
2477 if (!compiled_data->tensors.parameters[i + j * parameter_size])
2478 {
2479 if (j != device_id)
2480 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
2481 else
2482 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2483 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2484 }
2485 // No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
2486 }
2487 if (id_map)
2488 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2489 // Now do the transfer.
2490 if (copy_back)
2491 {
2492 for (i = 0; i < parameter_size; i++)
2493 {
2494 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[i]) & ~(uintptr_t)1))
;
2495 if (copy_back[i] == 0)
2496 continue;
2497 const int j = copy_back[i] - 1;
2498 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j])(ccv_nnc_tensor_t* []){tensors[j]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2499 }
2500 ccfreefree(copy_back);
2501 }
2502}
2503
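The id_map used above is an ordinary khash string map from parameter name to index. For reference, the same put/get pattern in a standalone program; the map name and keys here are invented, and the real code declares its map type (ccv_cnnp_parameter_id) elsewhere in this file:

    #include <assert.h>
    #include <stdio.h>
    #include "khash.h"

    KHASH_MAP_INIT_STR(name_id, int) /* hypothetical map type for this sketch */

    int main(void)
    {
        const char* names[] = { "conv-0-w", "conv-0-b" };
        khash_t(name_id)* id_map = kh_init(name_id);
        int i;
        for (i = 0; i < 2; i++)
        {
            int ret;
            const khiter_t k = kh_put(name_id, id_map, names[i], &ret); /* ret == 0 means duplicate key */
            assert(ret != 0);
            kh_val(id_map, k) = i;
        }
        const khiter_t k = kh_get(name_id, id_map, "conv-0-b");
        if (k != kh_end(id_map)) /* kh_end() doubles as "not found" */
            printf("index = %d\n", kh_val(id_map, k));
        kh_destroy(name_id, id_map);
        return 0;
    }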
2504 ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2505 {
2506   ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2507   assert(compiled_data);
2508   const int parameter_size = compiled_data->parameters->rnum;
2509   int i;
2510   for (i = 0; i < parameter_size; i++)
2511   {
2512     const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2513     if (first(model, name, context))
2514       return ccv_cnnp_model_parameters(model, -1, i);
2515   }
2516   return 0;
2517 }
2518
2519 ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2520 {
2521   ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2522   assert(compiled_data);
2523   ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2524   const int parameter_size = compiled_data->parameters->rnum;
2525   int i;
2526   for (i = 0; i < parameter_size; i++)
2527   {
2528     const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
2529     if (filter(model, name, context))
2530     {
2531       ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2532       ccv_array_push(parameters, &parameter);
2533     }
2534   }
2535   return parameters;
2536
2537 }
2538
2539 CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2540 {
2541   ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2542   assert(compiled_data);
2543   const int tensors_init = !!compiled_data->tensors_init.v;
2544   if (!tensors_init) // If nothing initialized, we return parameter 0.
2545     return ccv_cnnp_model_parameters(model, -1, 0);
2546   const int parameter_size = compiled_data->parameters->rnum;
2547   int i;
2548   const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
2549   for (i = 0; i < parameter_size; i++)
2550   {
2551     const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
2552     if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2553       return ccv_cnnp_model_parameters(model, -1, i);
2554   }
2555   return 0;
2556 }
2557
2558 static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2559 {
2560   const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2561   assert(parameters->param_sel != 0);
2562   ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2563   ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2564   *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2565   return to_parameter_indices;
2566 }
2567
2568 static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2569 {
2570   // If the model is not compiled yet. Compile them now.
2571   if (!model->graph)
2572   {
2573     model->graph = ccv_nnc_symbolic_graph_new();
2574     assert(from_model->compiled_data);
2575     const int input_size = from_model->input_size;
2576     ccv_nnc_tensor_param_t input_params[input_size];
2577     int i;
2578     for (i = 0; i < input_size; i++)
2579       input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2580     _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2581     model->parallel_count = from_model->parallel_count;
2582     model->memory_compression = from_model->memory_compression;
2583     model->memory_reduction = from_model->memory_reduction;
2584     model->gradient_checkpointing = from_model->gradient_checkpointing;
2585     model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2586     model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2587     model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2588   }
2589   ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2590   assert(to_compiled_data);
2591   const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2592   if (!to_tensors_init)
2593   {
2594     if (only_init_0)
2595       ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2596     else
2597       _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2598   } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2599     // Check if it is not fully allocated, if it is not, init_1.
2600     ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2601   assert(to_compiled_data->tensors.parameters);
2602   *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2603   *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2604   if (*from_param_ref < 0 && *param_ref >= 0)
2605     { assert((*from_parameter_indices)->rnum == 1); }
2606   else if (*from_param_ref >= 0)
2607     { assert(*from_param_ref < (*from_parameter_indices)->rnum); }
2608   if (*param_ref < 0 && *from_param_ref >= 0)
2609     { assert((*parameter_indices)->rnum == 1); }
2610   else if (*param_ref >= 0)
2611     { assert(*param_ref < (*parameter_indices)->rnum); }
2612 }
2613
2614 void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2615 {
2616   ccv_array_t* to_parameter_indices;
2617   int to_param_ref;
2618   ccv_array_t* from_parameter_indices;
2619   int from_param_ref;
2620   _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2621   // Should be exactly the same tensor.
2622   if (to_param_ref < 0 && from_param_ref < 0)
2623     { assert(from_parameter_indices->rnum == to_parameter_indices->rnum); }
2624   // To models.
2625   ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2626   assert(to_compiled_data);
2627   // From models.
2628   const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2629   const int parallel_count = ccv_max(model->parallel_count, 1);
2630   const int to_parameter_size = to_compiled_data->parameters->rnum;
2631   const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2632   int i, j;
2633   const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v);
2634   uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v);
2635   for (i = 0; i < rnum; i++)
2636   {
2637     const int src_d = *(int*)ccv_array_get(from_parameter_indices, from_param_ref >= 0 ? from_param_ref : i);
2638     assert(src_d >= 0);
2639     assert(src_d < from_compiled_data->parameters->rnum);
2640     const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d))->d;
2641     // If the original is not init'ed. We cannot copy from.
2642     if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2643       continue;
2644     const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2645     assert(dest_d >= 0);
2646     assert(dest_d < to_compiled_data->parameters->rnum);
2647     ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d]);
2648     assert(src);
2649     ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d]);
2650     assert(dest);
2651     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(dest), 0);
2652     for (j = 1; j < parallel_count; j++)
2653     {
2654       ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]);
2655       if (copy_tensor)
2656         ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dest), TENSOR_LIST(copy_tensor), 0);
2657     }
2658     // Mark this symbol as init'ed.
2659     const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d))->d;
2660     to_init_v[d >> 5] |= (1u << (d & 0x1f));
2661   }
2662   ccv_array_free(to_parameter_indices);
2663   ccv_array_free(from_parameter_indices);
2664 }
2665
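ccv_cnnp_model_set_parameters above, like its neighbors, records which tensor symbols are initialized in a packed bit vector: symbol d lives in word d >> 5 at bit d & 0x1f (see line 2660). A minimal sketch of that bookkeeping, with hypothetical helper names:

    #include <stdint.h>
    #include <stdio.h>

    static void mark_init(uint32_t* const v, const int d)
    {
        v[d >> 5] |= (1u << (d & 0x1f)); /* same expression as in the listing */
    }

    static int is_init(const uint32_t* const v, const int d)
    {
        return (v[d >> 5] & (1u << (d & 0x1f))) != 0;
    }

    int main(void)
    {
        uint32_t v[4] = { 0 }; /* room for 128 symbol indices */
        mark_init(v, 37);
        printf("%d %d\n", is_init(v, 37), is_init(v, 38)); /* prints "1 0" */
        return 0;
    }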
2666 void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2667 {
2668   ccv_array_t* to_parameter_indices;
2669   int to_param_ref;
2670   ccv_array_t* from_parameter_indices;
2671   int from_param_ref;
2672   _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2673   // Should be exactly the same tensor.
2674   if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1: Assuming 'renamer' is not equal to null
2675     { assert(from_parameter_indices->rnum == to_parameter_indices->rnum); }
2676   // To models.
2677   ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2678   assert(to_compiled_data);
2: Assuming 'to_compiled_data' is non-null
3: Taking true branch
2679   // From models.
2680   const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2681   const int parallel_count = ccv_max(model->parallel_count, 1);
4: Assuming '_a' is <= '_b'
5: '?' condition is false
2682   assert(parallel_count == ccv_max(from_model->parallel_count, 1)); // Should have the same parallel count can share parameters.
6: Assuming '_a' is <= '_b'
7: '?' condition is false
8: Taking true branch
2683   const int from_parameter_size = from_compiled_data->parameters->rnum;
2684   const int to_parameter_size = to_compiled_data->parameters->rnum;
2685   const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9: Assuming 'to_param_ref' is >= 0
2686   int i, j;
2687   khash_t(ccv_cnnp_parameter_id)* id_map = 0;
2688   char* updated_name = 0;
2689   const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v);
2690   uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v);
2691   for (i = 0; i < rnum; i++)
2692   {
2693     int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices, from_param_ref >= 0 ? from_param_ref : i) : from_parameter_size;
10: Assuming 'from_param_ref' is < 0
11: '?' condition is false
12: Assuming the condition is false
13: '?' condition is false
2694     // Need to figure out how to use the renamer here.
2695     const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
14: '?' condition is true
2696     assert(dest_d >= 0);
15: Assuming 'dest_d' is >= 0
16: Taking true branch
2697     assert(dest_d < to_parameter_size);
17: Assuming 'dest_d' is < 'to_parameter_size'
18: Taking true branch
2698     if (renamer)
18.1: 'renamer' is non-null
2699     {
2700       const char* const src_name = (src_d < from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d) : 0;
18.2: 'src_d' is >= 'from_parameter_size'
2701       const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d);
2702       if (!updated_name)
18.3: 'updated_name' is null
19: Taking true branch
2703         updated_name = (char*)ccmalloc(1024);
2704       const size_t src_name_len = src_name == 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023);
19.1: 'src_name' is equal to null
20: '?' condition is true
2705       if (src_name_len > 0)
20.1: 'src_name_len' is <= 0
21: Taking false branch
2706         memcpy(updated_name, src_name, src_name_len);
2707       updated_name[src_name_len] = 0;
2708       if (renamer(context, dest_name, updated_name, 1024) != 0)
22: Assuming the condition is false
2709         continue; // Skip this.
2710       if (src_name != 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
22.1: 'src_name' is equal to null
2711       {
2712         // Nothing changed.
2713       } else {
2714         if (!id_map)
22.2: 'id_map' is null
23: Taking true branch
2715         {
2716           id_map = kh_init(ccv_cnnp_parameter_id);
2717           for (j = 0; j < from_parameter_size; j++)
24: Assuming 'j' is < 'from_parameter_size'
25: Loop condition is true. Entering loop body
54: Assuming 'j' is >= 'from_parameter_size'
55: Loop condition is false. Execution continues on line 2725
2718           {
2719             int ret;
2720             const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret);
26: Calling 'kh_put_ccv_cnnp_parameter_id'
52: Returning from 'kh_put_ccv_cnnp_parameter_id'
2721             assert(ret != 0);
53: Taking true branch
2722             kh_val(id_map, k) = j;
2723           }
2724         }
2725         const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name);
56: Calling 'kh_get_ccv_cnnp_parameter_id'
2726         if (k == kh_end(id_map)) // Cannot find the name, skip.
2727           continue;
2728         src_d = kh_val(id_map, k);
2729         assert(src_d >= 0);
2730         assert(src_d < from_parameter_size);
2731       }
2732     }
2733     assert(src_d >= 0);
2734     assert(src_d < from_parameter_size);
2735     const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d))->d;
2736     // If the original is not init'ed. We cannot share from.
2737     if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2738       continue;
2739     for (j = 0; j < parallel_count; j++)
2740     {
2741       ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size]);
2742       assert(src);
2743       ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2744       if (dest && !((uintptr_t)dest & (uintptr_t)1))
2745         ccv_nnc_tensor_free(dest);
2746       to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2747     }
2748     // Mark this symbol as init'ed.
2749     const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d))->d;
2750     to_init_v[d >> 5] |= (1u << (d & 0x1f));
2751   }
2752   ccv_array_free(to_parameter_indices);
2753   ccv_array_free(from_parameter_indices);
2754   if (id_map)
2755     kh_destroy(ccv_cnnp_parameter_id, id_map);
2756   if (updated_name)
2757     ccfree(updated_name);
2758   // Mark it as incomplete so we will call init_1.
2759   if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2760     to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2761   else // Remove the flag.
2762     to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v);
2763 }
2764
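Line 2746 above stores the borrowed tensor with its low pointer bit set, line 2744 tests that bit before freeing, and CCV_NNC_TENSOR() masks it off before dereferencing. A self-contained sketch of the same tagging idea; the types and helper names are invented for illustration:

    #include <stdint.h>
    #include <stdlib.h>

    typedef struct { float data[4]; } tensor_t;

    static tensor_t* untag(tensor_t* const p)
    {
        return (tensor_t*)((uintptr_t)p & ~(uintptr_t)1); /* what CCV_NNC_TENSOR() does */
    }

    static int is_borrowed(tensor_t* const p)
    {
        return ((uintptr_t)p & (uintptr_t)1) != 0; /* bit 0 set: shared, do not free */
    }

    int main(void)
    {
        /* Heap pointers are at least 2-byte aligned, so bit 0 is free to use as a tag. */
        tensor_t* owned = (tensor_t*)calloc(1, sizeof(tensor_t));
        tensor_t* shared = (tensor_t*)((uintptr_t)owned | (uintptr_t)1); /* a second slot borrowing the same storage */
        untag(shared)->data[0] = 1.0f;
        if (!is_borrowed(owned))
            free(owned); /* only the untagged owner is released */
        return 0;
    }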
2765 ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2766 {
2767   if (!compiled_data->stream_map)
2768     compiled_data->stream_map = kh_init(stream_map);
2769   int ret = 0;
2770   khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret);
2771   assert(ret >= 0);
2772   ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k);
2773   // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2774   if (ret != 0)
2775   {
2776     stream = ccv_nnc_stream_context_new(type);
2777     kh_val(compiled_data->stream_map, k) = stream;
2778   }
2779   return stream;
2780 }
2781
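ccv_cnnp_compiled_data_get_stream above leans on kh_put's ret output: a non-zero ret means the key was just inserted, so the value slot has to be filled before it can be used (in the listing, line 2772 reads the slot before the ret check and line 2777 overwrites it in the newly-inserted case). The same get-or-create shape, sketched against a hypothetical int-keyed map:

    #include <stdio.h>
    #include <stdlib.h>
    #include "khash.h"

    KHASH_MAP_INIT_INT(obj_map, void*) /* stands in for the stream_map type */

    static void* make_obj(const int type)
    {
        int* const p = (int*)malloc(sizeof(int));
        *p = type;
        return p;
    }

    static void* get_or_create(khash_t(obj_map)* const map, const int type)
    {
        int ret = 0;
        const khiter_t k = kh_put(obj_map, map, type, &ret); /* inserts the key if missing */
        if (ret != 0) /* key is new: its value slot is not yet meaningful */
            kh_val(map, k) = make_obj(type);
        return kh_val(map, k);
    }

    int main(void)
    {
        khash_t(obj_map)* map = kh_init(obj_map);
        void* const a = get_or_create(map, 7);
        void* const b = get_or_create(map, 7); /* second call hits the cache */
        printf("%s\n", a == b ? "same" : "different");
        free(a);
        kh_destroy(obj_map, map);
        return 0;
    }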
2782 void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2783 {
2784   ccv_array_t* to_parameter_indices;
2785   int to_param_ref;
2786   ccv_array_t* from_parameter_indices;
2787   int from_param_ref;
2788   _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2789   // Should be exactly the same tensor.
2790   if (to_param_ref < 0 && from_param_ref < 0)
2791     { assert(from_parameter_indices->rnum == to_parameter_indices->rnum); }
2792   // To models.
2793   ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2794   assert(to_compiled_data);
2795   // From models.
2796   const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2797   const int parallel_count = ccv_max(model->parallel_count, 1);
2798   const int to_parameter_size = to_compiled_data->parameters->rnum;
2799   const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2800   assert(aux_in_size >= 0);
2801   assert(aux_out_size >= 0);
2802   int i, j;
2803   ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2804   ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2805   for (i = 0; i < aux_in_size; i++)
2806     inputs[i + 2] = aux_ins[i];
2807   for (i = 0; i < aux_out_size; i++)
2808     outputs[i + 1] = aux_outs[i];
2809   const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v);
2810   uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v);
2811   for (i = 0; i < rnum; i++)
2812   {
2813     const int src_d = *(int*)ccv_array_get(from_parameter_indices, from_param_ref >= 0 ? from_param_ref : i);
2814     assert(src_d >= 0);
2815     assert(src_d < from_compiled_data->parameters->rnum);
2816     const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d))->d;
2817     // If the original is not init'ed. We cannot copy from.
2818     if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2819       continue;
2820     const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2821     assert(dest_d >= 0);
2822     assert(dest_d < to_compiled_data->parameters->rnum);
2823     if (parallel_count > 1)
2824     {
2825       ccv_nnc_stream_context_t* streams[parallel_count];
2826       ccv_nnc_stream_signal_t* signal;
2827       if (stream_context)
2828         signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2829       for (j = 0; j < parallel_count; j++)
2830       {
2831         ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size]);
2832         ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]);
2833         if (!dest || !src)
2834         {
2835           streams[j] = 0;
2836           continue;
2837         }
2838         // At the moment, can only handle them on the same device.
2839         assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type));
2840         assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type));
2841         const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2842         const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type);
2843         int type = stream_type;
2844         CCV_STREAM_SET_DEVICE_ID(type, device_id);
2845         ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2846         // Wait signal to finish.
2847         if (stream_context)
2848           ccv_nnc_stream_context_wait_signal(stream_0, signal);
2849         inputs[0] = outputs[0] = dest;
2850         inputs[1] = src;
2851         ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2852         if (stream_context)
2853         {
2854           ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2855           ccv_nnc_stream_context_wait_signal(stream_context, signal);
2856         }
2857         streams[j] = stream_0;
2858       }
2859       // If this should be blocking, blocking it.
2860       if (!stream_context)
2861         for (j = 0; j < parallel_count; j++)
2862           if (streams[j])
2863             ccv_nnc_stream_context_wait(streams[j]);
2864     } else {
2865       ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d]);
2866       assert(src);
2867       ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d]);
2868       assert(dest);
2869       inputs[0] = outputs[0] = dest;
2870       inputs[1] = src;
2871       ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2872     }
2873     // Mark this symbol as init'ed.
2874     const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d))->d;
2875     to_init_v[d >> 5] |= (1u << (d & 0x1f));
2876   }
2877   ccv_array_free(to_parameter_indices);
2878   ccv_array_free(from_parameter_indices);
2879 }
2880
2881 void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2882 {
2883   int to_param_ref;
2884   ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2885   // To models.
2886   ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2887   assert(to_compiled_data);
2888   // Tensor has to be inited already.
2889   assert(!!to_compiled_data->tensors_init.v);
2890   assert(to_compiled_data->tensors.parameters);
2891   // From models.
2892   const int parallel_count = ccv_max(model->parallel_count, 1);
2893   const int to_parameter_size = to_compiled_data->parameters->rnum;
2894   const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2895   assert(aux_in_size >= 0);
2896   assert(aux_out_size >= 0);
2897   int i, j;
2898   ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2899   ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2900   for (i = 0; i < aux_in_size; i++)
2901     inputs[i + 1] = aux_ins[i];
2902   for (i = 0; i < aux_out_size; i++)
2903     outputs[i + 1] = aux_outs[i];
2904   for (i = 0; i < rnum; i++)
2905   {
2906     const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2907     assert(dest_d >= 0);
2908     assert(dest_d < to_compiled_data->parameters->rnum);
2909     if (parallel_count > 1)
2910     {
2911       ccv_nnc_stream_context_t* streams[parallel_count];
2912       ccv_nnc_stream_signal_t* signal;
2913       if (stream_context)
2914         signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2915       for (j = 0; j < parallel_count; j++)
2916       {
2917         ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]);
2918         if (!dest)
2919         {
2920           streams[j] = 0;
2921           continue;
2922         }
2923         const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2924         const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type);
2925         int type = stream_type;
2926         CCV_STREAM_SET_DEVICE_ID(type, device_id);
2927         ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2928         // Wait signal to finish.
2929         if (stream_context)
2930           ccv_nnc_stream_context_wait_signal(stream_0, signal);
2931         inputs[0] = outputs[0] = dest;
2932         ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2933         if (stream_context)
2934         {
2935           ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2936           ccv_nnc_stream_context_wait_signal(stream_context, signal);
2937         }
2938         streams[j] = stream_0;
2939       }
2940       // If this should be blocking, blocking it.
2941       if (!stream_context)
2942         for (j = 0; j < parallel_count; j++)
2943           if (streams[j])
2944             ccv_nnc_stream_context_wait(streams[j]);
2945     } else {
2946       ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d]);
2947       assert(dest);
2948       inputs[0] = outputs[0] = dest;
2949       ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2950     }
2951     // No need to mark this symbol as init'ed, it is already.
2952   }
2953   ccv_array_free(to_parameter_indices);
2954 }
2955
2956 void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2957 {
2958   int to_param_ref;
2959   ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2960   // To models.
2961   ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2962   assert(to_compiled_data);
2963   // Tensor has to be inited already.
2964   assert(!!to_compiled_data->tensors_init.v);
2965   ccv_nnc_tensor_t** tensor_gradients;
2966   if (to_compiled_data->backward.count > 1)
2967     tensor_gradients = to_compiled_data->tensors.accum_gradients;
2968   else
2969     tensor_gradients = to_compiled_data->tensors.gradients;
2970   assert(tensor_gradients);
2971   // From models.
2972   const int parallel_count = ccv_max(model->parallel_count, 1);
2973   const int to_parameter_size = to_compiled_data->parameters->rnum;
2974   const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2975   assert(aux_in_size >= 0);
2976   assert(aux_out_size >= 0);
2977   int i, j;
2978   ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2979   ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2980   for (i = 0; i < aux_in_size; i++)
2981     inputs[i + 1] = aux_ins[i];
2982   for (i = 0; i < aux_out_size; i++)
2983     outputs[i + 1] = aux_outs[i];
2984   for (i = 0; i < rnum; i++)
2985   {
2986     const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2987     assert(dest_d >= 0);
2988     assert(dest_d < to_compiled_data->parameters->rnum);
2989     if (parallel_count > 1)
2990     {
2991       ccv_nnc_stream_context_t* streams[parallel_count];
2992       ccv_nnc_stream_signal_t* signal;
2993       if (stream_context)
2994         signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2995       for (j = 0; j < parallel_count; j++)
2996       {
2997         ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2998         if (!dest)
2999         {
3000           streams[j] = 0;
3001           continue;
3002         }
3003         const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3004         const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type);
3005         int type = stream_type;
3006         CCV_STREAM_SET_DEVICE_ID(type, device_id);
3007         ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3008         // Wait signal to finish.
3009         if (stream_context)
3010           ccv_nnc_stream_context_wait_signal(stream_0, signal);
3011         inputs[0] = outputs[0] = dest;
3012         ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3013         if (stream_context)
3014         {
3015           ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3016           ccv_nnc_stream_context_wait_signal(stream_context, signal);
3017         }
3018         streams[j] = stream_0;
3019       }
3020       // If this should be blocking, blocking it.
3021       if (!stream_context)
3022         for (j = 0; j < parallel_count; j++)
3023           if (streams[j])
3024             ccv_nnc_stream_context_wait(streams[j]);
3025     } else {
3026       ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
3027       if (!dest)
3028         continue;
3029       assert(dest);
3030       inputs[0] = outputs[0] = dest;
3031       ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3032     }
3033     // No need to mark this symbol as init'ed, it is already.
3034   }
3035   ccv_array_free(to_parameter_indices);
3036 }
3037
3038void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
3039{
3040 // Only CUDA backend has this feature.
3041#ifdef HAVE_CUDA1
3042 int to_param_ref;
3043 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3044 // To models.
3045 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3046 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3046, __extension__ __PRETTY_FUNCTION__); }))
;
3047 // Tensor has to be inited already.
3048 assert(!!compiled_data->tensors_init.v)((void) sizeof ((!!compiled_data->tensors_init.v) ? 1 : 0)
, __extension__ ({ if (!!compiled_data->tensors_init.v) ; else
__assert_fail ("!!compiled_data->tensors_init.v", "ccv_cnnp_model.c"
, 3048, __extension__ __PRETTY_FUNCTION__); }))
;
3049 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 3049, __extension__ __PRETTY_FUNCTION__
); }))
;
3050 // From models.
3051 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3052 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3053 int i;
3054 for (i = 0; i < rnum; i++)
3055 {
3056 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3057 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3057, __extension__ __PRETTY_FUNCTION__); }))
;
3058 assert(dest_d < compiled_data->parameters->rnum)((void) sizeof ((dest_d < compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3058, __extension__ __PRETTY_FUNCTION__
); }))
;
3059 if (parallel_count > 1)
3060 {
3061 assert(0 && "Cannot support this when data parallel is in effect.")((void) sizeof ((0 && "Cannot support this when data parallel is in effect."
) ? 1 : 0), __extension__ ({ if (0 && "Cannot support this when data parallel is in effect."
) ; else __assert_fail ("0 && \"Cannot support this when data parallel is in effect.\""
, "ccv_cnnp_model.c", 3061, __extension__ __PRETTY_FUNCTION__
); }))
;
3062 } else {
3063 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[dest_d]) & ~(uintptr_t)1))
;
3064 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 3064, __extension__
__PRETTY_FUNCTION__); }))
;
3065 ccv_nnc_tensor_param_t params = src->info;
3066 if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) != CCV_TENSOR_GPU_MEMORY)
3067 continue;
3068 const size_t size = ccv_nnc_tensor_data_size(params);
3069 if (size <= 0)
3070 continue;
3071 const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1);
3072 const int tfb = (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL(0xFFF) && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
3073 ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_t));
3074 tensor->dataof = 0;
3075 tensor->alias_ref = 0;
3076 tensor->sig = 0;
3077 tensor->refcount = 1;
3078 tensor->info = params;
3079 if (tfb)
3080 {
3081 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000) | params.dim[2];
3082 // This corresponding to mat->step
3083 tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]))(((params.dim[1]) * _ccv_get_data_type_size[(((((params.datatype
) & 0xFF000) | params.dim[2])) & 0xFF000) >> 12
] * (((((params.datatype) & 0xFF000) | params.dim[2])) &
0xFFF) + 3) & -4)
;
3084 } else // This won't be recognized by ccv_dense_matrix_t
3085 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000);
3086 // Remove this flag so it can be deallocated as usual.
3087 tensor->type &= ~CCV_NO_DATA_ALLOC;
3088 assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
3089 void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
3090 if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
3091 {
3092 tensor->data.u8 = (uint8_t*)ptr;
3093 cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size);
3094 tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer an explicit prefetch call.
3095 } else {
3096 // Allocation failed.
3097 ccfree(tensor);
3098 continue;
3099 }
3100 // TODO: Cannot run this on the stream context yet, due to allocation and deallocations.
3101 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0);
3102 compiled_data->tensors.parameters[dest_d] = tensor;
3103 // Can free out the old one.
3104 if (should_free)
3105 ccv_nnc_tensor_free(src);
3106 }
3107 // No need to mark this symbol as init'ed, it is already.
3108 }
3109 ccv_array_free(to_parameter_indices);
3110#endif
3111}
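
/* Editor's note: an illustrative, hypothetical sketch of the data-transfer pattern used in the
 * function above: allocate a destination tensor with the same parameters, run
 * CMD_DATA_TRANSFER_FORWARD synchronously, and hand the copy back. The helper name is an
 * assumption and does not appear in this file. */
static ccv_nnc_tensor_t* copy_tensor_example(ccv_nnc_tensor_t* const src)
{
	// Allocate a destination tensor with the same parameters as the source.
	ccv_nnc_tensor_t* const dst = ccv_nnc_tensor_new(0, src->info, 0);
	// Synchronous copy; no hint, no flags, no stream context.
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &src, 1, &dst, 1, 0);
	return dst;
}
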
3112
3113ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
3114{
3115 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3116 assert(compiled_data);
3117 return compiled_data->minimize.minimizer;
3118}
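
/* Editor's note: a small usage sketch (the calling context is assumed) showing how the getter
 * above pairs with ccv_cnnp_model_set_minimizer: after setting a global minimizer, the same
 * command identifier should be reported back. */
static void minimizer_roundtrip_example(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t sgd)
{
	// Apply the minimizer to all parameters (no per-parameter override, no reset).
	ccv_cnnp_model_set_minimizer(model, sgd, 0, 0, 0);
	const ccv_nnc_cmd_t current = ccv_cnnp_model_minimizer(model);
	assert(current.cmd == sgd.cmd); // Same command identifier is reported back.
}
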
3119
3120void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
3121{
3122 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3123 assert(compiled_data);
3124 const int parameter_size = compiled_data->parameters->rnum;
3125 if (parameter_size == 0)
3126 return;
3127 if (reset)
3128 { assert(set_parameters == 0 && set_parameter_size == 0); }
3129 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3130 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
3131 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
3132 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
3133 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3134 // We update all parameters, at this point, we have one minimizer.
3135 if (set_parameters == 0 || set_parameter_size == 0)
3136 compiled_data->minimize.minimizer = minimizer;
3137 int i;
3138 if (set_parameters && set_parameter_size)
3139 {
3140 // I need to save what's the minimizer along with this.
3141 if (!compiled_data->minimize.parameters)
3142 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
3143 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
3144 set_minimizer_for_parameter->minimizer = minimizer;
3145 set_minimizer_for_parameter->parameter_size = set_parameter_size;
3146 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
3147 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
3148 }
3149 // If reset is true, clear the parameters array.
3150 if (reset && compiled_data->minimize.parameters)
3151 {
3152 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3153 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
3154 ccv_array_clear(compiled_data->minimize.parameters);
3155 }
3156 if (!compiled_data->update_nodes)
3157 return;
3158 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
3159 assert(symbolic_graph);
3160 if (saved_aux_size > old_max_saved_aux_size)
3161 {
3162 assert(compiled_data->updated_parameters);
3163 // Reallocate first, move them around later.
3164 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
3165 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
3166 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
3167 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
3168 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
3169 }
3170 int flag = 0;
3171 const int parallel_count = ccv_max(model->parallel_count, 1);
3172 if (set_parameters && set_parameter_size)
3173 {
3174 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
3175 for (i = 0; i < set_parameter_size; i++)
3176 {
3177 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
3178 assert(set_parameters[i]->param_sel != 0);
3179 const int old_rnum = parameter_indices->rnum;
3180 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
3181 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
3182 assert(set_parameters[i]->param_ref != 0);
3183 if (param_ref >= 0)
3184 {
3185 assert(param_ref + old_rnum < parameter_indices->rnum);
3186 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
3187 parameter_indices->rnum = old_rnum + 1;
3188 }
3189 }
3190 // We may have duplicated indices, but that is OK, we will set it twice.
3191 for (i = 0; i < parameter_indices->rnum; i++)
3192 {
3193 const int d = *(int*)ccv_array_get(parameter_indices, i);
3194 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
3195 flag = 1;
3196 }
3197 ccv_array_free(parameter_indices);
3198 } else {
3199 for (i = 0; i < parameter_size; i++)
3200 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
3201 flag = 1;
3202 if (compiled_data->minimize.parameters)
3203 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
3204 flag = 1;
3205 }
3206 if (flag)
3207 {
3208 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
3209 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
3210 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3211 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3212 }
3213}
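
/* Editor's note: a hedged sketch of the two calling modes handled by the function above: a
 * global minimizer for every parameter, and a per-parameter override. The `params` io handle
 * is assumed to come from the model's parameter-selection API; the SGD/Adam commands are
 * placeholders supplied by the caller. */
static void set_minimizer_example(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t sgd, const ccv_nnc_cmd_t adam, const ccv_cnnp_model_io_t params)
{
	// Mode 1: update the minimizer for all parameters.
	ccv_cnnp_model_set_minimizer(model, sgd, 0, 0, 0);
	// Mode 2: record an override so only the selected parameters use a different minimizer.
	ccv_cnnp_model_set_minimizer(model, adam, 0, &params, 1);
	// Passing reset = 1 clears previously recorded per-parameter overrides.
	ccv_cnnp_model_set_minimizer(model, sgd, 1, 0, 0);
}
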
3214
3215void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
3216{
3217 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3218 assert(compiled_data);
3219 compiled_data->compile_params = compile_params;
3220}
3221
3222void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
3223{
3224 if (model->graph && out_size > 0)
3225 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
3226 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
3227 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
3228 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
3229 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
3230 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
3231 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
3232}
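
/* Editor's note: an illustrative sketch (file names are assumptions) of how the dot dumper
 * above is typically driven: one FILE* per slot, in the order symbolic graph, concrete graph,
 * gradient-accumulation graph, apply-gradients graph. */
static void dot_dump_example(const ccv_cnnp_model_t* const model)
{
	FILE* outs[4];
	outs[0] = fopen("model_symbolic.dot", "w+");
	outs[1] = fopen("model_graph.dot", "w+");
	outs[2] = fopen("model_accum.dot", "w+");
	outs[3] = fopen("model_apply_gradients.dot", "w+");
	ccv_cnnp_model_dot(model, CCV_NNC_LONG_DOT_GRAPH, outs, 4);
	int i;
	for (i = 0; i < 4; i++)
		if (outs[i])
			fclose(outs[i]);
}
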
3233
3234void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
3235{
3236 if (model->graph)
3237 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
3238}
3239
3240static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
3241{
3242 int i;
3243 const int parameter_size = compiled_data->parameters->rnum;
3244 ccv_array_free(compiled_data->parameters);
3245 if (compiled_data->parameter_flags)
3246 ccfree(compiled_data->parameter_flags);
3247 const int internal_size = compiled_data->internals->rnum;
3248 ccv_array_free(compiled_data->internals);
3249 assert(compiled_data->ids.parameters->rnum == parameter_size);
3250 assert(compiled_data->ids.internals->rnum == internal_size);
3251 for (i = 0; i < parameter_size; i++)
3252 ccfree(*(char**)ccv_array_get(compiled_data->ids.parameters, i));
3253 ccv_array_free(compiled_data->ids.parameters);
3254 for (i = 0; i < internal_size; i++)
3255 ccfree(*(char**)ccv_array_get(compiled_data->ids.internals, i));
3256 ccv_array_free(compiled_data->ids.internals);
3257 const int parallel_count = ccv_max(model->parallel_count, 1);
3258 if (compiled_data->tensors.parameters)
3259 {
3260 for (i = 0; i < parameter_size * parallel_count; i++)
3261 // If it is not marked as not belonging, we can free it.
3262 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
3263 if (compiled_data->tensors.parameters[i])
3264 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
3265 for (i = 0; i < internal_size * parallel_count; i++)
3266 if (compiled_data->tensors.internals[i])
3267 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
3268 ccfree(compiled_data->tensors.parameters);
3269 }
3270 if (compiled_data->tensors.gradients)
3271 {
3272 for (i = 0; i < parameter_size * parallel_count; i++)
3273 {
3274 if (compiled_data->tensors.gradients[i])
3275 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3276 if (compiled_data->tensors.accum_gradients[i])
3277 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3278 }
3279 ccfree(compiled_data->tensors.gradients);
3280 }
3281 if (compiled_data->minimize.parameters)
3282 {
3283 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3284 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
3285 ccv_array_free(compiled_data->minimize.parameters);
3286 }
3287 if (compiled_data->rewindables)
3288 ccv_array_free(compiled_data->rewindables);
3289 if (compiled_data->tensors_init.v)
3290 ccfree(CCV_NNC_INIT_V(compiled_data->tensors_init.v));
3291 if (compiled_data->evaluate.tos)
3292 ccfree(compiled_data->evaluate.tos);
3293 compiled_data->evaluate.tos = 0;
3294 if (compiled_data->stream_map)
3295 {
3296 khiter_t k;
3297 for (k = kh_begin(compiled_data->stream_map); k != kh_end(compiled_data->stream_map); ++k)
3298 {
3299 if (!kh_exist(compiled_data->stream_map, k))
3300 continue;
3301 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k);
3302 ccv_nnc_stream_context_free(stream);
3303 }
3304 kh_destroy(stream_map, compiled_data->stream_map);
3305 }
3306 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3307 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3308 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3309 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3310 if (compiled_data->gradient_checkpoints)
3311 {
3312 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3313 {
3314 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i);
3315 assert(checkpoint->inputs);
3316 ccfree(checkpoint->inputs);
3317 ccv_array_free(checkpoint->tensor_symbols);
3318 }
3319 ccv_array_free(compiled_data->gradient_checkpoints);
3320 }
3321 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3322 ccfree(compiled_data);
3323}
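
/* Editor's note: a generic illustration of the pointer-tagging convention relied on above,
 * where the low bit of tensors.parameters[i] marks a tensor the compiled data does not own
 * and therefore must not free. Helper names are illustrative only. */
static inline ccv_nnc_tensor_t* untagged_tensor_example(ccv_nnc_tensor_t* const tagged)
{
	// Strip the ownership bit to recover the real pointer (what CCV_NNC_TENSOR does above).
	return (ccv_nnc_tensor_t*)((uintptr_t)tagged & ~(uintptr_t)1);
}

static inline int owns_tensor_example(ccv_nnc_tensor_t* const tagged)
{
	// The tensor is owned (and freeable) only when the low bit is clear.
	return !((uintptr_t)tagged & (uintptr_t)1);
}
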
3324
3325void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3326{
3327 ccv_cnnp_model_deinit(model);
3328 if (model->isa->dealloc)
3329 model->isa->dealloc(model);
3330 if (model->io)
3331 {
3332 int i;
3333 for (i = 0; i < model->io->rnum; i++)
3334 {
3335 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
3336 if (model_io->outgoings)
3337 ccv_array_free(model_io->outgoings);
3338 if (model_io->incomings)
3339 ccv_array_free(model_io->incomings);
3340 if (model_io->dependencies)
3341 ccv_array_free(model_io->dependencies);
3342 ccfree(model_io);
3343 }
3344 ccv_array_free(model->io);
3345 }
3346 if (model->parameter_indices)
3347 ccv_array_free(model->parameter_indices);
3348 if (model->inputs)
3349 ccfree(model->inputs);
3350 if (model->graph)
3351 ccv_nnc_symbolic_graph_free(model->graph);
3352 if (model->compiled_data)
3353 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3354 if (model->name)
3355 ccfree(model->name);
3356 ccfree(model);
3357}
3358
3359void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3360{
3361 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3362 if (!compiled_data)
3363 return;
3364 if (compiled_data->graph)
3365 ccv_nnc_graph_cancel(compiled_data->graph);
3366 if (compiled_data->apply_gradients.graph)
3367 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3368}
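
/* Editor's note: an illustrative sketch (not from this file) of how cancellation and teardown
 * might be combined; the assumption that cancellation is requested from a thread other than the
 * one driving evaluation, and that evaluation has returned before freeing, is the caller's. */
static void cancel_then_free_example(ccv_cnnp_model_t* const model)
{
	// Ask both underlying concrete graphs to stop at the next safe point.
	ccv_cnnp_model_cancel(model);
	// Once the evaluating thread has returned, the model can be torn down as usual.
	ccv_cnnp_model_free(model);
}
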
3369
3370void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3371{
3372 model->exec_flags = flags;
3373}
3374
3375int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3376{
3377 return model->exec_flags;
3378}