Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2761, column 13
Array access (via field 'vals') results in a null pointer dereference

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2025-12-18-173245-1246484-1 -x c 
ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7#ifdef HAVE_CUDA1
8#include "gpu/ccv_nnc_compat.h"
9#endif
10
11// MARK - Level-5 API
12
13ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
14{
15 if (!model->io)
16 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
17 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
18 model_io->param_ref = 0;
19 model_io->param_sel = 0;
20 model_io->visit = 0;
21 model_io->model = model;
22 model_io->dependencies = 0;
23 model_io->dependents = 0;
24 model_io->outgoings = 0;
25 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
26 ccv_array_push(model->io, &model_io);
27 if (input_size > 0)
28 {
29 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
30 ccv_array_resize(model_io->incomings, input_size);
31 int i;
32 memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t
)(model_io->incomings)->rsize * (size_t)(0)))
, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
33 for (i = 0; i < input_size; i++)
34 {
35 if (!inputs[i]->outgoings)
36 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
37 ccv_array_push(inputs[i]->outgoings, &model_io);
38 }
39 } else {
40 model_io->incomings = 0;
41 }
42 return model_io;
43}
44
45void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
46{
47 assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__
({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0"
, "ccv_cnnp_model.c", 47, __extension__ __PRETTY_FUNCTION__);
}))
;
48 if (!model_io->dependencies)
49 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
50 int i, j;
51 for (i = 0; i < dependency_size; i++)
52 {
53 int flag = 0;
54 // Check if it is already exist or not.
55 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
56 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t
)(model_io->dependencies)->rsize * (size_t)(j)))
== dependencies[i])
57 flag = 1;
58 if (flag)
59 continue;
60 ccv_array_push(model_io->dependencies, dependencies + i);
61 ++dependencies[i]->dependents;
62 }
63}
64
65int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
66{
67 return model->output_size;
68}
69
70int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
71{
72 // If the model is compiled, it is default to 1 unless it is not.
73 if (model->compiled_data)
74 return model->is_trainable >= 0 ? model->is_trainable : 1;
75 return model->is_trainable;
76}
77
78ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
79{
80 if (!model->io)
81 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
82 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s));
83 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
84 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
85 model_io->visit = 0;
86 model_io->model = model;
87 model_io->outputs = 0;
88 model_io->dependencies = 0;
89 model_io->dependents = 0;
90 model_io->incomings = 0;
91 model_io->outgoings = 0;
92 ccv_array_push(model->io, &model_io);
93 return model_io;
94}
95
96void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
97{
98 model->notify_hook.func = func;
99 model->notify_hook.context = context;
100}
101
102void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
103{
104 if (model->notify_hook.func)
105 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
106 if (model->isa->notify)
107 model->isa->notify(model, tag, payload);
108}
109
110static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
111{
112 int i, j;
113 for (i = 0; i < graph_exec_symbol_size; i++)
114 {
115 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
116 // Check whether this tensor symbol has any duplicate.
117 for (j = i + 1; j < graph_exec_symbol_size;)
118 {
119 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
120 // If there is a same tensor symbol, remove it.
121 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
122 {
123 if (j + 1 < graph_exec_symbol_size)
124 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
125 --graph_exec_symbol_size;
126 continue;
127 }
128 ++j;
129 }
130 }
131 return graph_exec_symbol_size;
132}
133
134void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
135{
136 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
137 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
138 int i;
139 if (add_to_array_context->add_parameter_indices && !model->parameter_indices)
140 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
141 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
142 {
143 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data
)) + (size_t)(add_to_array_context->symbols)->rsize * (
size_t)(i)))
;
144 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
145 {
146 // Only add to parameter_indices if it is trainable.
147 if (add_to_array_context->add_parameter_indices)
148 ccv_array_add_unique_int(model->parameter_indices, i);
149 // Found it, return, don't add it.
150 return;
151 }
152 }
153 // Only add to parameter_indices if it is trainable.
154 if (add_to_array_context->add_parameter_indices)
155 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
156 // This is a new one, no need to add_unique_int, it is unique.
157 ccv_array_push(add_to_array_context->symbols, &symbol);
158 if (add_to_array_context->trainables)
159 ccv_array_push(add_to_array_context->trainables, &is_trainable);
160 char id[2048];
161 id[0] = add_to_array_context->prefix;
162 id[1] = '-';
163 int total_len = 2;
164 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
165 {
166 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences
)->data)) + (size_t)(add_to_array_context->sequence->
sequences)->rsize * (size_t)(i)))
;
167 int len;
168 if (name->name && name->name[0] != '\0')
169 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
170 else
171 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
172 total_len += len;
173 if (total_len >= 2047)
174 break;
175 }
176 if (total_len < 2047)
177 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
178 assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__
({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048"
, "ccv_cnnp_model.c", 178, __extension__ __PRETTY_FUNCTION__)
; }))
;
179 char *heap_id = (char*)ccmallocmalloc(total_len + 1);
180 memcpy(heap_id, id, total_len + 1);
181 ccv_array_push(add_to_array_context->ids, &heap_id);
182 ++add_to_array_context->sequence->it;
183}
184
185static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
186{
187 compiled_data->f = compiled_data->fits + output_size;
188 compiled_data->xpu_alloc.mp_hdr = -1;
189 compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str();
190 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc();
191 compiled_data->gradient_checkpoints = gradient_checkpoints;
192}
193
194typedef struct {
195 void* old_graph_exec_symbol_new_hook_context;
196 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
197 ccv_nnc_symbolic_graph_t* graph;
198 ccv_cnnp_model_build_data_t* build_data;
199} ccv_cnnp_model_set_exec_flags_context_t;
200
201static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
202{
203 ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context;
204 if (flags_context->build_data->exec_flags)
205 ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags);
206 if (flags_context->old_graph_exec_symbol_new_hook)
207 flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
208}
209
210static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
211{
212 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 212, __extension__ __PRETTY_FUNCTION__); }))
;
213 model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
214 int i;
215 for (i = 0; i < input_size; i++)
216 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
217 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
218 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
219 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
220 ccv_cnnp_model_sequence_t model_sequence = {
221 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
222 };
223 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
224 .add_parameter_indices = 1,
225 .prefix = 't',
226 .sequence = &model_sequence,
227 .symbols = parameters,
228 .ids = parameter_ids,
229 .trainables = parameter_trainables,
230 };
231 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
232 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
233 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
234 .add_parameter_indices = 0,
235 .prefix = 'r',
236 .sequence = &model_sequence,
237 .symbols = internals,
238 .ids = internal_ids,
239 .trainables = 0,
240 };
241 ccv_cnnp_model_build_data_t build_data = {
242 .exec_flags = 0,
243 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
244 .model_sequence = &model_sequence,
245 .add_to_array = ccv_cnnp_model_add_to_array,
246 .parameters = parameters,
247 .context = {
248 .add_to_parameter = &add_to_parameter_context,
249 .add_to_output = &add_to_output_context,
250 },
251 .gradient_checkpoints = 0,
252 };
253 model->data = &build_data;
254 ccv_cnnp_model_set_exec_flags_context_t flags_context = {
255 .graph = model->graph,
256 .build_data = &build_data,
257 .old_graph_exec_symbol_new_hook = 0,
258 .old_graph_exec_symbol_new_hook_context = 0
259 };
260 flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook);
261 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
262 // Reset back to previous hook.
263 ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0);
264 for (i = 0; i < model->output_size; i++)
265 {
266 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
267 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
268 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
269 continue;
270 // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method
271 // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be
272 // honest, because we cannot handle cases of alias is part of the original tensor but bind differently).
273 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
274 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
275 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, &output, 1, model->outputs + i, 1, "contiguous");
276 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
277 }
278 model->data = 0;
279 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
280 if (model_sequence.sequences)
281 ccv_array_free(model_sequence.sequences);
282 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
283 int not_trainables = 0;
284 // Assert no parameter is alias.
285 for (i = 0; i < parameters->rnum; i++)
286 {
287 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
288 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
289 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 289, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
290 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
== 0)
291 not_trainables = 1;
292 }
293 assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables->
rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables
->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum"
, "ccv_cnnp_model.c", 293, __extension__ __PRETTY_FUNCTION__)
; }))
;
294 uint64_t* parameter_flags = 0;
295 if (not_trainables)
296 {
297 parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
298 for (i = 0; i < parameter_trainables->rnum; i++)
299 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
)
300 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
301 }
302 ccv_array_free(parameter_trainables);
303 // Assert no internal is alias.
304 for (i = 0; i < internals->rnum; i++)
305 {
306 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals
)->rsize * (size_t)(i)))
;
307 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
308 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 308, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
309 }
310 const int output_size = model->output_size;
311 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
312 const int parameters_rnum = parameters->rnum;
313 if (input_size > 0)
314 {
315 ccv_array_resize(parameters, parameters_rnum + input_size);
316 memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(parameters_rnum)))
, model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
317 }
318 ccv_nnc_symbolic_graph_simplify(model->graph,
319 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
320 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
321 CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
322 CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
,
323 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters_rnum + input_size,
324 model->outputs, output_size,
325 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
326 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
327 // Size it down.
328 parameters->rnum = parameters_rnum;
329 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
330 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
331 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
332 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 332, __extension__ __PRETTY_FUNCTION__)
; }))
;
333 compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
334 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
335 compiled_data->loss = loss;
336 if (loss.cmd == CCV_NNC_NOOP)
337 {
338 // If no loss function provided, there is no fits.
339 for (i = 0; i < output_size; i++)
340 {
341 compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
342 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
343 if (alias_to.d < 0)
344 compiled_data->f[i] = model->outputs[i];
345 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
346 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
347 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
348 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
349 int j;
350 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
351 { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if (
ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c"
, 351, __extension__ __PRETTY_FUNCTION__); }))
; } // There is no ofs.
352 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
353 }
354 }
355 } else {
356 for (i = 0; i < output_size; i++)
357 {
358 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
359 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
360 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
361 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit}
, (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 -1)
, TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, (
1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 -1)
, 0);
362 }
363 }
364 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
365 ccv_nnc_symbolic_graph_simplify(model->graph,
366 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, // Only do Ops fusion, in this way, we can fuse the loss function.
367 0, 0, // No need to provide binds at this point.
368 compiled_data->f, model->output_size,
369 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
370 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
371 // If inputs are from GPU, stream type is GPU.
372 compiled_data->parameters = parameters;
373 compiled_data->parameter_flags = parameter_flags;
374 compiled_data->internals = internals;
375 compiled_data->ids.parameters = parameter_ids;
376 compiled_data->ids.internals = internal_ids;
377 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
378}
379
380static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
381{
382 ccv_array_t* const stack = (ccv_array_t*)context;
383 ccv_array_push(stack, &symbol.d);
384}
385
386static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
387{
388 const ccv_nnc_tensor_symbol_t src_symbol = {
389 .d = src_index,
390 .graph = src_graph
391 };
392 const ccv_nnc_tensor_symbol_t dest_symbol = {
393 .d = dest_index,
394 .graph = dest_graph
395 };
396 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
397 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
398 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
399 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
400 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
401 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
402}
403
404static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
405{
406 const ccv_nnc_tensor_symbol_t src_symbol = {
407 .d = src_index,
408 .graph = src_graph
409 };
410 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
411 const ccv_nnc_tensor_symbol_t dest_symbol = {
412 .d = dest_index,
413 .graph = dest_graph
414 };
415 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
416 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
417}
418
419static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
420static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
421
422typedef struct {
423 int parallel_count;
424 ccv_nnc_symbolic_graph_t* graph;
425 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
426} ccv_nnc_graph_exec_update_t;
427
428static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
429{
430 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
431 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
432 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
433 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
434 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
435 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
436 const int parallel_count = graph_exec_update->parallel_count;
437 int i;
438 for (i = 1; i < parallel_count; i++)
439 {
440 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
441 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
442 {
443 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
444 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
445 }
446 }
447}
448
449void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
450{
451 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 451, __extension__ __PRETTY_FUNCTION__); }))
;
452 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 452, __extension__ __PRETTY_FUNCTION__)
; }))
;
453 assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if
(!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c"
, 453, __extension__ __PRETTY_FUNCTION__); }))
;
454 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
455 init->graph = ccv_nnc_symbolic_graph_new();
456 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
457 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
458 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
459 init->parallel_count = model->parallel_count;
460 init->memory_compression = model->memory_compression;
461 init->memory_reduction = model->memory_reduction;
462 init->gradient_checkpointing = model->gradient_checkpointing;
463 init->compiled_data->stream_type = model->compiled_data->stream_type;
464 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
465 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
466 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
467 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
468 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
469 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0);
470 int i, j;
471 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
472 for (i = 0; i < compiled_data->parameters->rnum; i++)
473 {
474 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
475 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 475, __extension__ __PRETTY_FUNCTION__)
; }))
;
476 }
477 for (i = 0; i < compiled_data->internals->rnum; i++)
478 {
479 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
480 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 480, __extension__ __PRETTY_FUNCTION__)
; }))
;
481 }
482 // Update inputs.
483 assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size)
? 1 : 0), __extension__ ({ if (model->input_size == init->
input_size) ; else __assert_fail ("model->input_size == init->input_size"
, "ccv_cnnp_model.c", 483, __extension__ __PRETTY_FUNCTION__)
; }))
;
484 for (i = 0; i < model->input_size; i++)
485 if (model->inputs[i].d >= 0)
486 {
487 assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0"
, "ccv_cnnp_model.c", 487, __extension__ __PRETTY_FUNCTION__)
; }))
;
488 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
489 }
490 // Update outputs.
491 assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size
) ? 1 : 0), __extension__ ({ if (model->output_size == init
->output_size) ; else __assert_fail ("model->output_size == init->output_size"
, "ccv_cnnp_model.c", 491, __extension__ __PRETTY_FUNCTION__)
; }))
;
492 for (i = 0; i < model->output_size; i++)
493 {
494 if (model->outputs[i].d >= 0)
495 {
496 assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->outputs[i].d >= 0) ; else __assert_fail (
"init->outputs[i].d >= 0", "ccv_cnnp_model.c", 496, __extension__
__PRETTY_FUNCTION__); }))
;
497 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
498 }
499 if (model->outputs[i].d != model->compiled_data->f[i].d)
500 {
501 assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data
->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[
i].d != init->compiled_data->f[i].d) ; else __assert_fail
("init->outputs[i].d != init->compiled_data->f[i].d"
, "ccv_cnnp_model.c", 501, __extension__ __PRETTY_FUNCTION__)
; }))
;
502 if (model->compiled_data->f[i].d >= 0)
503 {
504 assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ?
1 : 0), __extension__ ({ if (init->compiled_data->f[i]
.d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0"
, "ccv_cnnp_model.c", 504, __extension__ __PRETTY_FUNCTION__)
; }))
;
505 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
506 }
507 }
508 }
509 // Go through the graph to set tensor on matching symbols
510 for (i = 0; i < stack->rnum; i++)
511 {
512 const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize
* (size_t)(i)))
;
513 // If exceed range, skip.
514 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
515 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
516 continue;
517 const ccv_nnc_graph_exec_symbol_t src_symbol = {
518 .d = d,
519 .graph = init->graph
520 };
521 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
522 .d = d,
523 .graph = model->graph
524 };
525 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
526 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
527 // If the name doesn't match, skip.
528 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
529 continue;
530 // Now get all the inputs and outputs, if matches, set them.
531 const int* src_inputs;
532 int src_input_size;
533 const int* src_outputs;
534 int src_output_size;
535 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
536 const int* dest_inputs;
537 int dest_input_size;
538 const int* dest_outputs;
539 int dest_output_size;
540 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
541 // We may have unmatched input / output size because this is the minimizer and it has
542 // different saved_aux (for example, when we shrunk with CMD_NOOP).
543 if (src_input_size != dest_input_size)
544 continue;
545 if (src_output_size != dest_output_size)
546 continue;
547 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
548 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
549 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
550 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
551 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
552 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
553 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
554 // a new exec symbol.
555 for (j = 0; j < src_input_size; j++)
556 if (src_inputs[j] >= 0)
557 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
558 for (j = 0; j < src_output_size; j++)
559 if (src_outputs[j] >= 0)
560 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
561 }
562 ccv_array_free(stack);
563 // After this, we get all tensors in the model graph resolved through tensor_auto.
564 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0);
565 // Verify symbols we get matches.
566 const int parameter_size = compiled_data->parameters->rnum;
567 for (i = 0; i < parameter_size; i++)
568 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->parameters)->data)) + (size_t)(compiled_data
->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (
((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data->
parameters)->data)) + (size_t)(compiled_data->parameters
)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d"
, "ccv_cnnp_model.c", 568, __extension__ __PRETTY_FUNCTION__)
; }))
; }
569 const int internal_size = compiled_data->internals->rnum;
570 for (i = 0; i < internal_size; i++)
571 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->internals)->data)) + (size_t)(compiled_data
->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->internals)->
data)) + (size_t)(init->compiled_data->internals)->rsize
* (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((compiled_data->internals)->data)) +
(size_t)(compiled_data->internals)->rsize * (size_t)(i
))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init
->compiled_data->internals)->data)) + (size_t)(init->
compiled_data->internals)->rsize * (size_t)(i))))->d
) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d"
, "ccv_cnnp_model.c", 571, __extension__ __PRETTY_FUNCTION__)
; }))
; }
572 // Go through compiled data.
573 if (compiled_data->tensor_arena)
574 {
575 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
576 if (flag == 0 && compiled_data->graph_exec_arena)
577 {
578 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
579 // Since we will reinit, if we previously set is_test, we need to set it again.
580 if (compiled_data->is_test)
581 {
582 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
583 ccv_nnc_graph_exec_update_t update = {
584 .parallel_count = parallel_count,
585 .graph = model->graph,
586 .graph_exec_arena = compiled_data->graph_exec_arena,
587 };
588 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
589 }
590 } else
591 // Free-up tensor arena & graph exec arena.
592 _ccv_cnnp_compiled_data_graph_free(compiled_data);
593 }
594 // There are other compiled graphs, for accum and apply gradients.
595 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
596 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
597 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
598 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
599 // That is why we don't update these compiled graphs at all this point.
600 // Free the model, we've already "absorbed" it.
601 ccv_cnnp_model_free(init);
602}
603
604void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
605{
606 assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model->
input_size == 0) ? 1 : 0), __extension__ ({ if (input_size ==
model->input_size || model->input_size == 0) ; else __assert_fail
("input_size == model->input_size || model->input_size == 0"
, "ccv_cnnp_model.c", 606, __extension__ __PRETTY_FUNCTION__)
; }))
;
607 if (model->input_size == 0)
608 model->input_size = input_size;
609 if (!model->graph) // The graph is not compiled yet.
610 {
611 model->graph = ccv_nnc_symbolic_graph_new();
612 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
613 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 613, __extension__ __PRETTY_FUNCTION__)
; }))
;
614 int i, flag = 0;
615 for (i = 0; !flag && i < input_size; i++)
616 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY);
617 // If inputs are from GPU, stream type is GPU.
618 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
619 model->compiled_data->minimize.minimizer = minimizer;
620 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
621 } else {
622 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
623 // And then absorb the "new model" to the old one.
624 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
625 ccv_cnnp_model_absorb(model, init, inputs, input_size);
626 // Reset minimizer.
627 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
628 }
629}
630
631ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
632{
633 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
634 new_model->is_trainable = is_trainable;
635 return new_model;
636}
637
638void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
639{
640 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 640, __extension__ __PRETTY_FUNCTION__); }))
;
641 assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0
), __extension__ ({ if (output_size == model->output_size)
; else __assert_fail ("output_size == model->output_size"
, "ccv_cnnp_model.c", 641, __extension__ __PRETTY_FUNCTION__)
; }))
;
642 ccv_nnc_symbolic_graph_t* const graph = model->graph;
643 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0);
644 int i;
645 for (i = 0; i < output_size; i++)
646 {
647 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL"
, "ccv_cnnp_model.c", 647, __extension__ __PRETTY_FUNCTION__)
; }))
;
648 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
649 }
650}
651
652void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
653{
654 if (workspace_size == model->workspace_size)
655 return;
656 model->workspace_size = workspace_size;
657 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
658 if (compiled_data && compiled_data->graph)
659 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0);
660}
661
662size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
663{
664 return model->workspace_size;
665}
666
667void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
668{
669 if (parallel == 0)
670 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
671 else
672 model->parallel_count = parallel;
673 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
674 if (compiled_data)
675 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 675, __extension__ __PRETTY_FUNCTION__)
; }))
; }
676}
677
678void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
679{
680 model->max_stream_count = max_stream_count;
681 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
682 if (compiled_data)
683 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 683, __extension__ __PRETTY_FUNCTION__)
; }))
; }
684}
685
686void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
687{
688 model->memory_compression = memory_compression;
689 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
690 if (compiled_data)
691 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 691, __extension__ __PRETTY_FUNCTION__)
; }))
; }
692}
693
694void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
695{
696 model->memory_reduction = memory_reduction;
697 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
698 if (compiled_data)
699 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 699, __extension__ __PRETTY_FUNCTION__)
; }))
; }
700}
701
702void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
703{
704 model->gradient_checkpointing = gradient_checkpointing;
705}
706
707int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
708{
709 return model->gradient_checkpointing;
710}
711
/* Context passed (as void*) to _ccv_cnnp_init_states_for_tensors, which casts it back to this type. */
712typedef struct {
713 int parallel_count; // Exclusive upper bound of the copy index loop (starting at 1) used when transferring an initialized tensor to its copies.
714 ccv_nnc_symbolic_graph_t* graph; // Symbolic graph used to look up the per-index copy of a tensor symbol.
715 ccv_cnnp_compiled_data_t* compiled_data; // Owner of the tensors_init bitmap that is tested and updated per symbol.
716 ccv_nnc_tensor_arena_t* tensor_arena; // Arena used to resolve tensor symbols into concrete tensors.
717} ccv_nnc_tensor_init_states_t;
718
719static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
720{
721 int i;
722 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
723 for (i = 0; i < compiled_data->parameters->rnum; i++)
724 {
725 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
726 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
727 return 1;
728 }
729 for (i = 0; i < compiled_data->internals->rnum; i++)
730 {
731 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
732 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
733 return 1;
734 }
735 return 0;
736}
737
738static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
739{
740 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
741 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
742 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
743 if (!output_tensor)
744 return;
745 const int d = output_symbol.d;
746 assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data->
tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states
->compiled_data->tensors_init.size) ; else __assert_fail
("d < tensor_init_states->compiled_data->tensors_init.size"
, "ccv_cnnp_model.c", 746, __extension__ __PRETTY_FUNCTION__)
; }))
;
747 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data
->tensors_init.v) & ~(uintptr_t)1))
;
748 if (init_v[d >> 5] & (1u << (d & 0x1f)))
749 return;
750 init_v[d >> 5] |= (1u << (d & 0x1f));
751 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
752 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
753 const int parallel_count = tensor_init_states->parallel_count;
754 int i;
755 for (i = 1; i < parallel_count; i++)
756 {
757 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
758 if (copy)
759 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
760 }
761}
762
763// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
764// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
765static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
766{
767 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 767, __extension__ __PRETTY_FUNCTION__); }))
;
768 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 768, __extension__ __PRETTY_FUNCTION__)
; }))
;
769 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
770 assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__
({ if (compiled_data->rewindables) ; else __assert_fail (
"compiled_data->rewindables", "ccv_cnnp_model.c", 770, __extension__
__PRETTY_FUNCTION__); }))
;
771 int i;
772 for (i = 0; i < compiled_data->rewindables->rnum; i++)
773 {
774 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) +
(size_t)(compiled_data->rewindables)->rsize * (size_t)
(i)))
;
775 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
776 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
777 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
778 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
779 }
780 ccv_array_clear(compiled_data->rewindables);
781 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
782}
783
784static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
785{
786 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
787 .type = CCV_CNNP_REWIND_TENSOR,
788 .tensor = symbol
789 };
790 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
791 ccv_array_push(rewind_symbols, &rewind_symbol);
792}
793
794static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
795{
796 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
797 .type = CCV_CNNP_REWIND_TENSOR,
798 .tensor = symbol
799 };
800 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
801 ccv_array_push(rewind_symbols, &rewind_symbol);
802}
803
804static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
805{
806 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
807 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
808 .graph_exec = symbol
809 };
810 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
811 ccv_array_push(rewind_symbols, &rewind_symbol);
812}
813
814static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
815{
816 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
817 if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0))
818 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
819 int i;
820 for (i = 1; i < parallel_count; i++)
821 {
822 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
823 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
824 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
825 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
826 }
827}
828
829static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
830{
831 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 831, __extension__ __PRETTY_FUNCTION__); }))
;
832 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 832, __extension__ __PRETTY_FUNCTION__); }))
;
833 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
834 int i;
835 for (i = 1; i < parallel_count; i++)
836 {
837 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
838 if (copy_symbol.graph)
839 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
840 }
841 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
842 if (graph_exec_arena)
843 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
844 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
845 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
846 if (gradient_graph_exec_arena)
847 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
848}
849
850static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
851{
852 int this_parameter_flag = 0;
853 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
854 return this_parameter_flag;
855 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
856 int j, k;
857 // For no-op, we can preserve previous saved_aux_size.
858 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
859 {
860 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
861 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
862 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
863 // make sure some model parameters don't update if we don't want them to.
864 int old_saved_aux_size;
865 if (old_minimizer.cmd == CCV_NNC_NOOP)
866 {
867 int input_size;
868 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
869 if (input_size < 2) // This is not legit.
870 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
871 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
872 old_saved_aux_size = input_size - 2;
873 } else
874 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
875 if (old_saved_aux_size != saved_aux_size)
876 {
877 this_parameter_flag = 1;
878 if (saved_aux_size > old_saved_aux_size)
879 {
880 // Allocate new tensor symbols.
881 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
882 for (j = old_saved_aux_size; j < saved_aux_size; j++)
883 {
884 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
885 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
886 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
887 for (k = 1; k < parallel_count; k++)
888 {
889 ccv_nnc_tensor_param_t dev_info = info;
890 if (k != device_id)
891 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) &
0xfff) << 8))
;
892 else
893 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) &
0xfff) << 8))
;
894 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
895 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
896 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
897 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
898 }
899 }
900 } else {
901 for (j = saved_aux_size; j < old_saved_aux_size; j++)
902 {
903 for (k = 1; k < parallel_count; k++)
904 {
905 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
906 if (src_copy.d >= 0)
907 {
908 ccv_nnc_tensor_symbol_free(graph, src_copy);
909 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
910 }
911 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
912 if (dest_copy.d >= 0)
913 {
914 ccv_nnc_tensor_symbol_free(graph, dest_copy);
915 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
916 }
917 }
918 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
919 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
920 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
921 }
922 }
923 }
924 }
925 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
926 if (this_parameter_flag)
927 {
928 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
929 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
930 const int* inputs = 0;
931 int input_size = 0;
932 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
933 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 933, __extension__ __PRETTY_FUNCTION__)
; }))
;
934 update_inputs[0].d = inputs[0];
935 update_inputs[0].graph = graph;
936 update_inputs[1].d = inputs[1];
937 update_inputs[1].graph = graph;
938 update_outputs[0] = updated_parameters[parameter_indice];
939 for (j = 0; j < saved_aux_size; j++)
940 {
941 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
942 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
943 }
944 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
945 for (k = 1; k < parallel_count; k++)
946 {
947 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
948 assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if
(copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c"
, 948, __extension__ __PRETTY_FUNCTION__); }))
;
949 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
950 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 950, __extension__ __PRETTY_FUNCTION__)
; }))
;
951 update_inputs[0].d = inputs[0];
952 update_inputs[0].graph = graph;
953 update_inputs[1].d = inputs[1];
954 update_inputs[1].graph = graph;
955 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
956 for (j = 0; j < saved_aux_size; j++)
957 {
958 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
959 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
960 }
961 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
962 }
963 }
964 return this_parameter_flag;
965}
966
// Pairs one minimizer command with the group of model parameters it should be applied to.
967typedef struct {
// Number of entries of `parameters` in use; this file uses it as the loop bound when walking `parameters`.
968 int parameter_size;
// Minimizer command associated with the listed parameters.
969 ccv_nnc_cmd_t minimizer;
// Declared with a single slot but indexed up to parameter_size - 1 in this file; presumably the
// struct is over-allocated where it is created -- TODO confirm at the allocation site.
970 ccv_cnnp_model_io_t parameters[1];
971} ccv_cnnp_set_minimizer_for_parameter_t;
972
973static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
974{
975 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
976 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 976, __extension__ __PRETTY_FUNCTION__); }))
;
977 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
978 // We update all parameters, at this point, we have one minimizer.
979 const int parameter_size = compiled_data->parameters->rnum;
980 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
981 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
982 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 982, __extension__ __PRETTY_FUNCTION__); }))
;
983 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
984 ccv_array_t* const parameters = compiled_data->minimize.parameters;
985 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
986 int i, j, flag = 0;
987 for (i = 0; i < parameters->rnum; i++)
988 {
989 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
990 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
991 {
992 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
993 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 993, __extension__ __PRETTY_FUNCTION__)
; }))
;
994 const int old_rnum = parameter_indices->rnum;
995 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
996 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
997 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 997, __extension__ __PRETTY_FUNCTION__)
; }))
;
998 if (param_ref >= 0)
999 {
1000 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 1000, __extension__ __PRETTY_FUNCTION__
); }))
;
1001 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
1002 parameter_indices->rnum = old_rnum + 1;
1003 }
1004 }
1005 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
1006 // We may have duplicated indices, but that is OK, we will set it twice.
1007 for (j = 0; j < parameter_indices->rnum; j++)
1008 {
1009 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
1010 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 1010, __extension__ __PRETTY_FUNCTION__
); }))
;
1011 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
1012 flag = 1;
1013 }
1014 ccv_array_clear(parameter_indices);
1015 }
1016 ccv_array_free(parameter_indices);
1017 return flag;
1018}
1019
1020static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
1021{
1022 if (new_saved_aux_size == old_saved_aux_size)
1023 return;
1024 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 1024, __extension__ __PRETTY_FUNCTION__
); }))
;
1025 int i, j;
1026 for (i = parameter_size - 1; i >= 0; i--)
1027 {
1028 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
1029 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1030 for (j = old_saved_aux_size - 1; j >= 0; j--)
1031 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1032 }
1033}
1034
1035static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1036{
1037 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1038 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1038, __extension__ __PRETTY_FUNCTION__); }))
;
1039 if (!compiled_data->rewindables)
1040 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1041 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1042 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1043 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1044}
1045
// Builds the gradient / minimizer portion of model->graph and records the resulting symbols
// on model->compiled_data.
//
// Parameters:
//   model           - model whose graph is extended; compiled_data->gradient_mode must still be
//                     CCV_CNNP_COMPILED_DATA_GRADIENT_NONE (asserted).
//   gradient_mode   - mode to switch to; must not be CCV_CNNP_COMPILED_DATA_GRADIENT_NONE (asserted).
//   disable_outgrad - bit mask over model inputs; a set bit i means no gradient is requested for input i.
//   fits            - optional tensors; when non-NULL, fit_size must equal output_size * parallel_count
//                     (asserted) and the first output_size entries provide the info for compiled_data->fits.
//   fit_size        - number of entries in fits.
//
// On return compiled_data->gradient_mode is set to gradient_mode.
1046static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1047{
1048 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1049 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1049, __extension__ __PRETTY_FUNCTION__
); }))
;
1050 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1050, __extension__ __PRETTY_FUNCTION__
); }))
;
1051 const int evaluate_to_size = compiled_data->evaluate.to_size;
1052 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 1052, __extension__ __PRETTY_FUNCTION__
); }))
;
1053 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
// Grow evaluate.tos to hold evaluate_to_size * parallel_count exec symbols followed by the same
// number of ccv_nnc_graph_exec_t; evaluate.to_ops points at that second region.
1054 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1055 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1056 int i, j;
1057 const int output_size = model->output_size;
1058 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 1058, __extension__ __PRETTY_FUNCTION__
); }))
;
// Copy the tensor info of each provided fit tensor onto the matching fit symbol.
1059 if (fits)
1060 for (i = 0; i < output_size; i++)
1061 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1062 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1063 const int parameter_size = compiled_data->parameters->rnum;
// One allocation carries three arrays back to back: updated_parameters, update_nodes, saved_aux.
1064 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1065 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1066 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1067 int parameter_size_maybe_more = parameter_size;
1068 compiled_data->disable_outgrad = disable_outgrad;
// outgrad_size = number of model inputs for which a gradient is requested.
1069 int outgrad_size;
1070 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1071 outgrad_size = 0;
1072 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1073 outgrad_size = model->input_size;
1074 else {
1075 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1075, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1076 outgrad_size = 0;
// Count the inputs whose bit is clear in disable_outgrad.
1077 for (i = 0; i < model->input_size; i++)
1078 if (!(disable_outgrad & ((uint64_t)1 << i)))
1079 ++outgrad_size;
1080 }
1081 compiled_data->outgrad_size = outgrad_size;
1082 parameter_size_maybe_more += outgrad_size;
// Second allocation: parameter_size_maybe_more gradient symbols, then room for
// parameter_size_maybe_more * parallel_count exec symbols used as backward.tos.
// outgrads aliases the gradient entries that follow the parameter gradients (NULL when there are none).
1083 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1084 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1085 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1086 compiled_data->backward.to_size = parameter_size_maybe_more;
1087 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
;
// When parameter_flags is present, pass a temporary copy of the parameter list in which every
// parameter whose flag bit is clear is replaced by NO_TENSOR_SYMBOL. The copy is freed after minimize.
1088 if (compiled_data->parameter_flags)
1089 {
1090 parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1091 for (i = 0; i < parameter_size; i++)
1092 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1093 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1094 else
1095 parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1096 }
// Three variants of the same minimize call, differing only in which inputs get gradients:
// none, all model inputs, or the inputs selected by disable_outgrad.
1097 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1098 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1099 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1100 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1101 else { // Compute minimize with gradients including selected inputs.
1102 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 1102, __extension__ __PRETTY_FUNCTION__
); }))
;
1103 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1103, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1104 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 1104, __extension__ __PRETTY_FUNCTION__
); }))
;
// Gather the inputs whose bit is clear in disable_outgrad, in input order.
1105 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1106 j = 0;
1107 for (i = 0; i < model->input_size; i++)
1108 if (!(disable_outgrad & ((uint64_t)1 << i)))
1109 outgrads[j++] = model->inputs[i];
1110 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1111 }
1112 if (compiled_data->parameter_flags)
1113 ccfreefree(parameters);
// saved_aux was filled with the stride of the default minimizer; widen it to max_saved_aux_size per parameter.
1114 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
// Apply per-parameter minimizer overrides, if any were registered.
1115 if (compiled_data->minimize.parameters)
1116 _ccv_cnnp_apply_parameters_with_minimizer(model);
1117 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1118 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1119 for (i = 0; i < output_size; i++)
1120 {
1121 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1122 // Init this to 1 so we can backprop.
1123 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1124 }
// Rebuild backward.tos compactly: one exec symbol per gradient that actually exists.
1125 compiled_data->backward.to_size = 0;
1126 for (i = 0; i < parameter_size_maybe_more; i++)
1127 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1128 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1129 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
// The update nodes become the graph destinations; input-gradient nodes with no outgoing
// nodes are appended below.
1130 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1131 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1132 {
1133 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1134 continue;
1135 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1136 const int* tos;
1137 int to_size;
1138 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1139 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1140 {
1141 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1142 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1143 int flag = 0;
// Only the trailing destinations (at most i of them) are searched for a duplicate.
1144 const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = (
destination_count - i); (_a > _b) ? _a : _b; })
;
1145 for (j = i - 1; !flag && j >= 0; j--)
1146 if (j + outgrad_destination_start < destination_count)
1147 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1148 if (!flag) // Only if we cannot find it, we add it.
1149 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1150 }
1151 }
1152 if (parallel_count > 1)
1153 {
1154 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1155 0, 0,
1156 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1157 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1158 0, 0, 0,
1159 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1160 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1161 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
// Append the per-device copies (when they exist) of every evaluate / backward "to" node.
1162 for (i = 0; i < evaluate_to_size; i++)
1163 for (j = 1; j < parallel_count; j++)
1164 {
1165 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1166 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1167 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1168 }
1169 const int backward_to_size = compiled_data->backward.to_size;
1170 for (i = 0; i < backward_to_size; i++)
1171 for (j = 1; j < parallel_count; j++)
1172 {
1173 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1174 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1175 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1176 }
1177 }
1178 // Only use memory compression if we are in gradient parameter mode.
1179 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1180 {
1181 if (model->memory_compression)
1182 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1183 if (model->memory_reduction)
1184 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1185 }
// Remove duplicate exec symbols from backward.tos, then publish the new gradient mode.
1186 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1187 compiled_data->gradient_mode = gradient_mode;
1188}
1189
1190void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1191{
1192 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1192, __extension__ __PRETTY_FUNCTION__
); }))
;
1193 const int parameter_size = compiled_data->parameters->rnum;
1194 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1195 const int internal_size = compiled_data->internals->rnum;
1196 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1197 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1198 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1199 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1200}
1201
1202int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1203{
1204 int i, j;
1205 const int parameter_size = compiled_data->parameters->rnum;
1206 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1207 const int internal_size = compiled_data->internals->rnum;
1208 for (i = 0; i < parameter_size; i++)
1209 {
1210 // parameters has to be allocated all together.
1211 if (compiled_data->tensors.parameters[i])
1212 {
1213 for (j = 1; j < parallel_count; j++)
1214 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1214, __extension__ __PRETTY_FUNCTION__
); }))
; }
1215 continue;
1216 }
1217 return 1;
1218 }
1219 for (i = 0; i < internal_size; i++)
1220 {
1221 if (!compiled_data->tensors.internals[i])
1222 return 1;
1223 for (j = 1; j < parallel_count; j++)
1224 if (!compiled_data->tensors.internals[i + j * internal_size])
1225 return 1;
1226 }
1227 return 0;
1228}
1229
1230void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1231{
1232 int i, j;
1233 const int parameter_size = compiled_data->parameters->rnum;
1234 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1235 const int internal_size = compiled_data->internals->rnum;
1236 for (i = 0; i < parameter_size; i++)
1237 {
1238 // parameters has to be allocated all together.
1239 if (compiled_data->tensors.parameters[i])
1240 {
1241 for (j = 1; j < parallel_count; j++)
1242 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1242, __extension__ __PRETTY_FUNCTION__
); }))
; }
1243 continue;
1244 }
1245 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1246 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1247 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1248 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1249 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1250 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1251 for (j = 1; j < parallel_count; j++)
1252 {
1253 if (j != device_id)
1254 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1255 else
1256 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1257 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1258 }
1259 }
1260 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1261 for (i = 0; i < internal_size; i++)
1262 {
1263 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1264 const int d = retained.d;
1265 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1266 continue;
1267 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1268 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1269 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1270 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1271 if (!compiled_data->tensors.internals[i])
1272 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1273 for (j = 1; j < parallel_count; j++)
1274 {
1275 if (j != device_id)
1276 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1277 else
1278 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1279 if (!compiled_data->tensors.internals[i + j * internal_size])
1280 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1281 }
1282 }
1283 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
; // Remove 1 if any.
1284}
1285
1286static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1287{
1288 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1289 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1290}
1291
1292static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1293{
1294 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1294, __extension__ __PRETTY_FUNCTION__
); }))
;
1295 int i, j;
1296 for (i = 0; i < tensor_size; i++)
1297 {
1298 if (!tensors[i])
1299 continue;
1300 const int d = tensor_symbols[i].d;
1301 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1302 continue;
1303 for (j = 1; j < parallel_count; j++)
1304 if (tensors[i + j * tensor_size])
1305 {
1306 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1307 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size]
) & ~(uintptr_t)1))
;
1308 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1309 }
1310 }
1311}
1312
1313static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1314{
1315 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1315, __extension__ __PRETTY_FUNCTION__
); }))
;
1316 int i, j;
1317 for (i = 0; i < tensor_size; i++)
1318 {
1319 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1320 for (j = 1; j < parallel_count; j++)
1321 {
1322 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1323 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1324 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1325 { // We shouldn't allocate this, free it up.
1326 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1327 tensors[i + j * tensor_size] = 0;
1328 }
1329 }
1330 }
1331}
1332
// Appends (symbol, tensor) bindings for a group of symbols and their replicas to tensor_binds.
//
// For each symbol i in [0, tensor_size):
//   - symbols equal to CCV_NNC_NO_TENSOR_SYMBOL are skipped entirely (replicas included);
//   - when graph is non-null and the symbol is an alias, the alias target is bound instead;
//   - the primary tensor tensors[i] is bound with its lowest pointer bit cleared (CCV_NNC_TENSOR);
//   - for each replica j in [1, parallel_count), the copy symbol from
//     ccv_nnc_tensor_symbol_copy() is bound to tensors[i + j * tensor_size] when both exist.
//     The replica pointer is stored as-is, without the CCV_NNC_TENSOR mask.
//
// parallel_count is asserted to be > 0. Each binding is copied into tensor_binds via ccv_array_push().
// NOTE(review): graph is null-checked before the alias lookup but is passed unchecked to
// ccv_nnc_tensor_symbol_copy() when parallel_count > 1 — confirm callers never pass a null
// graph with parallel_count > 1, or that the callee tolerates it.
1333static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1334{
1335 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1335, __extension__ __PRETTY_FUNCTION__
); }))
;
1336 int i, j;
1337 for (i = 0; i < tensor_size; i++)
1338 {
1339 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1340 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1341 continue;
1342 if (graph)
1343 {
     // Bind against the aliased-to symbol rather than the alias itself.
1344 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1345 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1346 tensor_symbol = alias_to;
1347 }
1348 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
     // The symbol test below is always true here: NO_TENSOR_SYMBOL was skipped above and the
     // alias replacement only installs a valid symbol. Only the tensor null-check is effective.
1349 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1350 {
1351 const ccv_nnc_tensor_bind_t retained_bind = {
1352 .symbol = tensor_symbol,
1353 .tensor = tensor
1354 };
1355 ccv_array_push(tensor_binds, &retained_bind);
1356 }
1357 for (j = 1; j < parallel_count; j++)
1358 {
1359 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1360 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1361 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1362 {
1363 const ccv_nnc_tensor_bind_t bind = {
1364 .symbol = copy,
1365 .tensor = tensors[i + j * tensor_size]
1366 };
1367 ccv_array_push(tensor_binds, &bind);
1368 }
1369 }
1370 }
1371}
1372
// Releases the compiled (concrete) graph state held in compiled_data.
//
// Frees, when present: graph, tensor_arena, graph_exec_arena, backward.from_ops,
// evaluate.schedule and backward.schedule. Each field is reset to 0 afterwards, and
// is_test is reset to 0, so the function can be called repeatedly on the same object.
1373static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1374{
1375 if (compiled_data->graph)
1376 ccv_nnc_graph_free(compiled_data->graph);
1377 compiled_data->graph = 0;
1378 compiled_data->is_test = 0;
1379 if (compiled_data->tensor_arena)
1380 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1381 compiled_data->tensor_arena = 0;
1382 if (compiled_data->graph_exec_arena)
1383 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1384 compiled_data->graph_exec_arena = 0;
1385 if (compiled_data->backward.from_ops)
1386 ccfreefree(compiled_data->backward.from_ops);
1387 compiled_data->backward.from_ops = 0;
1388 if (compiled_data->evaluate.schedule)
1389 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1390 compiled_data->evaluate.schedule = 0;
1391 if (compiled_data->backward.schedule)
1392 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1393 compiled_data->backward.schedule = 0;
1394}
1395
// Releases the gradient bookkeeping arrays held in compiled_data.
//
// Frees gradients and updated_parameters when present and resets both to 0.
// update_nodes and saved_aux are only reset to 0, not freed.
// NOTE(review): presumably update_nodes and saved_aux point into the updated_parameters
// allocation and therefore need no separate free — confirm against the allocation site.
1396static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1397{
1398 if (compiled_data->gradients)
1399 ccfreefree(compiled_data->gradients);
1400 compiled_data->gradients = 0;
1401 if (compiled_data->updated_parameters)
1402 ccfreefree(compiled_data->updated_parameters);
1403 compiled_data->updated_parameters = 0;
1404 compiled_data->update_nodes = 0;
1405 compiled_data->saved_aux = 0;
1406}
1407
// Releases the state stored under compiled_data->backward.
//
// Frees, when present: backward.gradients, the backward.accum graph, backward.tensor_arena
// and backward.graph_exec_arena. Each field is reset to 0 afterwards.
1408static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1409{
1410 if (compiled_data->backward.gradients)
1411 ccfreefree(compiled_data->backward.gradients);
1412 compiled_data->backward.gradients = 0;
1413 if (compiled_data->backward.accum)
1414 ccv_nnc_graph_free(compiled_data->backward.accum);
1415 compiled_data->backward.accum = 0;
1416 if (compiled_data->backward.tensor_arena)
1417 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1418 compiled_data->backward.tensor_arena = 0;
1419 if (compiled_data->backward.graph_exec_arena)
1420 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1421 compiled_data->backward.graph_exec_arena = 0;
1422}
1423
// Releases the state stored under compiled_data->apply_gradients.
//
// Frees, when present: apply_gradients.graph, apply_gradients.tensor_arena and
// apply_gradients.graph_exec_arena. Each field is reset to 0 afterwards.
1424static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1425{
1426 if (compiled_data->apply_gradients.graph)
1427 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1428 compiled_data->apply_gradients.graph = 0;
1429 if (compiled_data->apply_gradients.tensor_arena)
1430 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1431 compiled_data->apply_gradients.tensor_arena = 0;
1432 if (compiled_data->apply_gradients.graph_exec_arena)
1433 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1434 compiled_data->apply_gradients.graph_exec_arena = 0;
1435}
1436
1437// Compile the graph to run ccv_cnnp_model_fit
// Steps, in order:
//   1. Assert the model is not already compiled in FIT mode, then mark it as FIT mode.
//   2. Make sure the symbolic graph carries gradients for trainables only
//      (CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES); if it was built for another gradient
//      mode, rewind the graph and rebuild the gradient part.
//   3. Make sure parameter/internal tensors are allocated (full init, or init_1 when the
//      low bit of tensors_init.v is set).
//   4. Collect tensor bindings for inputs, outputs, fits, parameters, updated parameters and
//      internals, then compile the symbolic graph into graph / tensor_arena / graph_exec_arena.
//   5. If tensors were already initialized before this call and parallel_count > 1, call
//      _ccv_cnnp_model_copy_tensors() on the parameters; run state initializers if
//      _ccv_cnnp_any_to_init() says so.
//   6. Zero every saved_aux source tensor (and its replicas) with CMD_SET_FORWARD(0).
//   7. Resolve evaluate.tos to concrete execs in evaluate.to_ops, install the default static
//      schedule and autotune the graph.
//
// inputs / fits / outputs hold parallel_count groups laid out group-after-group; each size
// is asserted to be divisible by parallel_count, and output_size must equal
// model->output_size * parallel_count.
1438static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1439{
1440 int i, j;
1441 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1442 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1442, __extension__ __PRETTY_FUNCTION__
); }))
;
1443 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
     // A parallel_count of 0 (or less) on the model is treated as a single replica.
1444 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1445 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1445, __extension__ __PRETTY_FUNCTION__
); }))
;
1446 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1446
, __extension__ __PRETTY_FUNCTION__); }))
;
1447 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1447, __extension__ __PRETTY_FUNCTION__
); }))
;
     // Build the gradient part of the symbolic graph, or rebuild it if it was created for a
     // different gradient mode. Nothing is done when it is already in TRAINABLES mode.
1448 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1449 {
1450 _ccv_cnnp_model_set_rewindables(model);
1451 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1452 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1453 _ccv_cnnp_model_rewind_graph(model);
1454 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1455 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1456 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1457 }
     // Remember whether tensors existed before this call; it decides below whether
     // _ccv_cnnp_model_copy_tensors() is called after compilation.
1458 const int tensors_init = !!compiled_data->tensors_init.v;
1459 if (!tensors_init)
1460 _ccv_cnnp_model_tensors_init(model, compiled_data);
1461 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1462 // Check if it is not fully allocated, if it is not, init_1.
1463 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1464 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1465 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1465, __extension__ __PRETTY_FUNCTION__); }))
;
1466 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1466, __extension__ __PRETTY_FUNCTION__); }))
;
1467 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1467
, __extension__ __PRETTY_FUNCTION__); }))
;
1468 const int input_size_per_p = input_size / parallel_count;
1469 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1470 const int output_size_per_p = output_size / parallel_count;
1471 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1472 const int fit_size_per_p = fit_size / parallel_count;
1473 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1474 const int parameter_size = compiled_data->parameters->rnum;
1475 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
     // The updated-parameter symbols are bound to the same tensors as the parameters themselves.
1476 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1477 const int internal_size = compiled_data->internals->rnum;
     // Drop replica tensors for internals that have no copy symbol before binding the rest.
1478 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1479 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1480 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1481 ccv_array_free(tensor_binds);
     // CCV_NNC_INIT_V clears the low tag bit of tensors_init.v to recover the real pointer.
1482 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1483 if (tensors_init && parallel_count > 1)
1484 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1485 // If tensor is not init'ed, we need to init states first.
1486 if (_ccv_cnnp_any_to_init(compiled_data))
1487 {
1488 ccv_nnc_tensor_init_states_t tensor_init_states = {
1489 .parallel_count = parallel_count,
1490 .graph = model->graph,
1491 .compiled_data = compiled_data,
1492 .tensor_arena = compiled_data->tensor_arena
1493 };
1494 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1495 }
1496 compiled_data->is_test = 0;
1497 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1498 // No need to set because it is default to training mode.
1499 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
     // Zero-fill every saved_aux source tensor (saved_aux_size entries per parameter), replicas included.
     // NOTE(review): the primary `tensor` is passed to ccv_nnc_cmd_exec without a null check, unlike
     // the replica `copy` below — confirm ccv_nnc_tensor_from_symbol cannot return null here.
1500 for (i = 0; i < saved_aux_size * parameter_size; i++)
1501 {
1502 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1503 continue;
1504 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1505 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1506 for (j = 1; j < parallel_count; j++)
1507 {
1508 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1509 if (copy)
1510 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1511 }
1512 }
     // Rebuild evaluate.to_ops from scratch, keeping only execs that resolved to a concrete graph node.
1513 const int evaluate_to_size = compiled_data->evaluate.to_size;
1514 compiled_data->evaluate.to_op_size = 0;
1515 for (i = 0; i < evaluate_to_size; i++)
1516 {
1517 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1518 if (to.graph)
1519 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1520 }
1521 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1522 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1523}
1524
// Returns the default stream of the model's compiled graph, as reported by
// ccv_nnc_graph_default_stream(). Returns 0 when the model has no compiled data or no
// compiled graph yet. model itself is dereferenced without a null check.
1525ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1526{
1527 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1528 if (!compiled_data || !compiled_data->graph)
1529 return 0;
1530 return ccv_nnc_graph_default_stream(compiled_data->graph);
1531}
1532
// Returns the size of the model's main tensor arena, as reported by
// ccv_nnc_tensor_arena_size(). Returns 0 when the model has no compiled data or no
// tensor arena yet. model itself is dereferenced without a null check.
1533uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1534{
1535 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1536 if (!compiled_data || !compiled_data->tensor_arena)
1537 return 0;
1538 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1539}
1540
// Rebinds a group of symbols (and their replicas) to caller-supplied tensors on an
// existing tensor arena, via ccv_nnc_tensor_bind_symbol().
//
// For each symbol i in [0, tensor_size):
//   - symbols equal to CCV_NNC_NO_TENSOR_SYMBOL are skipped entirely (replicas included);
//   - when graph is non-null and the symbol is an alias, the alias target is bound instead;
//   - tensors[i] is bound to the symbol;
//   - for each replica j in [1, parallel_count) with a valid copy symbol,
//     tensors[i + tensor_size * j] is bound to that copy.
//
// Unlike _ccv_cnnp_model_bind_tensors(), tensor pointers are passed through without a
// null check or pointer-bit masking, and parallel_count is not asserted here.
1541static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1542{
1543 int i, j;
1544 for (i = 0; i < tensor_size; i++)
1545 {
1546 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1547 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1548 continue;
1549 if (graph)
1550 {
     // Bind against the aliased-to symbol rather than the alias itself.
1551 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1552 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1553 tensor_symbol = alias_to;
1554 }
1555 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1556 for (j = 1; j < parallel_count; j++)
1557 {
1558 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1559 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1560 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1561 }
1562 }
1563}
1564
// Runs the model's compiled graph in FIT mode for the given inputs / fits / outputs.
//
// Preconditions (asserted): the model has compiled_data and a symbolic graph;
// output_size == model->output_size * parallel_count; input_size == model->input_size *
// parallel_count; and, when fits is non-null, fit_size == output_size.
//
// If no concrete graph exists yet, or the existing one was compiled for a different graph
// mode, all compiled graph state is released and the graph is recompiled through
// _ccv_cnnp_model_fit_jit(). Otherwise the already-compiled arena is reused and only the
// input / output / fit tensors are rebound. If the graph was last left in test mode, it is
// switched back (is_test = 0) before running.
//
// tensor_tape and stream_context are forwarded unchanged to ccv_nnc_graph_run_with_schedule().
1565void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1566{
1567 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1568 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1568, __extension__ __PRETTY_FUNCTION__); }))
;
     // A parallel_count of 0 (or less) on the model is treated as a single replica.
1569 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1570 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1570, __extension__ __PRETTY_FUNCTION__
); }))
;
1571 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1571, __extension__ __PRETTY_FUNCTION__
); }))
;
1572 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1572
, __extension__ __PRETTY_FUNCTION__); }))
;
1573 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1573, __extension__ __PRETTY_FUNCTION__); }))
;
1574 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1575 {
     // Throw away whatever was compiled for another mode before recompiling for fit.
1576 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1577 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1578 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1579 // Compile the symbolic graph down only when needed.
1580 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1581 } else {
     // Already compiled for fit: keep the arena and just point it at this call's tensors.
1582 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1582, __extension__ __PRETTY_FUNCTION__); }))
;
1583 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1583, __extension__ __PRETTY_FUNCTION__); }))
;
1584 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1584
, __extension__ __PRETTY_FUNCTION__); }))
;
1585 const int input_size_per_p = input_size / parallel_count;
1586 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1587 const int output_size_per_p = output_size / parallel_count;
1588 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1589 const int fit_size_per_p = fit_size / parallel_count;
1590 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1591 }
1592 if (compiled_data->is_test)
1593 {
1594 compiled_data->is_test = 0;
1595 ccv_nnc_graph_exec_update_t update = {
1596 .parallel_count = parallel_count,
1597 .graph = model->graph,
1598 .graph_exec_arena = compiled_data->graph_exec_arena,
1599 };
1600 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1601 }
1602 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1603}
1604
1605// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
// Steps, in order:
//   1. Mark the compiled data as MULTISTAGE_MODE_NO_GRAD.
//   2. If running data-parallel and no gradient has been set up yet, grow evaluate.tos,
//      record rewindables, apply ccv_nnc_symbolic_graph_data_parallel() to the symbolic
//      graph and append the replica exec symbols to evaluate.tos.
//   3. Make sure parameter/internal tensors are allocated.
//   4. Bind inputs, outputs, parameters and internals, then compile the symbolic graph from
//      its sources up to evaluate.tos only.
//   5. If tensors were already initialized before this call and parallel_count > 1, call
//      _ccv_cnnp_model_copy_tensors() on the parameters; run state initializers when needed.
//   6. Switch the graph to test mode, install the default static schedule and autotune.
//
// output_size must equal model->output_size * parallel_count and be > 0; input_size and
// output_size must be divisible by parallel_count (all asserted).
1606static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1607{
1608 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1609 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
     // A parallel_count of 0 (or less) on the model is treated as a single replica.
1610 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1611 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1611, __extension__ __PRETTY_FUNCTION__
); }))
;
1612 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1612, __extension__ __PRETTY_FUNCTION__
); }))
;
1613 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1614 // we setup proper rewindables so the graph can be rewound to previous state before we run data parallel.
1615 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1616 {
1617 const int evaluate_to_size = compiled_data->evaluate.to_size;
     // Room for evaluate_to_size * parallel_count exec symbols followed by the same number of execs.
     // NOTE(review): the realloc result is assigned without a null check, and evaluate.to_ops is
     // not re-pointed in this function — confirm whether to_ops aliases this allocation and is
     // refreshed elsewhere before use.
1618 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1619 _ccv_cnnp_model_set_rewindables(model);
1620 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1621 0, 0,
1622 0, 0, 0,
1623 0, 0, 0,
1624 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1625 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1626 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1627 int i, j;
     // Append each existing replica exec symbol after the original entries; the loop bound is the
     // to_size captured before the loop, so newly appended entries are not revisited.
1628 for (i = 0; i < evaluate_to_size; i++)
1629 for (j = 1; j < parallel_count; j++)
1630 {
1631 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1632 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1633 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1634 }
1635 }
     // Remember whether tensors existed before this call; it decides below whether
     // _ccv_cnnp_model_copy_tensors() is called after compilation.
1636 const int tensors_init = !!compiled_data->tensors_init.v;
1637 if (!tensors_init)
1638 _ccv_cnnp_model_tensors_init(model, compiled_data);
1639 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1640 // Check if it is not fully allocated, if it is not, init_1.
1641 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1642 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1643 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1643, __extension__ __PRETTY_FUNCTION__); }))
;
1644 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1644, __extension__ __PRETTY_FUNCTION__); }))
;
1645 const int input_size_per_p = input_size / parallel_count;
1646 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1647 const int output_size_per_p = output_size / parallel_count;
1648 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1649 const int parameter_size = compiled_data->parameters->rnum;
1650 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1651 const int internal_size = compiled_data->internals->rnum;
     // Drop replica tensors for internals that have no copy symbol before binding the rest.
1652 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1653 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1654 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1655 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1656 ccv_array_free(tensor_binds);
     // CCV_NNC_INIT_V clears the low tag bit of tensors_init.v to recover the real pointer.
1657 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1658 // If tensor is not init'ed, we need to init states first.
1659 if (tensors_init && parallel_count > 1)
1660 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1661 if (_ccv_cnnp_any_to_init(compiled_data))
1662 {
1663 ccv_nnc_tensor_init_states_t tensor_init_states = {
1664 .parallel_count = parallel_count,
1665 .graph = model->graph,
1666 .compiled_data = compiled_data,
1667 .tensor_arena = compiled_data->tensor_arena
1668 };
1669 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1670 }
     // This mode is evaluation-only, so the freshly compiled graph is always put in test mode.
1671 compiled_data->is_test = 1;
1672 ccv_nnc_graph_exec_update_t update = {
1673 .parallel_count = parallel_count,
1674 .graph = model->graph,
1675 .graph_exec_arena = compiled_data->graph_exec_arena,
1676 };
1677 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1678 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1679 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1680}
1681
// Allocates the per-parameter gradient tensors for every replica.
//
// A single array of 2 * parameter_size * parallel_count tensor pointers is allocated:
// the first half is tensors.gradients, the second half is tensors.accum_gradients.
// Both halves are indexed as [i + j * parameter_size] for parameter i and replica j.
//
// For each parameter:
//   - if parameter_flags is present and the parameter's bit is clear, every gradient and
//     accumulated-gradient slot for it is set to 0 and no tensor is created;
//   - otherwise a gradient tensor is created per replica from the parameter symbol's tensor
//     params, and the accumulated-gradient slots are left 0 (allocated lazily elsewhere).
//
// Asserts that tensors.gradients has not been allocated already.
// NOTE(review): the malloc result is used without a null check — confirm the project's
// out-of-memory policy.
1682static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1683{
1684 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1684, __extension__ __PRETTY_FUNCTION__
); }))
;
1685 const int parameter_size = compiled_data->parameters->rnum;
1686 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1687 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
     // accum_gradients shares the allocation: it starts right after the gradients half.
1688 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1689 int i, j;
1690 for (i = 0; i < parameter_size; i++)
1691 {
     // parameter_flags is a bitset of 64-bit words; bit i selects whether parameter i gets gradients.
1692 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1693 {
1694 compiled_data->tensors.gradients[i] = 0;
1695 compiled_data->tensors.accum_gradients[i] = 0;
1696 for (j = 1; j < parallel_count; j++)
1697 {
1698 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1699 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1700 }
1701 continue;
1702 }
1703 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1704 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
     // A parameter with no specific device is pinned to device 0 for the primary replica.
1705 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1706 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1707 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1708 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1709 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1710 for (j = 1; j < parallel_count; j++)
1711 {
     // Replica j goes on device j, except the replica whose index equals the primary's
     // device id: that one takes device 0 so no device is used twice.
1712 if (j != device_id)
1713 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1714 else
1715 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1716 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1717 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1718 }
1719 }
1720}
1721
// Tells whether a disable_outgrad bitmask disables gradients for every model input.
//
// Returns 1 when disable_outgrad is CCV_CNNP_DISABLE_OUTGRAD_ALL, 0 when it is
// CCV_CNNP_DISABLE_OUTGRAD_NONE. Otherwise returns 1 only if bits 0 .. input_size - 1 are
// all set (bit i set means input i has its gradient disabled), and 0 as soon as one is clear.
// With input_size <= 0 and a mask that is neither sentinel, the result is 1.
// NOTE(review): the shift count i is not bounded to 63 here; shifting a uint64_t by 64 or
// more is undefined in C — confirm input_size never exceeds 64 when a custom mask is used.
1722static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1723{
1724 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1725 return 1;
1726 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1727 return 0;
1728 int i;
1729 for (i = 0; i < input_size; i++)
1730 if (!(disable_outgrad & ((uint64_t)1 << i)))
1731 return 0;
1732 return 1;
1733}
1734
1735// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1736// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1737static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1738{
1739 int i, j;
1740 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1741 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1742 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1742, __extension__ __PRETTY_FUNCTION__
); }))
;
1743 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1744 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1745 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1745, __extension__ __PRETTY_FUNCTION__
); }))
;
1746 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1746, __extension__ __PRETTY_FUNCTION__
); }))
;
1747 // There shouldn't be a loss function if we evaluate with multistage jit.
1748 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1748, __extension__ __PRETTY_FUNCTION__
); }))
;
1749 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1750 {
1751 _ccv_cnnp_model_set_rewindables(model);
1752 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1753 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1754 _ccv_cnnp_model_rewind_graph(model);
1755 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1756 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1757 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1758 }
1759 const int tensors_init = !!compiled_data->tensors_init.v;
1760 if (!tensors_init)
1761 _ccv_cnnp_model_tensors_init(model, compiled_data);
1762 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1763 // Check whether it is fully allocated; if it is not, call init_1.
1764 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1765 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1766 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1766, __extension__ __PRETTY_FUNCTION__); }))
;
1767 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1767, __extension__ __PRETTY_FUNCTION__); }))
;
1768 const int input_size_per_p = input_size / parallel_count;
1769 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1770 const int output_size_per_p = output_size / parallel_count;
1771 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1772 const int parameter_size = compiled_data->parameters->rnum;
1773 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1774 const int internal_size = compiled_data->internals->rnum;
1775 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1776 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1777 if (!compiled_data->tensors.gradients)
1778 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1779 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1780 if (compiled_data->backward.to_size > 0)
1781 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1782 else
1783 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1784 ccv_array_free(tensor_binds);
1785 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1786 if (tensors_init && parallel_count > 1)
1787 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1788 // If tensor is not init'ed, we need to init states first.
1789 if (_ccv_cnnp_any_to_init(compiled_data))
1790 {
1791 ccv_nnc_tensor_init_states_t tensor_init_states = {
1792 .parallel_count = parallel_count,
1793 .graph = model->graph,
1794 .compiled_data = compiled_data,
1795 .tensor_arena = compiled_data->tensor_arena
1796 };
1797 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1798 }
1799 compiled_data->is_test = is_test;
1800 ccv_nnc_graph_exec_update_t update = {
1801 .parallel_count = parallel_count,
1802 .graph = model->graph,
1803 .graph_exec_arena = compiled_data->graph_exec_arena,
1804 };
1805 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1806 const int evaluate_to_size = compiled_data->evaluate.to_size;
1807 compiled_data->evaluate.to_op_size = 0;
1808 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1809 for (i = 0; i < evaluate_to_size; i++)
1810 {
1811 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1812 if (to_op.graph)
1813 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1814 const int* tos;
1815 int to_size;
1816 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1817 for (j = 0; j < to_size; j++)
1818 {
1819 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1820 .d = tos[j],
1821 .graph = model->graph
1822 });
1823 if (to_op.graph)
1824 ccv_array_add_unique_int(backward_from, to_op.d);
1825 }
1826 }
1827 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1827, __extension__
__PRETTY_FUNCTION__); }))
;
1828 compiled_data->backward.from_op_size = backward_from->rnum;
1829 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1830 for (i = 0; i < backward_from->rnum; i++)
1831 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1832 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1833 .graph = compiled_data->graph,
1834 };
1835 // If there are any set nodes (to set some tensors to 0) inserted through the backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find these nodes and explicitly add them to backward.from_ops.
1836 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)->
data)) + (size_t)(compiled_data->graph->exec_info)->
rsize * (size_t)(0)))
;
1837 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1838 uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1839 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data
)) + (size_t)(compiled_data->graph->sources)->rsize *
(size_t)(0)))
;
1840 const int source_size = compiled_data->graph->sources->rnum;
1841 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == compiled_data->graph
) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[
_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void
*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t
)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); --
_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_
[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size
); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_
].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0)
, __extension__ ({ if (_incomings_[(compiled_data->evaluate
.to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(compiled_data->evaluate.to_ops
)[_i_].d].c > 0) continue; _visit_->node[_visit_->size
].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_
->node[_visit_->size].term = ((_incomings_[(compiled_data
->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if (
_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof (
(_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__
({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail
("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c"
, 1841, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
1842 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1843 visited[(idx >> 5)] |= (1u << (idx & 31));
1844 } ccv_nnc_graph_visit_endfor} }
1845 ccv_nnc_graph_visit_free(visit);
1846 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)->
data)) + (size_t)(compiled_data->graph->destinations)->
rsize * (size_t)(0)))
;
1847 const int destination_size = compiled_data->graph->destinations->rnum;
1848 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } int _exist_size_[2] = { (compiled_data->backward
.from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[
_idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail
("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c"
, 1848, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_
][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (
_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == compiled_data->graph)
; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (compiled_data->backward.from_op_size); _i_++
) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data
->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 6 && _d_ < (destination_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1849 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1850 visited[(idx >> 5)] |= (1u << (idx & 31));
1851 } ccv_nnc_graph_visit_endfor} }
1852 ccv_nnc_graph_visit_free(visit);
1853 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(destination_size)) { _exists_[_p_][_i_] = d; continue; } } else
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if
(_incomings_[d].c == 0 && _incomings_[d].r == 6 &&
_d_ < (destination_size)) { ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1854 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1855 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1856 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1857 {
1858 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ? 1 : 0), __extension__ ({ if (exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ; else __assert_fail ("exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD"
, "ccv_cnnp_model.c", 1858, __extension__ __PRETTY_FUNCTION__
); }))
;
1859 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for empty out the tensor set function, not for the set grad to 1 one.
1860 ccv_array_add_unique_int(backward_from, idx);
1861 }
1862 } ccv_nnc_graph_visit_endfor} }
1863 ccv_nnc_graph_visit_free(visit);
1864 ccfreefree(visited);
1865 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1866 {
1867 compiled_data->backward.from_op_size = backward_from->rnum;
1868 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1869 for (i = 0; i < backward_from->rnum; i++)
1870 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1871 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1872 .graph = compiled_data->graph,
1873 };
1874 }
1875 ccv_array_free(backward_from);
1876 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1877 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1878}
1879
1880void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1881{
1882 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1883 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1883, __extension__ __PRETTY_FUNCTION__); }))
;
1884 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1885 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1885, __extension__ __PRETTY_FUNCTION__
); }))
;
1886 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1886, __extension__ __PRETTY_FUNCTION__
); }))
;
1887 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1887, __extension__ __PRETTY_FUNCTION__); }))
;
1888 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1889 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1890 if (!compiled_data->graph || mode_mismatch)
1891 {
1892 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1893 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad.
1894 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1895 if (params.requires_grad)
1896 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1897 else
1898 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1899 } else {
1900 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1901 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1901, __extension__ __PRETTY_FUNCTION__); }))
;
1902 const int input_size_per_p = input_size / parallel_count;
1903 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1904 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1904, __extension__ __PRETTY_FUNCTION__); }))
;
1905 const int output_size_per_p = output_size / parallel_count;
1906 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1907 }
1908 if (compiled_data->is_test != params.is_test)
1909 {
1910 compiled_data->is_test = params.is_test;
1911 ccv_nnc_graph_exec_update_t update = {
1912 .parallel_count = parallel_count,
1913 .graph = model->graph,
1914 .graph_exec_arena = compiled_data->graph_exec_arena,
1915 };
1916 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1917 }
1918}
1919
1920void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1921{
1922 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1923 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1923, __extension__ __PRETTY_FUNCTION__); }))
;
1924 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1925 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1926 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1927 else {
1928 if (!compiled_data->evaluate.schedule)
1929 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1930 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1931 }
1932}
1933
1934// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1935// Particularly, this method compiles the accumulator graph.
1936static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1937{
1938 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1939 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1939, __extension__ __PRETTY_FUNCTION__); }))
;
1940 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1940, __extension__ __PRETTY_FUNCTION__
); }))
;
1941 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1942 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1943 const int parameter_size = compiled_data->parameters->rnum;
1944 int i, j;
1945 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1946 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1947 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1948 for (i = 0; i < parameter_size; i++)
1949 for (j = 0; j < parallel_count; j++)
1950 if (compiled_data->tensors.gradients[i + j * parameter_size])
1951 {
1952 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1953 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
1954 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1955 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1956 ccv_nnc_tensor_symbol_t inputs[2];
1957 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1958 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1959 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1960 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1961 } else {
1962 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1963 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1964 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1965 }
1966 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1967 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1968 {
1969 ccv_nnc_symbolic_graph_free(accum);
1970 // Create empty graph.
1971 compiled_data->backward.accum = ccv_nnc_graph_new();
1972 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1973 return;
1974 }
1975 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1976 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1977 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1978 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1979 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1980 ccv_nnc_symbolic_graph_free(accum);
1981 ccv_array_free(tensor_binds);
1982 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1983}
1984
1985void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1986{
1987 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1988 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1988, __extension__ __PRETTY_FUNCTION__); }))
;
1989 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1989, __extension__ __PRETTY_FUNCTION__
); }))
;
1990 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1991 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1991, __extension__ __PRETTY_FUNCTION__
); }))
;
1992 if (outgrad_size > 0)
1993 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1993, __extension__ __PRETTY_FUNCTION__
); }))
; }
1994 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1994, __extension__ __PRETTY_FUNCTION__); }))
;
1995 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1995, __extension__ __PRETTY_FUNCTION__
); }))
;
1996 const int parameter_size = compiled_data->parameters->rnum;
1997 // If we need to accumulate the gradients now, do jit on accumulator.
1998 if (compiled_data->backward.count > 0)
1999 {
2000 if (!compiled_data->backward.accum)
2001 _ccv_cnnp_model_multistage_jit_1(model);
2002 else if (compiled_data->backward.count == 1) {
2003 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
2004 int i;
2005 for (i = 0; i < parameter_size * parallel_count; i++)
2006 {
2007 ccv_nnc_tensor_t* tensor;
2008 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
2009 }
2010 if (compiled_data->backward.tensor_arena)
2011 {
2012 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
2013 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
2014 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
2015 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2016 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
2017 }
2018 }
2019 }
2020 const int ingrad_size_per_p = model->output_size;
2021 const int outgrad_size_per_p = compiled_data->outgrad_size;
2022 int i, j;
2023 for (i = 0; i < ingrad_size_per_p; i++)
2024 {
2025 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
2026 if (!ingrad_size || !ingrads || ingrads[i] == 0)
2027 {
2028 // Set it to 1 if it is not specified.
2029 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
2030 if (ingrad_tensor)
2031 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2032 for (j = 1; j < parallel_count; j++)
2033 {
2034 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2035 if (ingrad_tensor)
2036 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2037 }
2038 } else {
2039 // Make sure the length matches, in case it is an alias.
2040 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 2040, __extension__ __PRETTY_FUNCTION__
); }))
;
2041 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2042 for (j = 1; j < parallel_count; j++)
2043 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2044 }
2045 }
2046 if (outgrad_size > 0)
2047 {
2048 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 2048, __extension__ __PRETTY_FUNCTION__
); }))
;
2049 for (i = 0; i < outgrad_size_per_p; i++)
2050 if (outgrads[i])
2051 {
2052 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2053 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2054 for (j = 1; j < parallel_count; j++)
2055 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2056 }
2057 } else {
2058 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2059, __extension__ __PRETTY_FUNCTION__
); }))
2059 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2059, __extension__ __PRETTY_FUNCTION__
); }))
;
2060 }
2061 // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients.
2062 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
2063 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
2064 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2065 if (!compiled_data->backward.schedule)
2066 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2067 // Run the backward pass.
2068 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2069 // If we need to run accumulation round, do that now.
2070 if (compiled_data->backward.count > 0)
2071 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2072 // Update the count, this determines whether we need to accumulate or not.
2073 ++compiled_data->backward.count;
2074}
2075
2076// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2077// Particularly, this method compiles the parameter update graph.
2078static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2079{
2080 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2081 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2081, __extension__ __PRETTY_FUNCTION__
); }))
;
2082 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2083 const int parameter_size = compiled_data->parameters->rnum;
2084 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2085 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2086 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2087 // Bind accumulated gradients.
2088 if (compiled_data->backward.count > 1)
2089 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2090 else
2091 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2092 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2093 int i, j;
2094 for (i = 0; i < compiled_data->backward.to_size; i++)
2095 {
2096 const int* tos;
2097 int to_size;
2098 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2099 for (j = 0; j < to_size; j++)
2100 {
2101 // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply
2102 // gradients graph.
2103 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2104 .d = tos[j],
2105 .graph = model->graph,
2106 });
2107 if (!exec.graph)
2108 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2109 }
2110 }
2111 const int from_size = apply_gradients_from->rnum;
2112 if (from_size == 0)
2113 {
2114 ccv_array_free(apply_gradients_from);
2115 ccv_array_free(tensor_binds);
2116 return;
2117 }
2118 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2119 for (i = 0; i < from_size; i++)
2120 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2121 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2122 .graph = model->graph
2123 };
2124 ccv_array_free(apply_gradients_from);
2125 // It can only ends with updates on the parameters.
2126 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2127 for (i = 0; i < parameter_size; i++)
2128 {
2129 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2130 continue;
2131 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2132 for (j = 1; j < parallel_count; j++)
2133 {
2134 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2135 ccv_array_push(tos, &copy);
2136 }
2137 }
2138 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2139 ccv_array_free(tos);
2140 ccv_array_free(tensor_binds);
2141 ccfreefree(froms);
2142 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2143 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2144 {
2145 // Skip on no tensor.
2146 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2147 continue;
2148 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2149 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2150 for (j = 1; j < parallel_count; j++)
2151 {
2152 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2153 if (copy)
2154 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2155 }
2156 }
2157 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2158}
2159
2160void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2161{
2162 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2163 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2163, __extension__ __PRETTY_FUNCTION__); }))
;
2164 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2164, __extension__ __PRETTY_FUNCTION__
); }))
;
2165 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2166 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2166, __extension__ __PRETTY_FUNCTION__); }))
;
2167 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2167, __extension__ __PRETTY_FUNCTION__
); }))
;
2168 // Skip if there is no backward pass.
2169 if (compiled_data->backward.count <= 0)
2170 return;
2171 // Skip if there is no parameters.
2172 if (compiled_data->parameters->rnum == 0)
2173 {
2174 compiled_data->backward.count = 0;
2175 return;
2176 }
2177 if (!compiled_data->apply_gradients.graph)
2178 _ccv_cnnp_model_multistage_jit_2(model);
2179 else {
2180 const int parameter_size = compiled_data->parameters->rnum;
2181 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2182 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2183 if (compiled_data->backward.count > 1)
2184 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2185 else
2186 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2187 }
2188 if (compiled_data->apply_gradients.graph)
2189 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2190 // Reset backward count to 0.
2191 compiled_data->backward.count = 0;
2192}
2193
2194void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2195{
2196 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2197 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2198 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2198, __extension__ __PRETTY_FUNCTION__
); }))
;
2199 const int tensors_init = !!compiled_data->tensors_init.v;
2200 int this_tensor_init = tensors_init;
2201 if (!tensors_init)
2202 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2203 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2204 // Check if it is not fully allocated, if it is not, init_1.
2205 this_tensor_init = 0;
2206 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2207 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2208 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2209 if (param_ref < 0)
2210 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2210
, __extension__ __PRETTY_FUNCTION__); }))
; }
2211 else
2212 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2212, __extension__ __PRETTY_FUNCTION__
); }))
; }
2213 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2214 ccv_array_free(parameter_indices);
2215 const int parameter_size = compiled_data->parameters->rnum;
2216 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2216
, __extension__ __PRETTY_FUNCTION__); }))
;
2217 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2217, __extension__ __PRETTY_FUNCTION__
); }))
;
2218 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2219 int i;
2220 if (!this_tensor_init)
2221 {
2222 if (compiled_data->tensors.parameters[d])
2223 {
2224 for (i = 1; i < parallel_count; i++)
2225 { assert(compiled_data->tensors.parameters[d + i * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[d + i *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[d + i * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[d + i * parameter_size]"
, "ccv_cnnp_model.c", 2225, __extension__ __PRETTY_FUNCTION__
); }))
; }
2226 this_tensor_init = 1;
2227 } else {
2228 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
;
2229 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2230 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2231 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2232 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2233 compiled_data->tensors.parameters[d] = ccv_nnc_tensor_new(0, info, 0);
2234 for (i = 1; i < parallel_count; i++)
2235 {
2236 if (i != device_id)
2237 CCV_TENSOR_SET_DEVICE_ID(info.type, i)(info.type) = (((info.type) & ~0xfff00) | (((i) & 0xfff
) << 8))
;
2238 else
2239 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2240 compiled_data->tensors.parameters[d + i * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2241 }
2242 }
2243 }
2244 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2245 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2245, __extension__
__PRETTY_FUNCTION__); }))
;
2246 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2247 for (i = 1; i < parallel_count; i++)
2248 {
2249 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2250 if (copy_tensor)
2251 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2252 }
2253 // Mark this symbol as init'ed.
2254 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2255 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2256 init_v[s >> 5] |= (1u << (s & 0x1f));
2257 // If we just allocated this tensor, now it is time to check if we need to mark it as fully allocated.
2258 if (!this_tensor_init)
2259 {
2260 if (ccv_cnnp_model_tensors_any_to_alloc(model, compiled_data))
2261 compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)compiled_data->tensors_init.v | (uintptr_t)1);
2262 else // Remove the flag.
2263 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2264 }
2265}
2266
2267void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2268{
2269 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2270 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2271 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2271, __extension__ __PRETTY_FUNCTION__
); }))
;
2272 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2272, __extension__ __PRETTY_FUNCTION__
); }))
;
2273 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2274 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2275 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2276 if (param_ref < 0)
2277 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2277
, __extension__ __PRETTY_FUNCTION__); }))
; }
2278 else
2279 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2279, __extension__ __PRETTY_FUNCTION__
); }))
; }
2280 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2281 ccv_array_free(parameter_indices);
2282 const int parameter_size = compiled_data->parameters->rnum;
2283 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2283
, __extension__ __PRETTY_FUNCTION__); }))
;
2284 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2284, __extension__ __PRETTY_FUNCTION__
); }))
;
2285 // We don't need to consider parallel_count, every parameter on each device is identical.
2286 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2287 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2287, __extension__
__PRETTY_FUNCTION__); }))
;
2288 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2289}
2290
2291ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2292{
2293 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2294 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2295 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2295, __extension__ __PRETTY_FUNCTION__
); }))
;
2296 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2296, __extension__ __PRETTY_FUNCTION__
); }))
;
2297 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2298 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2299 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2300 if (param_ref < 0)
2301 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2301
, __extension__ __PRETTY_FUNCTION__); }))
; }
2302 else
2303 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2303, __extension__ __PRETTY_FUNCTION__
); }))
; }
2304 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2305 ccv_array_free(parameter_indices);
2306 const int parameter_size = compiled_data->parameters->rnum;
2307 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2307
, __extension__ __PRETTY_FUNCTION__); }))
;
2308 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2308, __extension__ __PRETTY_FUNCTION__
); }))
;
2309 // We don't need to consider parallel_count, every parameter on each device is identical.
2310 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2311 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2311, __extension__
__PRETTY_FUNCTION__); }))
;
2312 return tensor->info;
2313}
2314
2315const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2316{
2317 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2318 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2319 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2319, __extension__ __PRETTY_FUNCTION__
); }))
;
2320 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2321 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2322 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2323 if (param_ref < 0)
2324 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2324
, __extension__ __PRETTY_FUNCTION__); }))
; }
2325 else
2326 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2326, __extension__ __PRETTY_FUNCTION__
); }))
; }
2327 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2328 ccv_array_free(parameter_indices);
2329 const int parameter_size = compiled_data->parameters->rnum;
2330 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2330
, __extension__ __PRETTY_FUNCTION__); }))
;
2331 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2331, __extension__ __PRETTY_FUNCTION__
); }))
;
2332 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2333}
2334
2335int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2336{
2337 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2337, __extension__ __PRETTY_FUNCTION__
); }))
;
2338 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2339 return compiled_data->parameters->rnum;
2340}
2341
2342uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model)
2343{
2344 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2344, __extension__ __PRETTY_FUNCTION__
); }))
;
2345 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2346 const int parameter_size = compiled_data->parameters->rnum;
2347 int i;
2348 const ccv_nnc_symbolic_graph_t* const graph = model->graph;
2349 uint64_t size = 0;
2350 const int tensors_init = !!compiled_data->tensors_init.v;
2351 uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
: 0;
2352 for (i = 0; i < parameter_size; i++)
2353 {
2354 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2355 if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] | (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i])
2356 {
2357 ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info;
2358 size += ccv_nnc_tensor_data_size(params);
2359 continue;
2360 }
2361 ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){
2362 .graph = graph,
2363 .d = d
2364 });
2365 size += ccv_nnc_tensor_data_size(params);
2366 }
2367 return size;
2368}
2369
2370int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type)
2371{
2372 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2372, __extension__ __PRETTY_FUNCTION__
); }))
;
2373 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2374 if (count != compiled_data->parameters->rnum)
2375 return 0;
2376 if (CCV_TENSOR_GET_DEVICE(type)((type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2377 CCV_TENSOR_SET_DEVICE_ID(type, 0)(type) = (((type) & ~0xfff00) | (((0) & 0xfff) <<
8))
;
2378 int i;
2379 // We don't need to consider parallel_count, every parameter on each device is identical.
2380 for (i = 0; i < count; i++)
2381 {
2382 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2383 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2384 {
2385 tensors[i] = 0;
2386 continue;
2387 }
2388 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2389 if (tensor->info.type == type)
2390 tensors[i] = tensor;
2391 else {
2392 ccv_nnc_tensor_param_t info = tensor->info;
2393 info.type = type;
2394 tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet.
2395 }
2396 }
2397 for (i = 0; i < count; i++)
2398 {
2399 ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i];
2400 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2401 continue;
2402 tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1)
)
;
2403 // Now initiate transfer. We should do this one on a stream.
2404 if (tensor->info.type != type)
2405 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensors[i])(ccv_nnc_tensor_t* []){tensors[i]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2406 }
2407 // Copy names and remove parameters.
2408 for (i = 0; i < count; i++)
2409 {
2410 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i];
2411 if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything.
2412 {
2413 names[i] = 0;
2414 continue;
2415 }
2416 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2417 const size_t name_len = ccv_min(strnlen(name, 1023), 1023)({ typeof (strnlen(name, 1023)) _a = (strnlen(name, 1023)); typeof
(1023) _b = (1023); (_a < _b) ? _a : _b; })
;
2418 names[i] = ccmallocmalloc(name_len + 1);
2419 names[i][name_len] = 0;
2420 memcpy(names[i], name, name_len);
2421 if (tensor->info.type == type)
2422 compiled_data->tensors.parameters[i] = 0; // Only move when it is moved.
2423 }
2424 return 1;
2425}
2426
// Instantiates a khash table type named ccv_cnnp_parameter_id that maps C-string keys to int values.
// As the expansion printed below spells out, this defines kh_ccv_cnnp_parameter_id_t together with the
// static inline kh_init_ / kh_destroy_ / kh_clear_ / kh_get_ / kh_resize_ / kh_put_ / kh_del_ helpers.
// Things worth knowing when reading the callers:
// - kh_init_* only calloc()s the table struct, so flags / keys / vals stay null until kh_put_* triggers
//   the first resize.
// - kh_get_* on a table with n_buckets == 0 returns 0, which equals n_buckets (i.e. kh_end), so an empty
//   table always reports "not found".
// - kh_put_* stores the key pointer itself (h->keys[x] = key); the string is not copied, so the key must
//   stay alive for as long as the table is used.
// - kh_put_* reports *ret = -1 and returns n_buckets when the resize fails; that return value is not a
//   valid slot to write a value into.
2427KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
25
Null pointer value stored to field 'vals'
2428
2429void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
2430{
2431 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2431, __extension__ __PRETTY_FUNCTION__
); }))
;
2432 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2433 int i;
2434 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2435 if (count != compiled_data->parameters->rnum)
2436 {
2437 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2438 // Build the map between name and the index.
2439 for (i = 0; i < count; i++)
2440 {
2441 int ret;
2442 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[i], &ret);
2443 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2443
, __extension__ __PRETTY_FUNCTION__); }))
;
2444 kh_val(id_map, k)((id_map)->vals[k]) = i;
2445 }
2446 }
2447 const int parameter_size = compiled_data->parameters->rnum;
2448 int* copy_back = 0;
2449 const int tensors_init = !!compiled_data->tensors_init.v;
2450 if (!tensors_init)
2451 ccv_cnnp_model_tensors_init_0(model, compiled_data);
2452 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2453 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2454 for (i = 0; i < parameter_size; i++)
2455 {
2456 int j = i;
2457 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2458 if (i >= 0 || strncmp(name, names[i], 1023) != 0)
2459 {
2460 // Build the map.
2461 if (id_map == 0)
2462 {
2463 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2464 for (j = 0; j < count; j++)
2465 {
2466 int ret;
2467 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret)kh_put_ccv_cnnp_parameter_id(id_map, names[j], &ret);
2468 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2468
, __extension__ __PRETTY_FUNCTION__); }))
;
2469 kh_val(id_map, k)((id_map)->vals[k]) = j;
2470 }
2471 }
2472 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name)kh_get_ccv_cnnp_parameter_id(id_map, name);
2473 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2474 continue;
2475 j = kh_val(id_map, k)((id_map)->vals[k]);
2476 }
2477 if (compiled_data->tensors.parameters[i]) // Cannot be a shared parameter to read.
2478 { assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))((void) sizeof ((!((uintptr_t)compiled_data->tensors.parameters
[i] & (uintptr_t)1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t
)compiled_data->tensors.parameters[i] & (uintptr_t)1))
; else __assert_fail ("!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)"
, "ccv_cnnp_model.c", 2478, __extension__ __PRETTY_FUNCTION__
); }))
; }
2479 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
2480 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
2481 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
2482 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2483 const int d = parameter.d;
2484 if (info.type == tensors[j]->info.type && invalidates) // Can move.
2485 {
2486 // Deallocate it if needed.
2487 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2488 if (compiled_data->tensors.parameters[i])
2489 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2490 compiled_data->tensors.parameters[i] = tensors[j];
2491 tensors[j] = 0;
2492 } else {
2493 if (!compiled_data->tensors.parameters[i])
2494 { // Not allocated, to allocate first.
2495 // Create new one, make sure we create this by having the right parameters.
2496 const int type = info.type;
2497 info = tensors[j]->info;
2498 info.type = type; // Revert back the type.
2499 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
2500 }
2501 if (!copy_back)
2502 copy_back = (int*)cccalloccalloc(parameter_size, sizeof(int));
2503 copy_back[i] = j + 1;
2504 }
2505 init_v[d >> 5] |= (1u << (d & 0x1f));
2506 // Create this tensor for other data parallel allocations.
2507 info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
2508 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
2509 for (j = 1; j < parallel_count; j++)
2510 if (!compiled_data->tensors.parameters[i + j * parameter_size])
2511 {
2512 if (j != device_id)
2513 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
2514 else
2515 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
2516 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
2517 }
2518 // No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
2519 }
2520 if (id_map)
2521 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2522 // Now do the transfer.
2523 if (copy_back)
2524 {
2525 for (i = 0; i < parameter_size; i++)
2526 {
2527 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[i]) & ~(uintptr_t)1))
;
2528 if (copy_back[i] == 0)
2529 continue;
2530 const int j = copy_back[i] - 1;
2531 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j])(ccv_nnc_tensor_t* []){tensors[j]}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2532 }
2533 ccfreefree(copy_back);
2534 }
2535}
2536
2537ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2538{
2539 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2540 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2540, __extension__ __PRETTY_FUNCTION__); }))
;
2541 const int parameter_size = compiled_data->parameters->rnum;
2542 int i;
2543 for (i = 0; i < parameter_size; i++)
2544 {
2545 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2546 if (first(model, name, context))
2547 return ccv_cnnp_model_parameters(model, -1, i);
2548 }
2549 return 0;
2550}
2551
2552ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2553{
2554 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2555 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2555, __extension__ __PRETTY_FUNCTION__); }))
;
2556 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2557 const int parameter_size = compiled_data->parameters->rnum;
2558 int i;
2559 for (i = 0; i < parameter_size; i++)
2560 {
2561 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2562 if (filter(model, name, context))
2563 {
2564 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2565 ccv_array_push(parameters, &parameter);
2566 }
2567 }
2568 return parameters;
2569
2570}
2571
2572CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2573{
2574 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2575 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2575, __extension__ __PRETTY_FUNCTION__); }))
;
2576 const int tensors_init = !!compiled_data->tensors_init.v;
2577 if (!tensors_init) // If nothing initialized, we return parameter 0.
2578 return ccv_cnnp_model_parameters(model, -1, 0);
2579 const int parameter_size = compiled_data->parameters->rnum;
2580 int i;
2581 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2582 for (i = 0; i < parameter_size; i++)
2583 {
2584 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2585 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2586 return ccv_cnnp_model_parameters(model, -1, i);
2587 }
2588 return 0;
2589}
2590
2591static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2592{
2593 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2594 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2594, __extension__
__PRETTY_FUNCTION__); }))
;
2595 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2596 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2597 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2598 return to_parameter_indices;
2599}
2600
2601static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2602{
2603 // If the model is not compiled yet. Compile them now.
2604 if (!model->graph)
2605 {
2606 model->graph = ccv_nnc_symbolic_graph_new();
2607 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2607, __extension__ __PRETTY_FUNCTION__
); }))
;
2608 const int input_size = from_model->input_size;
2609 ccv_nnc_tensor_param_t input_params[input_size];
2610 int i;
2611 for (i = 0; i < input_size; i++)
2612 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2613 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2614 model->parallel_count = from_model->parallel_count;
2615 model->memory_compression = from_model->memory_compression;
2616 model->memory_reduction = from_model->memory_reduction;
2617 model->gradient_checkpointing = from_model->gradient_checkpointing;
2618 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2619 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2620 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2621 }
2622 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2623 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2623, __extension__ __PRETTY_FUNCTION__
); }))
;
2624 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2625 if (!to_tensors_init)
2626 {
2627 if (only_init_0)
2628 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2629 else
2630 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2631 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2632 // Check if it is not fully allocated, if it is not, init_1.
2633 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2634 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2634, __extension__ __PRETTY_FUNCTION__
); }))
;
2635 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2636 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2637 if (*from_param_ref < 0 && *param_ref >= 0)
2638 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2638, __extension__ __PRETTY_FUNCTION__
); }))
; }
2639 else if (*from_param_ref >= 0)
2640 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2640, __extension__ __PRETTY_FUNCTION__
); }))
; }
2641 if (*param_ref < 0 && *from_param_ref >= 0)
2642 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2642, __extension__ __PRETTY_FUNCTION__); }))
; }
2643 else if (*param_ref >= 0)
2644 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2644, __extension__ __PRETTY_FUNCTION__
); }))
; }
2645}
2646
2647void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2648{
2649 ccv_array_t* to_parameter_indices;
2650 int to_param_ref;
2651 ccv_array_t* from_parameter_indices;
2652 int from_param_ref;
2653 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2654 // Should be exactly the same tensor.
2655 if (to_param_ref < 0 && from_param_ref < 0)
2656 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2656, __extension__ __PRETTY_FUNCTION__
); }))
; }
2657 // To models.
2658 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2659 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2659, __extension__ __PRETTY_FUNCTION__
); }))
;
2660 // From models.
2661 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2662 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2663 const int to_parameter_size = to_compiled_data->parameters->rnum;
2664 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2665 int i, j;
2666 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2667 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2668 for (i = 0; i < rnum; i++)
2669 {
2670 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2671 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2671, __extension__ __PRETTY_FUNCTION__); }))
;
2672 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2672, __extension__ __PRETTY_FUNCTION__
); }))
;
2673 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2674 // If the original is not init'ed. We cannot copy from.
2675 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2676 continue;
2677 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2678 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2678, __extension__ __PRETTY_FUNCTION__); }))
;
2679 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2679, __extension__ __PRETTY_FUNCTION__
); }))
;
2680 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2681 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2681, __extension__
__PRETTY_FUNCTION__); }))
;
2682 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2683 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2683, __extension__
__PRETTY_FUNCTION__); }))
;
2684 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2685 for (j = 1; j < parallel_count; j++)
2686 {
2687 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2688 if (copy_tensor)
2689 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2690 }
2691 // Mark this symbol as init'ed.
2692 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2693 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2694 }
2695 ccv_array_free(to_parameter_indices);
2696 ccv_array_free(from_parameter_indices);
2697}
2698
2699void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2700{
2701 ccv_array_t* to_parameter_indices;
2702 int to_param_ref;
2703 ccv_array_t* from_parameter_indices;
2704 int from_param_ref;
2705 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2706 // Should be exactly the same tensor.
2707 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2708 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2708, __extension__ __PRETTY_FUNCTION__
); }))
; }
2709 // To models.
2710 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2711 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2711, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2712 // From models.
2713 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2714 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2715 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2715, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count can share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2716 const int from_parameter_size = from_compiled_data->parameters->rnum;
2717 const int to_parameter_size = to_compiled_data->parameters->rnum;
2718 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2719 int i, j;
2720 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2721 char* updated_name = 0;
2722 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2723 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2724 for (i = 0; i < rnum; i++)
2725 {
2726 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is >= 0
11
'?' condition is true
12
Assuming the condition is false
13
'?' condition is false
2727 // Need to figure out how to use the renamer here.
2728 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2729 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2729, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2730 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2730, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2731 if (renamer
18.1
'renamer' is non-null
)
2732 {
2733 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2734 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2735 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2736 updated_name = (char*)ccmallocmalloc(1024);
2737 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2738 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2739 memcpy(updated_name, src_name, src_name_len);
2740 updated_name[src_name_len] = 0;
2741 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2742 continue; // Skip this.
2743 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2744 {
2745 // Nothing changed.
2746 } else {
2747 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2748 {
2749 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
24
Calling 'kh_init_ccv_cnnp_parameter_id'
26
Returning from 'kh_init_ccv_cnnp_parameter_id'
2750 for (j = 0; j < from_parameter_size; j++)
27
Assuming 'j' is >= 'from_parameter_size'
28
Loop condition is false. Execution continues on line 2758
2751 {
2752 int ret;
2753 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
2754 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2754
, __extension__ __PRETTY_FUNCTION__); }))
;
2755 kh_val(id_map, k)((id_map)->vals[k]) = j;
2756 }
2757 }
2758 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2759 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
29
Assuming 'k' is not equal to field 'n_buckets'
30
Taking false branch
2760 continue;
2761 src_d = kh_val(id_map, k)((id_map)->vals[k]);
31
Array access (via field 'vals') results in a null pointer dereference
2762 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2762, __extension__ __PRETTY_FUNCTION__); }))
;
2763 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2763, __extension__
__PRETTY_FUNCTION__); }))
;
2764 }
2765 }
2766 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2766, __extension__ __PRETTY_FUNCTION__); }))
;
2767 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2767, __extension__
__PRETTY_FUNCTION__); }))
;
2768 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2769 // If the original is not init'ed. We cannot share from.
2770 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2771 continue;
2772 for (j = 0; j < parallel_count; j++)
2773 {
2774 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2775 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2775, __extension__
__PRETTY_FUNCTION__); }))
;
2776 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2777 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2778 ccv_nnc_tensor_free(dest);
2779 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2780 }
2781 // Mark this symbol as init'ed.
2782 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2783 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2784 }
2785 ccv_array_free(to_parameter_indices);
2786 ccv_array_free(from_parameter_indices);
2787 if (id_map)
2788 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2789 if (updated_name)
2790 ccfreefree(updated_name);
2791 // Mark it as incomplete so we will call init_1.
2792 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2793 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2794 else // Remove the flag.
2795 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2796}
2797
2798ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2799{
2800 if (!compiled_data->stream_map)
2801 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2802 int ret = 0;
2803 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2804 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2804, __extension__ __PRETTY_FUNCTION__); }))
;
2805 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2806 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2807 if (ret != 0)
2808 {
2809 stream = ccv_nnc_stream_context_new(type);
2810 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2811 }
2812 return stream;
2813}
2814
2815void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2816{
2817 ccv_array_t* to_parameter_indices;
2818 int to_param_ref;
2819 ccv_array_t* from_parameter_indices;
2820 int from_param_ref;
2821 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2822 // Should be exactly the same tensor.
2823 if (to_param_ref < 0 && from_param_ref < 0)
2824 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2824, __extension__ __PRETTY_FUNCTION__
); }))
; }
2825 // To models.
2826 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2827 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2827, __extension__ __PRETTY_FUNCTION__
); }))
;
2828 // From models.
2829 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2830 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2831 const int to_parameter_size = to_compiled_data->parameters->rnum;
2832 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2833 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2833, __extension__ __PRETTY_FUNCTION__
); }))
;
2834 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2834, __extension__ __PRETTY_FUNCTION__
); }))
;
2835 int i, j;
2836 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2837 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2838 for (i = 0; i < aux_in_size; i++)
2839 inputs[i + 2] = aux_ins[i];
2840 for (i = 0; i < aux_out_size; i++)
2841 outputs[i + 1] = aux_outs[i];
2842 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2843 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2844 for (i = 0; i < rnum; i++)
2845 {
2846 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2847 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2847, __extension__ __PRETTY_FUNCTION__); }))
;
2848 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2848, __extension__ __PRETTY_FUNCTION__
); }))
;
2849 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2850 // If the original is not init'ed. We cannot copy from.
2851 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2852 continue;
2853 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2854 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2854, __extension__ __PRETTY_FUNCTION__); }))
;
2855 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2855, __extension__ __PRETTY_FUNCTION__
); }))
;
2856 if (parallel_count > 1)
2857 {
2858 ccv_nnc_stream_context_t* streams[parallel_count];
2859 ccv_nnc_stream_signal_t* signal;
2860 if (stream_context)
2861 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2862 for (j = 0; j < parallel_count; j++)
2863 {
2864 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2865 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2866 if (!dest || !src)
2867 {
2868 streams[j] = 0;
2869 continue;
2870 }
2871 // At the moment, can only handle them on the same device.
2872 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2872, __extension__ __PRETTY_FUNCTION__
); }))
;
2873 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2873, __extension__ __PRETTY_FUNCTION__
); }))
;
2874 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2875 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2876 int type = stream_type;
2877 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2878 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2879 // Wait signal to finish.
2880 if (stream_context)
2881 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2882 inputs[0] = outputs[0] = dest;
2883 inputs[1] = src;
2884 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2885 if (stream_context)
2886 {
2887 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2888 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2889 }
2890 streams[j] = stream_0;
2891 }
2892 // If this should be blocking, blocking it.
2893 if (!stream_context)
2894 for (j = 0; j < parallel_count; j++)
2895 if (streams[j])
2896 ccv_nnc_stream_context_wait(streams[j]);
2897 } else {
2898 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2899 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2899, __extension__
__PRETTY_FUNCTION__); }))
;
2900 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2901 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2901, __extension__
__PRETTY_FUNCTION__); }))
;
2902 inputs[0] = outputs[0] = dest;
2903 inputs[1] = src;
2904 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2905 }
2906 // Mark this symbol as init'ed.
2907 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2908 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2909 }
2910 ccv_array_free(to_parameter_indices);
2911 ccv_array_free(from_parameter_indices);
2912}
2913
2914void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2915{
2916 int to_param_ref;
2917 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2918 // To models.
2919 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2920 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2920, __extension__ __PRETTY_FUNCTION__
); }))
;
2921 // Tensor has to be inited already.
2922 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2922, __extension__ __PRETTY_FUNCTION__
); }))
;
2923 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2923, __extension__ __PRETTY_FUNCTION__
); }))
;
2924 // From models.
2925 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2926 const int to_parameter_size = to_compiled_data->parameters->rnum;
2927 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2928 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2928, __extension__ __PRETTY_FUNCTION__
); }))
;
2929 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2929, __extension__ __PRETTY_FUNCTION__
); }))
;
2930 int i, j;
2931 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2932 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2933 for (i = 0; i < aux_in_size; i++)
2934 inputs[i + 1] = aux_ins[i];
2935 for (i = 0; i < aux_out_size; i++)
2936 outputs[i + 1] = aux_outs[i];
2937 for (i = 0; i < rnum; i++)
2938 {
2939 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2940 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2940, __extension__ __PRETTY_FUNCTION__); }))
;
2941 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2941, __extension__ __PRETTY_FUNCTION__
); }))
;
2942 if (parallel_count > 1)
2943 {
2944 ccv_nnc_stream_context_t* streams[parallel_count];
2945 ccv_nnc_stream_signal_t* signal;
2946 if (stream_context)
2947 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2948 for (j = 0; j < parallel_count; j++)
2949 {
2950 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2951 if (!dest)
2952 {
2953 streams[j] = 0;
2954 continue;
2955 }
2956 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2957 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2958 int type = stream_type;
2959 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2960 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2961 // Wait signal to finish.
2962 if (stream_context)
2963 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2964 inputs[0] = outputs[0] = dest;
2965 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2966 if (stream_context)
2967 {
2968 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2969 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2970 }
2971 streams[j] = stream_0;
2972 }
2973 // If this should be blocking, blocking it.
2974 if (!stream_context)
2975 for (j = 0; j < parallel_count; j++)
2976 if (streams[j])
2977 ccv_nnc_stream_context_wait(streams[j]);
2978 } else {
2979 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2980 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2980, __extension__
__PRETTY_FUNCTION__); }))
;
2981 inputs[0] = outputs[0] = dest;
2982 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2983 }
2984 // No need to mark this symbol as init'ed, it is already.
2985 }
2986 ccv_array_free(to_parameter_indices);
2987}
2988
2989void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2990{
2991 int to_param_ref;
2992 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2993 // To models.
2994 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2995 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2995, __extension__ __PRETTY_FUNCTION__
); }))
;
2996 // Tensor has to be inited already.
2997 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2997, __extension__ __PRETTY_FUNCTION__
); }))
;
2998 ccv_nnc_tensor_t** tensor_gradients;
2999 if (to_compiled_data->backward.count > 1)
3000 tensor_gradients = to_compiled_data->tensors.accum_gradients;
3001 else
3002 tensor_gradients = to_compiled_data->tensors.gradients;
3003 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 3003, __extension__ __PRETTY_FUNCTION__
); }))
;
3004 // From models.
3005 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3006 const int to_parameter_size = to_compiled_data->parameters->rnum;
3007 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3008 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 3008, __extension__ __PRETTY_FUNCTION__
); }))
;
3009 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 3009, __extension__ __PRETTY_FUNCTION__
); }))
;
3010 int i, j;
3011 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
3012 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
3013 for (i = 0; i < aux_in_size; i++)
3014 inputs[i + 1] = aux_ins[i];
3015 for (i = 0; i < aux_out_size; i++)
3016 outputs[i + 1] = aux_outs[i];
3017 for (i = 0; i < rnum; i++)
3018 {
3019 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3020 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3020, __extension__ __PRETTY_FUNCTION__); }))
;
3021 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3021, __extension__ __PRETTY_FUNCTION__
); }))
;
3022 if (parallel_count > 1)
3023 {
3024 ccv_nnc_stream_context_t* streams[parallel_count];
3025 ccv_nnc_stream_signal_t* signal;
3026 if (stream_context)
3027 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
3028 for (j = 0; j < parallel_count; j++)
3029 {
3030 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
3031 if (!dest)
3032 {
3033 streams[j] = 0;
3034 continue;
3035 }
3036 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
3037 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
3038 int type = stream_type;
3039 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
3040 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
3041 // Wait signal to finish.
3042 if (stream_context)
3043 ccv_nnc_stream_context_wait_signal(stream_0, signal);
3044 inputs[0] = outputs[0] = dest;
3045 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
3046 if (stream_context)
3047 {
3048 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
3049 ccv_nnc_stream_context_wait_signal(stream_context, signal);
3050 }
3051 streams[j] = stream_0;
3052 }
3053 // If this should be blocking, blocking it.
3054 if (!stream_context)
3055 for (j = 0; j < parallel_count; j++)
3056 if (streams[j])
3057 ccv_nnc_stream_context_wait(streams[j]);
3058 } else {
3059 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
3060 if (!dest)
3061 continue;
3062 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 3062, __extension__
__PRETTY_FUNCTION__); }))
;
3063 inputs[0] = outputs[0] = dest;
3064 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
3065 }
3066 // No need to mark this symbol as init'ed, it is already.
3067 }
3068 ccv_array_free(to_parameter_indices);
3069}
3070
3071void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
3072{
3073 // Only CUDA backend has this feature.
3074#ifdef HAVE_CUDA1
3075 int to_param_ref;
3076 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
3077 // To models.
3078 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3079 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3079, __extension__ __PRETTY_FUNCTION__); }))
;
3080 // Tensor has to be inited already.
3081 assert(!!compiled_data->tensors_init.v)((void) sizeof ((!!compiled_data->tensors_init.v) ? 1 : 0)
, __extension__ ({ if (!!compiled_data->tensors_init.v) ; else
__assert_fail ("!!compiled_data->tensors_init.v", "ccv_cnnp_model.c"
, 3081, __extension__ __PRETTY_FUNCTION__); }))
;
3082 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 3082, __extension__ __PRETTY_FUNCTION__
); }))
;
3083 // From models.
3084 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3085 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
3086 int i;
3087 for (i = 0; i < rnum; i++)
3088 {
3089 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
3090 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 3090, __extension__ __PRETTY_FUNCTION__); }))
;
3091 assert(dest_d < compiled_data->parameters->rnum)((void) sizeof ((dest_d < compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 3091, __extension__ __PRETTY_FUNCTION__
); }))
;
3092 if (parallel_count > 1)
3093 {
3094 assert(0 && "Cannot support this when data parallel is in effect.")((void) sizeof ((0 && "Cannot support this when data parallel is in effect."
) ? 1 : 0), __extension__ ({ if (0 && "Cannot support this when data parallel is in effect."
) ; else __assert_fail ("0 && \"Cannot support this when data parallel is in effect.\""
, "ccv_cnnp_model.c", 3094, __extension__ __PRETTY_FUNCTION__
); }))
;
3095 } else {
3096 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[dest_d]) & ~(uintptr_t)1))
;
3097 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 3097, __extension__
__PRETTY_FUNCTION__); }))
;
3098 ccv_nnc_tensor_param_t params = src->info;
3099 if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) != CCV_TENSOR_GPU_MEMORY)
3100 continue;
3101 const size_t size = ccv_nnc_tensor_data_size(params);
3102 if (size <= 0)
3103 continue;
3104 const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1);
3105 const int tfb = (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL(0xFFF) && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
3106 ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_t));
3107 tensor->dataof = 0;
3108 tensor->alias_ref = 0;
3109 tensor->sig = 0;
3110 tensor->refcount = 1;
3111 tensor->info = params;
3112 if (tfb)
3113 {
3114 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000) | params.dim[2];
3115 // This corresponding to mat->step
3116 tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]))(((params.dim[1]) * _ccv_get_data_type_size[(((((params.datatype
) & 0xFF000) | params.dim[2])) & 0xFF000) >> 12
] * (((((params.datatype) & 0xFF000) | params.dim[2])) &
0xFFF) + 3) & -4)
;
3117 } else // This won't be recognized by ccv_dense_matrix_t
3118 tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000);
3119 // Remove this flag so it can be deallocated as usual.
3120 tensor->type &= ~CCV_NO_DATA_ALLOC;
3121 assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY)((void) sizeof ((((params.type) & 0xfff00) != CCV_COMPUTE_DEVICE_ANY
) ? 1 : 0), __extension__ ({ if (((params.type) & 0xfff00
) != CCV_COMPUTE_DEVICE_ANY) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY"
, "ccv_cnnp_model.c", 3121, __extension__ __PRETTY_FUNCTION__
); }))
;
3122 void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), size);
3123 if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
3124 {
3125 tensor->data.u8 = (uint8_t*)ptr;
3126 tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer a explicit prefetch call.
3127 } else {
3128 // Allocation failed.
3129 ccfreefree(tensor);
3130 continue;
3131 }
3132 // TODO: Cannot run this on the stream context yet, due to allocation and deallocations.
3133 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0);
3134 cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), tensor->data.u8, size);
3135 compiled_data->tensors.parameters[dest_d] = tensor;
3136 // Can free out the old one.
3137 if (should_free)
3138 ccv_nnc_tensor_free(src);
3139 }
3140 // No need to mark this symbol as init'ed, it is already.
3141 }
3142 ccv_array_free(to_parameter_indices);
3143#endif
3144}
3145
3146ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
3147{
3148 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3149 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3149, __extension__ __PRETTY_FUNCTION__); }))
;
3150 return compiled_data->minimize.minimizer;
3151}
3152
3153void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
3154{
3155 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3156 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3156, __extension__ __PRETTY_FUNCTION__); }))
;
3157 const int parameter_size = compiled_data->parameters->rnum;
3158 if (parameter_size == 0)
3159 return;
3160 if (reset)
3161 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 3161, __extension__ __PRETTY_FUNCTION__
); }))
; }
3162 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3163 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
3164 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
3165 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
3166 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
3167 // We update all parameters, at this point, we have one minimizer.
3168 if (set_parameters == 0 || set_parameter_size == 0)
3169 compiled_data->minimize.minimizer = minimizer;
3170 int i;
3171 if (set_parameters && set_parameter_size)
3172 {
3173 // I need to save what's the minimizer along with this.
3174 if (!compiled_data->minimize.parameters)
3175 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
3176 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
3177 set_minimizer_for_parameter->minimizer = minimizer;
3178 set_minimizer_for_parameter->parameter_size = set_parameter_size;
3179 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
3180 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
3181 }
3182 // If reset is true, clear the parameters array.
3183 if (reset && compiled_data->minimize.parameters)
3184 {
3185 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3186 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3187 ccv_array_clear(compiled_data->minimize.parameters);
3188 }
3189 if (!compiled_data->update_nodes)
3190 return;
3191 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
3192 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 3192, __extension__ __PRETTY_FUNCTION__); }))
;
3193 if (saved_aux_size > old_max_saved_aux_size)
3194 {
3195 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 3195, __extension__ __PRETTY_FUNCTION__
); }))
;
3196 // Reallocate first, move them around later.
3197 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
3198 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
3199 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
3200 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
3201 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
3202 }
3203 int flag = 0;
3204 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3205 if (set_parameters && set_parameter_size)
3206 {
3207 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
3208 for (i = 0; i < set_parameter_size; i++)
3209 {
3210 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
3211 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 3211, __extension__ __PRETTY_FUNCTION__
); }))
;
3212 const int old_rnum = parameter_indices->rnum;
3213 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
3214 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
3215 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 3215, __extension__ __PRETTY_FUNCTION__
); }))
;
3216 if (param_ref >= 0)
3217 {
3218 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 3218, __extension__ __PRETTY_FUNCTION__
); }))
;
3219 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
3220 parameter_indices->rnum = old_rnum + 1;
3221 }
3222 }
3223 // We may have duplicated indices, but that is OK, we will set it twice.
3224 for (i = 0; i < parameter_indices->rnum; i++)
3225 {
3226 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
3227 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
3228 flag = 1;
3229 }
3230 ccv_array_free(parameter_indices);
3231 } else {
3232 for (i = 0; i < parameter_size; i++)
3233 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
3234 flag = 1;
3235 if (compiled_data->minimize.parameters)
3236 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
3237 flag = 1;
3238 }
3239 if (flag)
3240 {
3241 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
3242 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
3243 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3244 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3245 }
3246}
3247
3248void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
3249{
3250 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3251 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 3251, __extension__ __PRETTY_FUNCTION__); }))
;
3252 compiled_data->compile_params = compile_params;
3253}
3254
3255void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
3256{
3257 if (model->graph && out_size > 0)
3258 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
3259 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
3260 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
3261 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
3262 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
3263 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
3264 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
3265}
3266
3267void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
3268{
3269 if (model->graph)
3270 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
3271}
3272
3273static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
3274{
3275 int i;
3276 const int parameter_size = compiled_data->parameters->rnum;
3277 ccv_array_free(compiled_data->parameters);
3278 if (compiled_data->parameter_flags)
3279 ccfreefree(compiled_data->parameter_flags);
3280 const int internal_size = compiled_data->internals->rnum;
3281 ccv_array_free(compiled_data->internals);
3282 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 3282, __extension__ __PRETTY_FUNCTION__
); }))
;
3283 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 3283, __extension__ __PRETTY_FUNCTION__
); }))
;
3284 for (i = 0; i < parameter_size; i++)
3285 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
3286 ccv_array_free(compiled_data->ids.parameters);
3287 for (i = 0; i < internal_size; i++)
3288 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
3289 ccv_array_free(compiled_data->ids.internals);
3290 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
3291 if (compiled_data->tensors.parameters)
3292 {
3293 for (i = 0; i < parameter_size * parallel_count; i++)
3294 // If it is not marked as not belonging, we can free it.
3295 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
3296 if (compiled_data->tensors.parameters[i])
3297 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
3298 for (i = 0; i < internal_size * parallel_count; i++)
3299 if (compiled_data->tensors.internals[i])
3300 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
3301 ccfreefree(compiled_data->tensors.parameters);
3302 }
3303 if (compiled_data->tensors.gradients)
3304 {
3305 for (i = 0; i < parameter_size * parallel_count; i++)
3306 {
3307 if (compiled_data->tensors.gradients[i])
3308 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
3309 if (compiled_data->tensors.accum_gradients[i])
3310 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
3311 }
3312 ccfreefree(compiled_data->tensors.gradients);
3313 }
3314 if (compiled_data->minimize.parameters)
3315 {
3316 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
3317 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
3318 ccv_array_free(compiled_data->minimize.parameters);
3319 }
3320 if (compiled_data->rewindables)
3321 ccv_array_free(compiled_data->rewindables);
3322 if (compiled_data->tensors_init.v)
3323 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
3324 if (compiled_data->evaluate.tos)
3325 ccfreefree(compiled_data->evaluate.tos);
3326 compiled_data->evaluate.tos = 0;
3327 if (compiled_data->stream_map)
3328 {
3329 khiter_t k;
3330 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
3331 {
3332 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
3333 continue;
3334 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
3335 ccv_nnc_stream_context_free(stream);
3336 }
3337 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
3338 }
3339 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3340 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3341 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3342 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3343 if (compiled_data->gradient_checkpoints)
3344 {
3345 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3346 {
3347 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
3348 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 3348, __extension__ __PRETTY_FUNCTION__
); }))
;
3349 ccfreefree(checkpoint->inputs);
3350 ccv_array_free(checkpoint->tensor_symbols);
3351 }
3352 ccv_array_free(compiled_data->gradient_checkpoints);
3353 }
3354 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3355 ccfreefree(compiled_data);
3356}
3357
3358void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3359{
3360 ccv_cnnp_model_deinit(model);
3361 if (model->isa->dealloc)
3362 model->isa->dealloc(model);
3363 if (model->io)
3364 {
3365 int i;
3366 for (i = 0; i < model->io->rnum; i++)
3367 {
3368 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
3369 if (model_io->outgoings)
3370 ccv_array_free(model_io->outgoings);
3371 if (model_io->incomings)
3372 ccv_array_free(model_io->incomings);
3373 if (model_io->dependencies)
3374 ccv_array_free(model_io->dependencies);
3375 ccfreefree(model_io);
3376 }
3377 ccv_array_free(model->io);
3378 }
3379 if (model->parameter_indices)
3380 ccv_array_free(model->parameter_indices);
3381 if (model->inputs)
3382 ccfreefree(model->inputs);
3383 if (model->graph)
3384 ccv_nnc_symbolic_graph_free(model->graph);
3385 if (model->compiled_data)
3386 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3387 if (model->name)
3388 ccfreefree(model->name);
3389 ccfreefree(model);
3390}
3391
3392void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3393{
3394 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3395 if (!compiled_data)
3396 return;
3397 if (compiled_data->graph)
3398 ccv_nnc_graph_cancel(compiled_data->graph);
3399 if (compiled_data->apply_gradients.graph)
3400 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3401}
3402
3403void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags)
3404{
3405 model->exec_flags = flags;
3406}
3407
3408int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model)
3409{
3410 return model->exec_flags;
3411}