Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2436, column 1
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-11-07-225153-139491-1 -x c 
ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7
8// MARK - Level-5 API
9
10ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
11{
12 if (!model->io)
13 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
14 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
15 model_io->param_ref = 0;
16 model_io->param_sel = 0;
17 model_io->visit = 0;
18 model_io->model = model;
19 model_io->dependencies = 0;
20 model_io->dependents = 0;
21 model_io->outgoings = 0;
22 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
23 ccv_array_push(model->io, &model_io);
24 if (input_size > 0)
25 {
26 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
27 ccv_array_resize(model_io->incomings, input_size);
28 int i;
29 memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t
)(model_io->incomings)->rsize * (size_t)(0)))
, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
30 for (i = 0; i < input_size; i++)
31 {
32 if (!inputs[i]->outgoings)
33 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
34 ccv_array_push(inputs[i]->outgoings, &model_io);
35 }
36 } else {
37 model_io->incomings = 0;
38 }
39 return model_io;
40}
41
42void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
43{
44 assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__
({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0"
, "ccv_cnnp_model.c", 44, __extension__ __PRETTY_FUNCTION__);
}))
;
45 if (!model_io->dependencies)
46 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
47 int i, j;
48 for (i = 0; i < dependency_size; i++)
49 {
50 int flag = 0;
51 // Check if it is already exist or not.
52 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
53 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t
)(model_io->dependencies)->rsize * (size_t)(j)))
== dependencies[i])
54 flag = 1;
55 if (flag)
56 continue;
57 ccv_array_push(model_io->dependencies, dependencies + i);
58 ++dependencies[i]->dependents;
59 }
60}
61
62int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
63{
64 return model->output_size;
65}
66
67int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
68{
69 // If the model is compiled, it is default to 1 unless it is not.
70 if (model->compiled_data)
71 return model->is_trainable >= 0 ? model->is_trainable : 1;
72 return model->is_trainable;
73}
74
75ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
76{
77 if (!model->io)
78 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
79 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s));
80 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
81 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
82 model_io->visit = 0;
83 model_io->model = model;
84 model_io->outputs = 0;
85 model_io->dependencies = 0;
86 model_io->dependents = 0;
87 model_io->incomings = 0;
88 model_io->outgoings = 0;
89 ccv_array_push(model->io, &model_io);
90 return model_io;
91}
92
93void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
94{
95 model->notify_hook.func = func;
96 model->notify_hook.context = context;
97}
98
99void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
100{
101 if (model->notify_hook.func)
102 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
103 if (model->isa->notify)
104 model->isa->notify(model, tag, payload);
105}
106
107static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
108{
109 int i, j;
110 for (i = 0; i < graph_exec_symbol_size; i++)
111 {
112 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
113 // Check whether this tensor symbol has any duplicate.
114 for (j = i + 1; j < graph_exec_symbol_size;)
115 {
116 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
117 // If there is a same tensor symbol, remove it.
118 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
119 {
120 if (j + 1 < graph_exec_symbol_size)
121 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
122 --graph_exec_symbol_size;
123 continue;
124 }
125 ++j;
126 }
127 }
128 return graph_exec_symbol_size;
129}
130
131void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
132{
133 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
134 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
135 int i;
136 if (!model->parameter_indices)
137 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
138 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
139 {
140 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data
)) + (size_t)(add_to_array_context->symbols)->rsize * (
size_t)(i)))
;
141 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
142 {
143 // Only add to parameter_indices if it is trainable.
144 if (add_to_array_context->prefix == 't')
145 ccv_array_add_unique_int(model->parameter_indices, i);
146 // Found it, return, don't add it.
147 return;
148 }
149 }
150 // Only add to parameter_indices if it is trainable.
151 if (add_to_array_context->prefix == 't')
152 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
153 // This is a new one, no need to add_unique_int, it is unique.
154 ccv_array_push(add_to_array_context->symbols, &symbol);
155 if (add_to_array_context->trainables)
156 ccv_array_push(add_to_array_context->trainables, &is_trainable);
157 char id[2048];
158 id[0] = add_to_array_context->prefix;
159 id[1] = '-';
160 int total_len = 2;
161 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
162 {
163 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences
)->data)) + (size_t)(add_to_array_context->sequence->
sequences)->rsize * (size_t)(i)))
;
164 int len;
165 if (name->name && name->name[0] != '\0')
166 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
167 else
168 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
169 total_len += len;
170 if (total_len >= 2047)
171 break;
172 }
173 if (total_len < 2047)
174 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
175 assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__
({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048"
, "ccv_cnnp_model.c", 175, __extension__ __PRETTY_FUNCTION__)
; }))
;
176 char *heap_id = (char*)ccmallocmalloc(total_len + 1);
177 memcpy(heap_id, id, total_len + 1);
178 ccv_array_push(add_to_array_context->ids, &heap_id);
179 ++add_to_array_context->sequence->it;
180}
181
182static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
183{
184 compiled_data->f = compiled_data->fits + output_size;
185 compiled_data->xpu_alloc.mp_hdr = -1;
186 compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str();
187 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc();
188 compiled_data->gradient_checkpoints = gradient_checkpoints;
189}
190
191static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
192{
193 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 193, __extension__ __PRETTY_FUNCTION__); }))
;
194 model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
195 int i;
196 for (i = 0; i < input_size; i++)
197 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
198 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
199 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
200 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
201 ccv_cnnp_model_sequence_t model_sequence = {
202 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
203 };
204 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
205 .sequence = &model_sequence,
206 .prefix = 't',
207 .symbols = parameters,
208 .ids = parameter_ids,
209 .trainables = parameter_trainables,
210 };
211 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
212 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
213 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
214 .sequence = &model_sequence,
215 .prefix = 'r',
216 .symbols = internals,
217 .ids = internal_ids,
218 .trainables = 0,
219 };
220 ccv_cnnp_model_build_data_t build_data = {
221 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
222 .model_sequence = &model_sequence,
223 .add_to_array = ccv_cnnp_model_add_to_array,
224 .parameters = parameters,
225 .context = {
226 .add_to_parameter = &add_to_parameter_context,
227 .add_to_output = &add_to_output_context,
228 },
229 .gradient_checkpoints = 0,
230 };
231 model->data = &build_data;
232 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
233 for (i = 0; i < model->output_size; i++)
234 {
235 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
236 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
237 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
238 continue;
239 // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method
240 // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be
241 // honest, because we cannot handle cases of alias is part of the original tensor but bind differently).
242 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
243 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
244 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, &output, 1, model->outputs + i, 1, "contiguous");
245 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
246 }
247 model->data = 0;
248 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
249 if (model_sequence.sequences)
250 ccv_array_free(model_sequence.sequences);
251 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
252 int not_trainables = 0;
253 // Assert no parameter is alias.
254 for (i = 0; i < parameters->rnum; i++)
255 {
256 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
257 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
258 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 258, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
259 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
== 0)
260 not_trainables = 1;
261 }
262 assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables->
rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables
->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum"
, "ccv_cnnp_model.c", 262, __extension__ __PRETTY_FUNCTION__)
; }))
;
263 uint64_t* parameter_flags = 0;
264 if (not_trainables)
265 {
266 parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
267 for (i = 0; i < parameter_trainables->rnum; i++)
268 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
)
269 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
270 }
271 ccv_array_free(parameter_trainables);
272 // Assert no internal is alias.
273 for (i = 0; i < internals->rnum; i++)
274 {
275 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals
)->rsize * (size_t)(i)))
;
276 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
277 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 277, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
278 }
279 const int output_size = model->output_size;
280 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
281 const int parameters_rnum = parameters->rnum;
282 if (input_size > 0)
283 {
284 ccv_array_resize(parameters, parameters_rnum + input_size);
285 memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(parameters_rnum)))
, model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
286 }
287 ccv_nnc_symbolic_graph_simplify(model->graph,
288 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
289 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
290 CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
291 CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
,
292 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters_rnum + input_size,
293 model->outputs, output_size,
294 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
295 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
296 // Size it down.
297 parameters->rnum = parameters_rnum;
298 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
299 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
300 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
301 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 301, __extension__ __PRETTY_FUNCTION__)
; }))
;
302 compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
303 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
304 compiled_data->loss = loss;
305 if (loss.cmd == CCV_NNC_NOOP)
306 {
307 // If no loss function provided, there is no fits.
308 for (i = 0; i < output_size; i++)
309 {
310 compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
311 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
312 if (alias_to.d < 0)
313 compiled_data->f[i] = model->outputs[i];
314 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
315 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
316 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
317 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
318 int j;
319 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
320 { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if (
ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c"
, 320, __extension__ __PRETTY_FUNCTION__); }))
; } // There is no ofs.
321 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
322 }
323 }
324 } else {
325 for (i = 0; i < output_size; i++)
326 {
327 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
328 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
329 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
330 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit}
, (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 -1)
, TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, (
1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 -1)
, 0);
331 }
332 }
333 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
334 ccv_nnc_symbolic_graph_simplify(model->graph,
335 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, // Only do Ops fusion, in this way, we can fuse the loss function.
336 0, 0, // No need to provide binds at this point.
337 compiled_data->f, model->output_size,
338 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
339 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
340 // If inputs are from GPU, stream type is GPU.
341 compiled_data->parameters = parameters;
342 compiled_data->parameter_flags = parameter_flags;
343 compiled_data->internals = internals;
344 compiled_data->ids.parameters = parameter_ids;
345 compiled_data->ids.internals = internal_ids;
346 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
347}
348
349static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
350{
351 ccv_array_t* const stack = (ccv_array_t*)context;
352 ccv_array_push(stack, &symbol.d);
353}
354
355static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
356{
357 const ccv_nnc_tensor_symbol_t src_symbol = {
358 .d = src_index,
359 .graph = src_graph
360 };
361 const ccv_nnc_tensor_symbol_t dest_symbol = {
362 .d = dest_index,
363 .graph = dest_graph
364 };
365 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
366 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
367 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
368 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
369 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
370 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
371}
372
373static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
374{
375 const ccv_nnc_tensor_symbol_t src_symbol = {
376 .d = src_index,
377 .graph = src_graph
378 };
379 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
380 const ccv_nnc_tensor_symbol_t dest_symbol = {
381 .d = dest_index,
382 .graph = dest_graph
383 };
384 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
385 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
386}
387
// Forward declaration: ccv_cnnp_model_absorb below calls this before any definition is visible.
388static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
// Forward declaration; NOTE(review): no use is visible in this part of the file, presumably needed by code further down.
389static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
390
391typedef struct {
392 int parallel_count;
393 ccv_nnc_symbolic_graph_t* graph;
394 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
395} ccv_nnc_graph_exec_update_t;
396
397static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
398{
399 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
400 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
401 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
402 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
403 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
404 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
405 const int parallel_count = graph_exec_update->parallel_count;
406 int i;
407 for (i = 1; i < parallel_count; i++)
408 {
409 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
410 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
411 {
412 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
413 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
414 }
415 }
416}
417
418void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
419{
420 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 420, __extension__ __PRETTY_FUNCTION__); }))
;
421 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 421, __extension__ __PRETTY_FUNCTION__)
; }))
;
422 assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if
(!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c"
, 422, __extension__ __PRETTY_FUNCTION__); }))
;
423 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
424 init->graph = ccv_nnc_symbolic_graph_new();
425 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
426 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
427 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
428 init->parallel_count = model->parallel_count;
429 init->memory_compression = model->memory_compression;
430 init->memory_reduction = model->memory_reduction;
431 init->gradient_checkpointing = model->gradient_checkpointing;
432 init->compiled_data->stream_type = model->compiled_data->stream_type;
433 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
434 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
435 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
436 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
437 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
438 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0);
439 int i, j;
440 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
441 for (i = 0; i < compiled_data->parameters->rnum; i++)
442 {
443 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
444 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 444, __extension__ __PRETTY_FUNCTION__)
; }))
;
445 }
446 for (i = 0; i < compiled_data->internals->rnum; i++)
447 {
448 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
449 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 449, __extension__ __PRETTY_FUNCTION__)
; }))
;
450 }
451 // Update inputs.
452 assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size)
? 1 : 0), __extension__ ({ if (model->input_size == init->
input_size) ; else __assert_fail ("model->input_size == init->input_size"
, "ccv_cnnp_model.c", 452, __extension__ __PRETTY_FUNCTION__)
; }))
;
453 for (i = 0; i < model->input_size; i++)
454 if (model->inputs[i].d >= 0)
455 {
456 assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0"
, "ccv_cnnp_model.c", 456, __extension__ __PRETTY_FUNCTION__)
; }))
;
457 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
458 }
459 // Update outputs.
460 assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size
) ? 1 : 0), __extension__ ({ if (model->output_size == init
->output_size) ; else __assert_fail ("model->output_size == init->output_size"
, "ccv_cnnp_model.c", 460, __extension__ __PRETTY_FUNCTION__)
; }))
;
461 for (i = 0; i < model->output_size; i++)
462 {
463 if (model->outputs[i].d >= 0)
464 {
465 assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->outputs[i].d >= 0) ; else __assert_fail (
"init->outputs[i].d >= 0", "ccv_cnnp_model.c", 465, __extension__
__PRETTY_FUNCTION__); }))
;
466 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
467 }
468 if (model->outputs[i].d != model->compiled_data->f[i].d)
469 {
470 assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data
->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[
i].d != init->compiled_data->f[i].d) ; else __assert_fail
("init->outputs[i].d != init->compiled_data->f[i].d"
, "ccv_cnnp_model.c", 470, __extension__ __PRETTY_FUNCTION__)
; }))
;
471 if (model->compiled_data->f[i].d >= 0)
472 {
473 assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ?
1 : 0), __extension__ ({ if (init->compiled_data->f[i]
.d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0"
, "ccv_cnnp_model.c", 473, __extension__ __PRETTY_FUNCTION__)
; }))
;
474 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
475 }
476 }
477 }
478 // Go through the graph to set tensor on matching symbols
479 for (i = 0; i < stack->rnum; i++)
480 {
481 const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize
* (size_t)(i)))
;
482 // If exceed range, skip.
483 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
484 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
485 continue;
486 const ccv_nnc_graph_exec_symbol_t src_symbol = {
487 .d = d,
488 .graph = init->graph
489 };
490 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
491 .d = d,
492 .graph = model->graph
493 };
494 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
495 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
496 // If the name doesn't match, skip.
497 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
498 continue;
499 // Now get all the inputs and outputs, if matches, set them.
500 const int* src_inputs;
501 int src_input_size;
502 const int* src_outputs;
503 int src_output_size;
504 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
505 const int* dest_inputs;
506 int dest_input_size;
507 const int* dest_outputs;
508 int dest_output_size;
509 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
510 // We may have unmatched input / output size because this is the minimizer and it has
511 // different saved_aux (for example, when we shrunk with CMD_NOOP).
512 if (src_input_size != dest_input_size)
513 continue;
514 if (src_output_size != dest_output_size)
515 continue;
516 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
517 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
518 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
519 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
520 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
521 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
522 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
523 // a new exec symbol.
524 for (j = 0; j < src_input_size; j++)
525 if (src_inputs[j] >= 0)
526 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
527 for (j = 0; j < src_output_size; j++)
528 if (src_outputs[j] >= 0)
529 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
530 }
531 ccv_array_free(stack);
532 // After this, we get all tensors in the model graph resolved through tensor_auto.
533 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0);
534 // Verify symbols we get matches.
535 const int parameter_size = compiled_data->parameters->rnum;
536 for (i = 0; i < parameter_size; i++)
537 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->parameters)->data)) + (size_t)(compiled_data
->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (
((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data->
parameters)->data)) + (size_t)(compiled_data->parameters
)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d"
, "ccv_cnnp_model.c", 537, __extension__ __PRETTY_FUNCTION__)
; }))
; }
538 const int internal_size = compiled_data->internals->rnum;
539 for (i = 0; i < internal_size; i++)
540 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->internals)->data)) + (size_t)(compiled_data
->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->internals)->
data)) + (size_t)(init->compiled_data->internals)->rsize
* (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((compiled_data->internals)->data)) +
(size_t)(compiled_data->internals)->rsize * (size_t)(i
))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init
->compiled_data->internals)->data)) + (size_t)(init->
compiled_data->internals)->rsize * (size_t)(i))))->d
) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d"
, "ccv_cnnp_model.c", 540, __extension__ __PRETTY_FUNCTION__)
; }))
; }
541 // Go through compiled data.
542 if (compiled_data->tensor_arena)
543 {
544 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
545 if (flag == 0 && compiled_data->graph_exec_arena)
546 {
547 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
548 // Since we will reinit, if we previously set is_test, we need to set it again.
549 if (compiled_data->is_test)
550 {
551 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
552 ccv_nnc_graph_exec_update_t update = {
553 .parallel_count = parallel_count,
554 .graph = model->graph,
555 .graph_exec_arena = compiled_data->graph_exec_arena,
556 };
557 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
558 }
559 } else
560 // Free-up tensor arena & graph exec arena.
561 _ccv_cnnp_compiled_data_graph_free(compiled_data);
562 }
563 // There are other compiled graphs, for accum and apply gradients.
564 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
565 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
566 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
567 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
568 // That is why we don't update these compiled graphs at all this point.
569 // Free the model, we've already "absorbed" it.
570 ccv_cnnp_model_free(init);
571}
572
573void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
574{
575 assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model->
input_size == 0) ? 1 : 0), __extension__ ({ if (input_size ==
model->input_size || model->input_size == 0) ; else __assert_fail
("input_size == model->input_size || model->input_size == 0"
, "ccv_cnnp_model.c", 575, __extension__ __PRETTY_FUNCTION__)
; }))
;
576 if (model->input_size == 0)
577 model->input_size = input_size;
578 if (!model->graph) // The graph is not compiled yet.
579 {
580 model->graph = ccv_nnc_symbolic_graph_new();
581 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
582 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 582, __extension__ __PRETTY_FUNCTION__)
; }))
;
583 int i, flag = 0;
584 for (i = 0; !flag && i < input_size; i++)
585 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY);
586 // If inputs are from GPU, stream type is GPU.
587 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
588 model->compiled_data->minimize.minimizer = minimizer;
589 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
590 } else {
591 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
592 // And then absorb the "new model" to the old one.
593 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
594 ccv_cnnp_model_absorb(model, init, inputs, input_size);
595 // Reset minimizer.
596 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
597 }
598}
599
600ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
601{
602 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
603 new_model->is_trainable = is_trainable;
604 return new_model;
605}
606
607void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
608{
609 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 609, __extension__ __PRETTY_FUNCTION__); }))
;
610 assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0
), __extension__ ({ if (output_size == model->output_size)
; else __assert_fail ("output_size == model->output_size"
, "ccv_cnnp_model.c", 610, __extension__ __PRETTY_FUNCTION__)
; }))
;
611 ccv_nnc_symbolic_graph_t* const graph = model->graph;
612 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0);
613 int i;
614 for (i = 0; i < output_size; i++)
615 {
616 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL"
, "ccv_cnnp_model.c", 616, __extension__ __PRETTY_FUNCTION__)
; }))
;
617 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
618 }
619}
620
621void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
622{
623 if (workspace_size == model->workspace_size)
624 return;
625 model->workspace_size = workspace_size;
626 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
627 if (compiled_data && compiled_data->graph)
628 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0);
629}
630
631size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
632{
633 return model->workspace_size;
634}
635
636void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
637{
638 if (parallel == 0)
639 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
640 else
641 model->parallel_count = parallel;
642 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
643 if (compiled_data)
644 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 644, __extension__ __PRETTY_FUNCTION__)
; }))
; }
645}
646
647void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
648{
649 model->max_stream_count = max_stream_count;
650 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
651 if (compiled_data)
652 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 652, __extension__ __PRETTY_FUNCTION__)
; }))
; }
653}
654
655void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
656{
657 model->memory_compression = memory_compression;
658 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
659 if (compiled_data)
660 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 660, __extension__ __PRETTY_FUNCTION__)
; }))
; }
661}
662
663void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
664{
665 model->memory_reduction = memory_reduction;
666 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
667 if (compiled_data)
668 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 668, __extension__ __PRETTY_FUNCTION__)
; }))
; }
669}
670
671void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
672{
673 model->gradient_checkpointing = gradient_checkpointing;
674}
675
676int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
677{
678 return model->gradient_checkpointing;
679}
680
681typedef struct {
682 int parallel_count;
683 ccv_nnc_symbolic_graph_t* graph;
684 ccv_cnnp_compiled_data_t* compiled_data;
685 ccv_nnc_tensor_arena_t* tensor_arena;
686} ccv_nnc_tensor_init_states_t;
687
688static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
689{
690 int i;
691 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
692 for (i = 0; i < compiled_data->parameters->rnum; i++)
693 {
694 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
695 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
696 return 1;
697 }
698 for (i = 0; i < compiled_data->internals->rnum; i++)
699 {
700 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
701 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
702 return 1;
703 }
704 return 0;
705}
706
707static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
708{
709 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
710 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
711 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
712 if (!output_tensor)
713 return;
714 const int d = output_symbol.d;
715 assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data->
tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states
->compiled_data->tensors_init.size) ; else __assert_fail
("d < tensor_init_states->compiled_data->tensors_init.size"
, "ccv_cnnp_model.c", 715, __extension__ __PRETTY_FUNCTION__)
; }))
;
716 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data
->tensors_init.v) & ~(uintptr_t)1))
;
717 if (init_v[d >> 5] & (1u << (d & 0x1f)))
718 return;
719 init_v[d >> 5] |= (1u << (d & 0x1f));
720 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
721 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
722 const int parallel_count = tensor_init_states->parallel_count;
723 int i;
724 for (i = 1; i < parallel_count; i++)
725 {
726 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
727 if (copy)
728 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
729 }
730}
731
732// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
733// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
734static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
735{
736 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 736, __extension__ __PRETTY_FUNCTION__); }))
;
737 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 737, __extension__ __PRETTY_FUNCTION__)
; }))
;
738 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
739 assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__
({ if (compiled_data->rewindables) ; else __assert_fail (
"compiled_data->rewindables", "ccv_cnnp_model.c", 739, __extension__
__PRETTY_FUNCTION__); }))
;
740 int i;
741 for (i = 0; i < compiled_data->rewindables->rnum; i++)
742 {
743 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) +
(size_t)(compiled_data->rewindables)->rsize * (size_t)
(i)))
;
744 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
745 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
746 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
747 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
748 }
749 ccv_array_clear(compiled_data->rewindables);
750 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
751}
752
753static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
754{
755 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
756 .type = CCV_CNNP_REWIND_TENSOR,
757 .tensor = symbol
758 };
759 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
760 ccv_array_push(rewind_symbols, &rewind_symbol);
761}
762
763static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
764{
765 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
766 .type = CCV_CNNP_REWIND_TENSOR,
767 .tensor = symbol
768 };
769 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
770 ccv_array_push(rewind_symbols, &rewind_symbol);
771}
772
773static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
774{
775 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
776 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
777 .graph_exec = symbol
778 };
779 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
780 ccv_array_push(rewind_symbols, &rewind_symbol);
781}
782
783static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
784{
785 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
786 if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0))
787 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
788 int i;
789 for (i = 1; i < parallel_count; i++)
790 {
791 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
792 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
793 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
794 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
795 }
796}
797
798static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
799{
800 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 800, __extension__ __PRETTY_FUNCTION__); }))
;
801 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 801, __extension__ __PRETTY_FUNCTION__); }))
;
802 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
803 int i;
804 for (i = 1; i < parallel_count; i++)
805 {
806 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
807 if (copy_symbol.graph)
808 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
809 }
810 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
811 if (graph_exec_arena)
812 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
813 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
814 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
815 if (gradient_graph_exec_arena)
816 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
817}
818
819static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
820{
821 int this_parameter_flag = 0;
822 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
823 return this_parameter_flag;
824 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
825 int j, k;
826 // For no-op, we can preserve previous saved_aux_size.
827 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
828 {
829 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
830 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
831 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
832 // make sure some model parameters don't update if we don't want them to.
833 int old_saved_aux_size;
834 if (old_minimizer.cmd == CCV_NNC_NOOP)
835 {
836 int input_size;
837 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
838 if (input_size < 2) // This is not legit.
839 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
840 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
841 old_saved_aux_size = input_size - 2;
842 } else
843 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
844 if (old_saved_aux_size != saved_aux_size)
845 {
846 this_parameter_flag = 1;
847 if (saved_aux_size > old_saved_aux_size)
848 {
849 // Allocate new tensor symbols.
850 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
851 for (j = old_saved_aux_size; j < saved_aux_size; j++)
852 {
853 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
854 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
855 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
856 for (k = 1; k < parallel_count; k++)
857 {
858 ccv_nnc_tensor_param_t dev_info = info;
859 if (k != device_id)
860 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) &
0xfff) << 8))
;
861 else
862 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) &
0xfff) << 8))
;
863 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
864 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
865 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
866 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
867 }
868 }
869 } else {
870 for (j = saved_aux_size; j < old_saved_aux_size; j++)
871 {
872 for (k = 1; k < parallel_count; k++)
873 {
874 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
875 if (src_copy.d >= 0)
876 {
877 ccv_nnc_tensor_symbol_free(graph, src_copy);
878 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
879 }
880 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
881 if (dest_copy.d >= 0)
882 {
883 ccv_nnc_tensor_symbol_free(graph, dest_copy);
884 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
885 }
886 }
887 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
888 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
889 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
890 }
891 }
892 }
893 }
894 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
895 if (this_parameter_flag)
896 {
897 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
898 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
899 const int* inputs = 0;
900 int input_size = 0;
901 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
902 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 902, __extension__ __PRETTY_FUNCTION__)
; }))
;
903 update_inputs[0].d = inputs[0];
904 update_inputs[0].graph = graph;
905 update_inputs[1].d = inputs[1];
906 update_inputs[1].graph = graph;
907 update_outputs[0] = updated_parameters[parameter_indice];
908 for (j = 0; j < saved_aux_size; j++)
909 {
910 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
911 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
912 }
913 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
914 for (k = 1; k < parallel_count; k++)
915 {
916 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
917 assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if
(copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c"
, 917, __extension__ __PRETTY_FUNCTION__); }))
;
918 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
919 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 919, __extension__ __PRETTY_FUNCTION__)
; }))
;
920 update_inputs[0].d = inputs[0];
921 update_inputs[0].graph = graph;
922 update_inputs[1].d = inputs[1];
923 update_inputs[1].graph = graph;
924 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
925 for (j = 0; j < saved_aux_size; j++)
926 {
927 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
928 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
929 }
930 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
931 }
932 }
933 return this_parameter_flag;
934}
935
936typedef struct {
937 int parameter_size;
938 ccv_nnc_cmd_t minimizer;
939 ccv_cnnp_model_io_t parameters[1];
940} ccv_cnnp_set_minimizer_for_parameter_t;
941
942static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
943{
944 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
945 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 945, __extension__ __PRETTY_FUNCTION__); }))
;
946 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
947 // We update all parameters, at this point, we have one minimizer.
948 const int parameter_size = compiled_data->parameters->rnum;
949 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
950 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
951 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 951, __extension__ __PRETTY_FUNCTION__); }))
;
952 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
953 ccv_array_t* const parameters = compiled_data->minimize.parameters;
954 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
955 int i, j, flag = 0;
956 for (i = 0; i < parameters->rnum; i++)
957 {
958 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
959 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
960 {
961 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
962 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 962, __extension__ __PRETTY_FUNCTION__)
; }))
;
963 const int old_rnum = parameter_indices->rnum;
964 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
965 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
966 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 966, __extension__ __PRETTY_FUNCTION__)
; }))
;
967 if (param_ref >= 0)
968 {
969 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 969, __extension__ __PRETTY_FUNCTION__)
; }))
;
970 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
971 parameter_indices->rnum = old_rnum + 1;
972 }
973 }
974 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
975 // We may have duplicated indices, but that is OK, we will set it twice.
976 for (j = 0; j < parameter_indices->rnum; j++)
977 {
978 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
979 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 979, __extension__ __PRETTY_FUNCTION__)
; }))
;
980 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
981 flag = 1;
982 }
983 ccv_array_clear(parameter_indices);
984 }
985 ccv_array_free(parameter_indices);
986 return flag;
987}
988
989static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
990{
991 if (new_saved_aux_size == old_saved_aux_size)
992 return;
993 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 993, __extension__ __PRETTY_FUNCTION__)
; }))
;
994 int i, j;
995 for (i = parameter_size - 1; i >= 0; i--)
996 {
997 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
998 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
999 for (j = old_saved_aux_size - 1; j >= 0; j--)
1000 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1001 }
1002}
1003
1004static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1005{
1006 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1007 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1007, __extension__ __PRETTY_FUNCTION__); }))
;
1008 if (!compiled_data->rewindables)
1009 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1010 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1011 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1012 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1013}
1014
// Extends model->graph with the backward / minimize portion and records the
// resulting symbols in model->compiled_data.
//
// gradient_mode: must not be CCV_CNNP_COMPILED_DATA_GRADIENT_NONE, and the
//   compiled data must currently be in GRADIENT_NONE mode (both asserted).
// disable_outgrad: bit mask over model inputs; an input whose bit is clear is
//   counted as needing an output gradient.
// fits / fit_size: optional fit tensors; when given, fit_size must equal
//   output_size * parallel_count and the fit symbols take on their params.
//
// Steps, in order: size the evaluate.tos buffer, allocate the
// updated_parameters / update_nodes / saved_aux block and the
// gradients / backward.tos block, call ccv_nnc_symbolic_graph_minimize,
// widen saved_aux, apply per-parameter minimizers, mark the backward symbol
// of every output as init-to-ones, fix up graph destinations, optionally
// apply data parallelism, optionally run memory compression / reduction,
// and finally store gradient_mode.
1015static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1016{
1017 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1018 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1018, __extension__ __PRETTY_FUNCTION__
); }))
;
1019 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1019, __extension__ __PRETTY_FUNCTION__
); }))
;
1020 const int evaluate_to_size = compiled_data->evaluate.to_size;
1021 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 1021, __extension__ __PRETTY_FUNCTION__
); }))
;
1022 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
	// Grow evaluate.tos so it can hold one exec symbol per device copy,
	// followed in the same buffer by the matching concrete graph execs
	// (evaluate.to_ops points just past the symbols).
1023 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1024 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1025 int i, j;
1026 const int output_size = model->output_size;
1027 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 1027, __extension__ __PRETTY_FUNCTION__
); }))
;
	// Only the first output_size fit tensors are used to set symbol params.
1028 if (fits)
1029 for (i = 0; i < output_size; i++)
1030 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1031 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1032 const int parameter_size = compiled_data->parameters->rnum;
	// One allocation carved into three consecutive arrays: updated_parameters,
	// update_nodes, saved_aux. Only updated_parameters is ever freed.
1033 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1034 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1035 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1036 int parameter_size_maybe_more = parameter_size;
1037 compiled_data->disable_outgrad = disable_outgrad;
	// Count how many inputs need a gradient with respect to them.
1038 int outgrad_size;
1039 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1040 outgrad_size = 0;
1041 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1042 outgrad_size = model->input_size;
1043 else {
1044 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1044, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1045 outgrad_size = 0;
1046 for (i = 0; i < model->input_size; i++)
1047 if (!(disable_outgrad & ((uint64_t)1 << i)))
1048 ++outgrad_size;
1049 }
1050 compiled_data->outgrad_size = outgrad_size;
1051 parameter_size_maybe_more += outgrad_size;
	// One allocation for gradients (parameters first, then outgrads) followed
	// by backward.tos, which has room for one entry per gradient per device.
1052 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1053 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1054 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1055 compiled_data->backward.to_size = parameter_size_maybe_more;
1056 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
;
	// With parameter_flags present, build a temporary parameter list in which
	// every parameter whose flag bit is clear is replaced by NO_TENSOR_SYMBOL.
	// The temporary list is released right after the minimize call below.
1057 if (compiled_data->parameter_flags)
1058 {
1059 parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1060 for (i = 0; i < parameter_size; i++)
1061 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1062 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1063 else
1064 parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1065 }
	// Three variants of the same minimize call; they differ only in which
	// inputs (none, all, or the selected subset) are passed for outgrads.
1066 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1067 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1068 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1069 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1070 else { // Compute minimize with gradients including selected inputs.
1071 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 1071, __extension__ __PRETTY_FUNCTION__
); }))
;
1072 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1072, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1073 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 1073, __extension__ __PRETTY_FUNCTION__
); }))
;
	// Gather the inputs whose disable bit is clear, preserving input order.
1074 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1075 j = 0;
1076 for (i = 0; i < model->input_size; i++)
1077 if (!(disable_outgrad & ((uint64_t)1 << i)))
1078 outgrads[j++] = model->inputs[i];
1079 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1080 }
1081 if (compiled_data->parameter_flags)
1082 ccfreefree(parameters);
	// saved_aux was filled with the default minimizer's row width; widen each
	// row to max_saved_aux_size before per-parameter minimizers are applied.
1083 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1084 if (compiled_data->minimize.parameters)
1085 _ccv_cnnp_apply_parameters_with_minimizer(model);
1086 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1087 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1088 for (i = 0; i < output_size; i++)
1089 {
1090 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1091 // Init this to 1 so we can backprop.
1092 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1093 }
	// Rebuild backward.tos from scratch, skipping gradients that do not exist.
1094 compiled_data->backward.to_size = 0;
1095 for (i = 0; i < parameter_size_maybe_more; i++)
1096 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1097 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1098 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1099 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
	// Visit each outgrad (there are parameter_size_maybe_more - parameter_size
	// of them) and make sure its backward exec is reachable as a destination.
1100 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1101 {
1102 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1103 continue;
1104 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1105 const int* tos;
1106 int to_size;
1107 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1108 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1109 {
1110 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1111 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1112 int flag = 0;
	// Only the last (up to) i destinations are scanned for a duplicate, i.e.
	// at most as many as this loop could have appended so far.
1113 const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = (
destination_count - i); (_a > _b) ? _a : _b; })
;
1114 for (j = i - 1; !flag && j >= 0; j--)
1115 if (j + outgrad_destination_start < destination_count)
1116 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1117 if (!flag) // Only if we cannot find it, we add it.
1118 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1119 }
1120 }
1121 if (parallel_count > 1)
1122 {
1123 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1124 0, 0,
1125 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1126 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1127 0, 0, 0,
1128 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1129 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1130 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
	// Append the per-device copies of every evaluate "to" exec symbol. The loop
	// bound is the size captured at function entry, not the growing to_size.
1131 for (i = 0; i < evaluate_to_size; i++)
1132 for (j = 1; j < parallel_count; j++)
1133 {
1134 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1135 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1136 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1137 }
	// Same for backward.tos; snapshot the size first because the loop appends.
1138 const int backward_to_size = compiled_data->backward.to_size;
1139 for (i = 0; i < backward_to_size; i++)
1140 for (j = 1; j < parallel_count; j++)
1141 {
1142 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1143 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1144 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1145 }
1146 }
1147 // Only use memory compression if we are in gradient parameter mode.
1148 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1149 {
1150 if (model->memory_compression)
1151 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1152 if (model->memory_reduction)
1153 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1154 }
	// Remove duplicate exec symbols from backward.tos, then record the mode so
	// the entry assertion rejects a second initialization.
1155 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1156 compiled_data->gradient_mode = gradient_mode;
1157}
1158
1159void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1160{
1161 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1161, __extension__ __PRETTY_FUNCTION__
); }))
;
1162 const int parameter_size = compiled_data->parameters->rnum;
1163 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1164 const int internal_size = compiled_data->internals->rnum;
1165 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1166 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1167 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1168 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1169}
1170
1171int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1172{
1173 int i, j;
1174 const int parameter_size = compiled_data->parameters->rnum;
1175 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1176 const int internal_size = compiled_data->internals->rnum;
1177 for (i = 0; i < parameter_size; i++)
1178 {
1179 // parameters has to be allocated all together.
1180 if (compiled_data->tensors.parameters[i])
1181 {
1182 for (j = 1; j < parallel_count; j++)
1183 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1183, __extension__ __PRETTY_FUNCTION__
); }))
; }
1184 continue;
1185 }
1186 return 1;
1187 }
1188 for (i = 0; i < internal_size; i++)
1189 {
1190 if (!compiled_data->tensors.internals[i])
1191 return 1;
1192 for (j = 1; j < parallel_count; j++)
1193 if (!compiled_data->tensors.internals[i + j * internal_size])
1194 return 1;
1195 }
1196 return 0;
1197}
1198
1199void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1200{
1201 int i, j;
1202 const int parameter_size = compiled_data->parameters->rnum;
1203 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1204 const int internal_size = compiled_data->internals->rnum;
1205 for (i = 0; i < parameter_size; i++)
1206 {
1207 // parameters has to be allocated all together.
1208 if (compiled_data->tensors.parameters[i])
1209 {
1210 for (j = 1; j < parallel_count; j++)
1211 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1211, __extension__ __PRETTY_FUNCTION__
); }))
; }
1212 continue;
1213 }
1214 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1215 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1216 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1217 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1218 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1219 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1220 for (j = 1; j < parallel_count; j++)
1221 {
1222 if (j != device_id)
1223 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1224 else
1225 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1226 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1227 }
1228 }
1229 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1230 for (i = 0; i < internal_size; i++)
1231 {
1232 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1233 const int d = retained.d;
1234 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1235 continue;
1236 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1237 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1238 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1239 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1240 if (!compiled_data->tensors.internals[i])
1241 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1242 for (j = 1; j < parallel_count; j++)
1243 {
1244 if (j != device_id)
1245 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1246 else
1247 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1248 if (!compiled_data->tensors.internals[i + j * internal_size])
1249 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1250 }
1251 }
1252 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
; // Remove 1 if any.
1253}
1254
// Runs both tensor initialization stages back to back: stage 0 allocates the
// bookkeeping tables, stage 1 creates the tensors for the empty slots.
1255static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1256{
1257 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1258 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1259}
1260
1261static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1262{
1263 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1263, __extension__ __PRETTY_FUNCTION__
); }))
;
1264 int i, j;
1265 for (i = 0; i < tensor_size; i++)
1266 {
1267 if (!tensors[i])
1268 continue;
1269 const int d = tensor_symbols[i].d;
1270 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1271 continue;
1272 for (j = 1; j < parallel_count; j++)
1273 if (tensors[i + j * tensor_size])
1274 {
1275 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1276 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size]
) & ~(uintptr_t)1))
;
1277 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1278 }
1279 }
1280}
1281
1282static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1283{
1284 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1284, __extension__ __PRETTY_FUNCTION__
); }))
;
1285 int i, j;
1286 for (i = 0; i < tensor_size; i++)
1287 {
1288 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1289 for (j = 1; j < parallel_count; j++)
1290 {
1291 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1292 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1293 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1294 { // We shouldn't allocate this, free it up.
1295 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1296 tensors[i + j * tensor_size] = 0;
1297 }
1298 }
1299 }
1300}
1301
1302static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1303{
1304 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1304, __extension__ __PRETTY_FUNCTION__
); }))
;
1305 int i, j;
1306 for (i = 0; i < tensor_size; i++)
1307 {
1308 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1309 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1310 continue;
1311 if (graph)
1312 {
1313 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1314 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1315 tensor_symbol = alias_to;
1316 }
1317 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1318 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1319 {
1320 const ccv_nnc_tensor_bind_t retained_bind = {
1321 .symbol = tensor_symbol,
1322 .tensor = tensor
1323 };
1324 ccv_array_push(tensor_binds, &retained_bind);
1325 }
1326 for (j = 1; j < parallel_count; j++)
1327 {
1328 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1329 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1330 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1331 {
1332 const ccv_nnc_tensor_bind_t bind = {
1333 .symbol = copy,
1334 .tensor = tensors[i + j * tensor_size]
1335 };
1336 ccv_array_push(tensor_binds, &bind);
1337 }
1338 }
1339 }
1340}
1341
1342static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1343{
1344 if (compiled_data->graph)
1345 ccv_nnc_graph_free(compiled_data->graph);
1346 compiled_data->graph = 0;
1347 compiled_data->is_test = 0;
1348 if (compiled_data->tensor_arena)
1349 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1350 compiled_data->tensor_arena = 0;
1351 if (compiled_data->graph_exec_arena)
1352 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1353 compiled_data->graph_exec_arena = 0;
1354 if (compiled_data->backward.from_ops)
1355 ccfreefree(compiled_data->backward.from_ops);
1356 compiled_data->backward.from_ops = 0;
1357 if (compiled_data->evaluate.schedule)
1358 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1359 compiled_data->evaluate.schedule = 0;
1360 if (compiled_data->backward.schedule)
1361 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1362 compiled_data->backward.schedule = 0;
1363}
1364
1365static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1366{
1367 if (compiled_data->gradients)
1368 ccfreefree(compiled_data->gradients);
1369 compiled_data->gradients = 0;
1370 if (compiled_data->updated_parameters)
1371 ccfreefree(compiled_data->updated_parameters);
1372 compiled_data->updated_parameters = 0;
1373 compiled_data->update_nodes = 0;
1374 compiled_data->saved_aux = 0;
1375}
1376
1377static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1378{
1379 if (compiled_data->backward.gradients)
1380 ccfreefree(compiled_data->backward.gradients);
1381 compiled_data->backward.gradients = 0;
1382 if (compiled_data->backward.accum)
1383 ccv_nnc_graph_free(compiled_data->backward.accum);
1384 compiled_data->backward.accum = 0;
1385 if (compiled_data->backward.tensor_arena)
1386 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1387 compiled_data->backward.tensor_arena = 0;
1388 if (compiled_data->backward.graph_exec_arena)
1389 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1390 compiled_data->backward.graph_exec_arena = 0;
1391}
1392
1393static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1394{
1395 if (compiled_data->apply_gradients.graph)
1396 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1397 compiled_data->apply_gradients.graph = 0;
1398 if (compiled_data->apply_gradients.tensor_arena)
1399 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1400 compiled_data->apply_gradients.tensor_arena = 0;
1401 if (compiled_data->apply_gradients.graph_exec_arena)
1402 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1403 compiled_data->apply_gradients.graph_exec_arena = 0;
1404}
1405
1406// Compile the graph to run ccv_cnnp_model_fit
1407static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1408{
1409 int i, j;
1410 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1411 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1411, __extension__ __PRETTY_FUNCTION__
); }))
;
1412 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1413 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1414 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1414, __extension__ __PRETTY_FUNCTION__
); }))
;
1415 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1415
, __extension__ __PRETTY_FUNCTION__); }))
;
1416 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1416, __extension__ __PRETTY_FUNCTION__
); }))
;
1417 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1418 {
1419 _ccv_cnnp_model_set_rewindables(model);
1420 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1421 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1422 _ccv_cnnp_model_rewind_graph(model);
1423 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1424 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1425 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1426 }
1427 const int tensors_init = !!compiled_data->tensors_init.v;
1428 if (!tensors_init)
1429 _ccv_cnnp_model_tensors_init(model, compiled_data);
1430 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1431 // Check if it is not fully allocated, if it is not, init_1.
1432 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1433 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1434 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1434, __extension__ __PRETTY_FUNCTION__); }))
;
1435 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1435, __extension__ __PRETTY_FUNCTION__); }))
;
1436 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1436
, __extension__ __PRETTY_FUNCTION__); }))
;
1437 const int input_size_per_p = input_size / parallel_count;
1438 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1439 const int output_size_per_p = output_size / parallel_count;
1440 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1441 const int fit_size_per_p = fit_size / parallel_count;
1442 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1443 const int parameter_size = compiled_data->parameters->rnum;
1444 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1445 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1446 const int internal_size = compiled_data->internals->rnum;
1447 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1448 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1449 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1450 ccv_array_free(tensor_binds);
1451 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1452 if (tensors_init && parallel_count > 1)
1453 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1454 // If tensor is not init'ed, we need to init states first.
1455 if (_ccv_cnnp_any_to_init(compiled_data))
1456 {
1457 ccv_nnc_tensor_init_states_t tensor_init_states = {
1458 .parallel_count = parallel_count,
1459 .graph = model->graph,
1460 .compiled_data = compiled_data,
1461 .tensor_arena = compiled_data->tensor_arena
1462 };
1463 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1464 }
1465 compiled_data->is_test = 0;
1466 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1467 // No need to set because it is default to training mode.
1468 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1469 for (i = 0; i < saved_aux_size * parameter_size; i++)
1470 {
1471 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1472 continue;
1473 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1474 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1475 for (j = 1; j < parallel_count; j++)
1476 {
1477 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1478 if (copy)
1479 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1480 }
1481 }
1482 const int evaluate_to_size = compiled_data->evaluate.to_size;
1483 compiled_data->evaluate.to_op_size = 0;
1484 for (i = 0; i < evaluate_to_size; i++)
1485 {
1486 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1487 if (to.graph)
1488 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1489 }
1490 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1491 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1492}
1493
1494ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1495{
1496 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1497 if (!compiled_data || !compiled_data->graph)
1498 return 0;
1499 return ccv_nnc_graph_default_stream(compiled_data->graph);
1500}
1501
1502uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1503{
1504 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1505 if (!compiled_data || !compiled_data->tensor_arena)
1506 return 0;
1507 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1508}
1509
1510static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1511{
1512 int i, j;
1513 for (i = 0; i < tensor_size; i++)
1514 {
1515 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1516 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1517 continue;
1518 if (graph)
1519 {
1520 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1521 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1522 tensor_symbol = alias_to;
1523 }
1524 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1525 for (j = 1; j < parallel_count; j++)
1526 {
1527 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1528 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1529 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1530 }
1531 }
1532}
1533
1534void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1535{
1536 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1537 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1537, __extension__ __PRETTY_FUNCTION__); }))
;
1538 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1539 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1539, __extension__ __PRETTY_FUNCTION__
); }))
;
1540 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1540, __extension__ __PRETTY_FUNCTION__
); }))
;
1541 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1541
, __extension__ __PRETTY_FUNCTION__); }))
;
1542 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1542, __extension__ __PRETTY_FUNCTION__); }))
;
1543 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1544 {
1545 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1546 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1547 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1548 // Compile the symbolic graph down only when needed.
1549 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1550 } else {
1551 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1551, __extension__ __PRETTY_FUNCTION__); }))
;
1552 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1552, __extension__ __PRETTY_FUNCTION__); }))
;
1553 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1553
, __extension__ __PRETTY_FUNCTION__); }))
;
1554 const int input_size_per_p = input_size / parallel_count;
1555 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1556 const int output_size_per_p = output_size / parallel_count;
1557 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1558 const int fit_size_per_p = fit_size / parallel_count;
1559 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1560 }
1561 if (compiled_data->is_test)
1562 {
1563 compiled_data->is_test = 0;
1564 ccv_nnc_graph_exec_update_t update = {
1565 .parallel_count = parallel_count,
1566 .graph = model->graph,
1567 .graph_exec_arena = compiled_data->graph_exec_arena,
1568 };
1569 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1570 }
1571 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1572}
1573
1574// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1575static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1576{
1577 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1578 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1579 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1580 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1580, __extension__ __PRETTY_FUNCTION__
); }))
;
1581 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1581, __extension__ __PRETTY_FUNCTION__
); }))
;
1582 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1583 // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel.
1584 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1585 {
1586 const int evaluate_to_size = compiled_data->evaluate.to_size;
1587 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1588 _ccv_cnnp_model_set_rewindables(model);
1589 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1590 0, 0,
1591 0, 0, 0,
1592 0, 0, 0,
1593 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1594 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1595 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1596 int i, j;
1597 for (i = 0; i < evaluate_to_size; i++)
1598 for (j = 1; j < parallel_count; j++)
1599 {
1600 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1601 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1602 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1603 }
1604 }
1605 const int tensors_init = !!compiled_data->tensors_init.v;
1606 if (!tensors_init)
1607 _ccv_cnnp_model_tensors_init(model, compiled_data);
1608 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1609 // Check if it is not fully allocated, if it is not, init_1.
1610 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1611 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1612 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1612, __extension__ __PRETTY_FUNCTION__); }))
;
1613 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1613, __extension__ __PRETTY_FUNCTION__); }))
;
1614 const int input_size_per_p = input_size / parallel_count;
1615 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1616 const int output_size_per_p = output_size / parallel_count;
1617 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1618 const int parameter_size = compiled_data->parameters->rnum;
1619 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1620 const int internal_size = compiled_data->internals->rnum;
1621 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1622 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1623 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1624 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1625 ccv_array_free(tensor_binds);
1626 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1627 // If tensor is not init'ed, we need to init states first.
1628 if (tensors_init && parallel_count > 1)
1629 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1630 if (_ccv_cnnp_any_to_init(compiled_data))
1631 {
1632 ccv_nnc_tensor_init_states_t tensor_init_states = {
1633 .parallel_count = parallel_count,
1634 .graph = model->graph,
1635 .compiled_data = compiled_data,
1636 .tensor_arena = compiled_data->tensor_arena
1637 };
1638 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1639 }
1640 compiled_data->is_test = 1;
1641 ccv_nnc_graph_exec_update_t update = {
1642 .parallel_count = parallel_count,
1643 .graph = model->graph,
1644 .graph_exec_arena = compiled_data->graph_exec_arena,
1645 };
1646 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1647 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1648 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1649}
1650
1651static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1652{
1653 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1653, __extension__ __PRETTY_FUNCTION__
); }))
;
1654 const int parameter_size = compiled_data->parameters->rnum;
1655 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1656 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1657 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1658 int i, j;
1659 for (i = 0; i < parameter_size; i++)
1660 {
1661 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1662 {
1663 compiled_data->tensors.gradients[i] = 0;
1664 compiled_data->tensors.accum_gradients[i] = 0;
1665 for (j = 1; j < parallel_count; j++)
1666 {
1667 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1668 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1669 }
1670 continue;
1671 }
1672 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1673 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1674 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1675 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1676 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1677 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1678 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1679 for (j = 1; j < parallel_count; j++)
1680 {
1681 if (j != device_id)
1682 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1683 else
1684 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1685 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1686 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1687 }
1688 }
1689}
1690
1691static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1692{
1693 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1694 return 1;
1695 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1696 return 0;
1697 int i;
1698 for (i = 0; i < input_size; i++)
1699 if (!(disable_outgrad & ((uint64_t)1 << i)))
1700 return 0;
1701 return 1;
1702}
1703
1704// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1705// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1706static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1707{
1708 int i, j;
1709 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1710 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1711 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1711, __extension__ __PRETTY_FUNCTION__
); }))
;
1712 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1713 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1714 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1714, __extension__ __PRETTY_FUNCTION__
); }))
;
1715 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1715, __extension__ __PRETTY_FUNCTION__
); }))
;
1716 // There shouldn't be a loss function if we evaluate with multistage jit.
1717 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1717, __extension__ __PRETTY_FUNCTION__
); }))
;
1718 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1719 {
1720 _ccv_cnnp_model_set_rewindables(model);
1721 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1722 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1723 _ccv_cnnp_model_rewind_graph(model);
1724 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1725 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1726 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1727 }
1728 const int tensors_init = !!compiled_data->tensors_init.v;
1729 if (!tensors_init)
1730 _ccv_cnnp_model_tensors_init(model, compiled_data);
1731 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1732 // Check if it is not fully allocated, if it is not, init_1.
1733 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1734 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1735 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1735, __extension__ __PRETTY_FUNCTION__); }))
;
1736 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1736, __extension__ __PRETTY_FUNCTION__); }))
;
1737 const int input_size_per_p = input_size / parallel_count;
1738 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1739 const int output_size_per_p = output_size / parallel_count;
1740 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1741 const int parameter_size = compiled_data->parameters->rnum;
1742 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1743 const int internal_size = compiled_data->internals->rnum;
1744 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1745 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1746 if (!compiled_data->tensors.gradients)
1747 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1748 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1749 if (compiled_data->backward.to_size > 0)
1750 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1751 else
1752 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1753 ccv_array_free(tensor_binds);
1754 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1755 if (tensors_init && parallel_count > 1)
1756 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1757 // If tensor is not init'ed, we need to init states first.
1758 if (_ccv_cnnp_any_to_init(compiled_data))
1759 {
1760 ccv_nnc_tensor_init_states_t tensor_init_states = {
1761 .parallel_count = parallel_count,
1762 .graph = model->graph,
1763 .compiled_data = compiled_data,
1764 .tensor_arena = compiled_data->tensor_arena
1765 };
1766 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1767 }
1768 compiled_data->is_test = is_test;
1769 ccv_nnc_graph_exec_update_t update = {
1770 .parallel_count = parallel_count,
1771 .graph = model->graph,
1772 .graph_exec_arena = compiled_data->graph_exec_arena,
1773 };
1774 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1775 const int evaluate_to_size = compiled_data->evaluate.to_size;
1776 compiled_data->evaluate.to_op_size = 0;
1777 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1778 for (i = 0; i < evaluate_to_size; i++)
1779 {
1780 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1781 if (to_op.graph)
1782 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1783 const int* tos;
1784 int to_size;
1785 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1786 for (j = 0; j < to_size; j++)
1787 {
1788 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1789 .d = tos[j],
1790 .graph = model->graph
1791 });
1792 if (to_op.graph)
1793 ccv_array_add_unique_int(backward_from, to_op.d);
1794 }
1795 }
1796 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1796, __extension__
__PRETTY_FUNCTION__); }))
;
1797 compiled_data->backward.from_op_size = backward_from->rnum;
1798 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1799 for (i = 0; i < backward_from->rnum; i++)
1800 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1801 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1802 .graph = compiled_data->graph,
1803 };
1804 // If any set nodes (which set some tensors to 0) were inserted during the backward pass, they won't be executed if we only run sources -> evaluate.to_ops and backward.from_ops -> destinations. This logic finds those nodes and explicitly adds them to backward.from_ops.
1805 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)->
data)) + (size_t)(compiled_data->graph->exec_info)->
rsize * (size_t)(0)))
;
1806 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1807 uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1808 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data
)) + (size_t)(compiled_data->graph->sources)->rsize *
(size_t)(0)))
;
1809 const int source_size = compiled_data->graph->sources->rnum;
1810 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == compiled_data->graph
) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[
_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void
*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t
)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); --
_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_
[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size
); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_
].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0)
, __extension__ ({ if (_incomings_[(compiled_data->evaluate
.to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(compiled_data->evaluate.to_ops
)[_i_].d].c > 0) continue; _visit_->node[_visit_->size
].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_
->node[_visit_->size].term = ((_incomings_[(compiled_data
->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if (
_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof (
(_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__
({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail
("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c"
, 1810, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
1811 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1812 visited[(idx >> 5)] |= (1u << (idx & 31));
1813 } ccv_nnc_graph_visit_endfor} }
1814 ccv_nnc_graph_visit_free(visit);
1815 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)->
data)) + (size_t)(compiled_data->graph->destinations)->
rsize * (size_t)(0)))
;
1816 const int destination_size = compiled_data->graph->destinations->rnum;
1817 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } int _exist_size_[2] = { (compiled_data->backward
.from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[
_idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail
("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c"
, 1817, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_
][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (
_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == compiled_data->graph)
; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (compiled_data->backward.from_op_size); _i_++
) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data
->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 6 && _d_ < (destination_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1818 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1819 visited[(idx >> 5)] |= (1u << (idx & 31));
1820 } ccv_nnc_graph_visit_endfor} }
1821 ccv_nnc_graph_visit_free(visit);
1822 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1823 for (i = 0; i < exec_info_size; i++)
1824 if (!(visited[(i >> 5)] & (1u << (i & 31))))
1825 {
1826 assert(exec_info[i].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[i].cmd.cmd == CCV_NNC_SET_FORWARD)
? 1 : 0), __extension__ ({ if (exec_info[i].cmd.cmd == CCV_NNC_SET_FORWARD
) ; else __assert_fail ("exec_info[i].cmd.cmd == CCV_NNC_SET_FORWARD"
, "ccv_cnnp_model.c", 1826, __extension__ __PRETTY_FUNCTION__
); }))
;
1827 ccv_array_add_unique_int(backward_from, i);
1828 }
1829 ccfreefree(visited);
1830 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1831 {
1832 compiled_data->backward.from_op_size = backward_from->rnum;
1833 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1834 for (i = 0; i < backward_from->rnum; i++)
1835 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1836 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1837 .graph = compiled_data->graph,
1838 };
1839 }
1840 ccv_array_free(backward_from);
1841 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1842 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1843}
1844
// Prepare the compiled graph for an evaluation with the given tensors, without running it.
//
// model:       the model; model->compiled_data and model->graph must be set (asserted).
// params:      evaluation parameters; requires_grad, disable_outgrad and is_test are read here.
// inputs:      input tensors, input_size must equal model->input_size * parallel_count (asserted).
// outputs:     output tensors, output_size must equal model->output_size * parallel_count (asserted).
//
// If there is no concrete graph yet, or gradients are requested and the existing graph was
// compiled for a different mode, the graph is freed and compiled again. Otherwise the existing
// tensor arena is re-bound to the given inputs / outputs. Finally, a changed is_test flag is
// pushed to the model.
1845void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1846{
1847 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1848 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1848, __extension__ __PRETTY_FUNCTION__); }))
;
// A parallel_count below 1 is treated as 1.
1849 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1850 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__
); }))
;
1851 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1851, __extension__ __PRETTY_FUNCTION__
); }))
;
1852 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1852, __extension__ __PRETTY_FUNCTION__); }))
;
// The gradient mode this call would need: TRAINABLES when _ccv_cnnp_is_disable_outgrad_all
// returns non-zero for params.disable_outgrad, TRAINABLES_AND_INPUTS otherwise.
1853 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
// A mismatch is only detected when params.requires_grad is set; without it, whatever graph
// exists is reused as is.
1854 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1855 if (!compiled_data->graph || mode_mismatch)
1856 {
1857 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1858 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad.
1859 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1860 if (params.requires_grad)
1861 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1862 else
1863 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1864 } else {
// Reuse the existing graph: reset the arena bindings, then bind the given inputs and outputs.
1865 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1866 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1866, __extension__ __PRETTY_FUNCTION__); }))
;
1867 const int input_size_per_p = input_size / parallel_count;
1868 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1869 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1869, __extension__ __PRETTY_FUNCTION__); }))
;
1870 const int output_size_per_p = output_size / parallel_count;
1871 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1872 }
// Only touch the model when the is_test flag actually changed since the last call.
1873 if (compiled_data->is_test != params.is_test)
1874 {
1875 compiled_data->is_test = params.is_test;
1876 ccv_nnc_graph_exec_update_t update = {
1877 .parallel_count = parallel_count,
1878 .graph = model->graph,
1879 .graph_exec_arena = compiled_data->graph_exec_arena,
1880 };
1881 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1882 }
1883}
1884
// Evaluate the model: set up the graph through ccv_cnnp_model_dry_run, then run it.
//
// model:          the model; model->compiled_data must be set (asserted).
// params:         evaluation parameters, forwarded to ccv_cnnp_model_dry_run.
// inputs/outputs: tensors to bind, forwarded to ccv_cnnp_model_dry_run with their sizes.
// tensor_tape:    forwarded to ccv_nnc_graph_run_with_schedule.
// stream_context: forwarded to ccv_nnc_graph_run_with_schedule.
1885void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1886{
1887 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1888 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1888, __extension__ __PRETTY_FUNCTION__); }))
;
1889 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
// In no-grad mode the graph is run with a 0 schedule argument; in every other mode a
// schedule limited to evaluate.to_ops is used.
1890 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1891 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1892 else {
// The evaluate schedule is created on first use and kept in compiled_data for later calls.
1893 if (!compiled_data->evaluate.schedule)
1894 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1895 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1896 }
1897}
1898
1899// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1900// Particularly, this method compiles the accumulator graph.
//
// For every parameter (and every parallel copy) that has a gradient tensor, the accumulator
// graph gets one EWSUM node: accum_gradients + gradients -> updated_accum_gradients.
// On return compiled_data->backward.accum is set: either to the compiled accumulator graph,
// or to an empty graph when no parameter has a gradient tensor.
1901static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1902{
1903 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1904 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1904, __extension__ __PRETTY_FUNCTION__); }))
;
1905 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); }))
;
1906 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1907 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1908 const int parameter_size = compiled_data->parameters->rnum;
1909 int i, j;
// One allocation holds three consecutive symbol arrays of parameter_size * parallel_count
// entries each: gradients, accum_gradients, updated_accum_gradients.
// NOTE(review): the allocation result is used without a NULL check — confirm this is the
// project's out-of-memory policy.
1910 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1911 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1912 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1913 for (i = 0; i < parameter_size; i++)
1914 for (j = 0; j < parallel_count; j++)
1915 if (compiled_data->tensors.gradients[i + j * parameter_size])
1916 {
1917 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1918 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
1919 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1920 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1921 ccv_nnc_tensor_symbol_t inputs[2];
1922 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1923 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1924 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1925 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1926 } else {
// No gradient tensor for this slot: mark all three symbols as absent.
1927 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1928 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1929 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1930 }
1931 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
// A symbolic graph without sources has nothing to compile; install an empty concrete graph
// instead so that backward.accum is non-null afterwards.
1932 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1933 {
1934 ccv_nnc_symbolic_graph_free(accum);
1935 // Create empty graph.
1936 compiled_data->backward.accum = ccv_nnc_graph_new();
1937 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1938 return;
1939 }
1940 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
// Both the accum_gradients symbols and the updated_accum_gradients symbols are bound to
// tensors.accum_gradients, so the sum's input and output share the same tensors.
1941 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1942 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1943 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1944 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
// The symbolic graph and the bind list are only needed for compilation.
1945 ccv_nnc_symbolic_graph_free(accum);
1946 ccv_array_free(tensor_binds);
1947 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1948}
1949
// Run the backward pass on the compiled graph and, from the second call on, accumulate the
// gradients.
//
// model:          the model; must be compiled in MULTISTAGE_MODE with a concrete graph (asserted).
// ingrads:        gradients w.r.t. the outputs. ingrad_size must be 0 or
//                 model->output_size * parallel_count (asserted). When ingrad_size is 0, ingrads
//                 is null, or an entry is null, the matching gradient tensor in the arena is
//                 filled with 1 instead.
// outgrads:       tensors to receive gradients w.r.t. the inputs. When outgrad_size > 0 it must be
//                 compiled_data->outgrad_size * parallel_count (asserted); null entries are skipped.
// tensor_tape:    forwarded to the run of the main graph.
// stream_context: forwarded to the graph runs and to the SET commands.
//
// compiled_data->backward.count is incremented on every call; a count above 0 on entry means
// gradients from earlier calls exist and have to be accumulated.
1950void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1951{
1952 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1953 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1953, __extension__ __PRETTY_FUNCTION__); }))
;
1954 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1954, __extension__ __PRETTY_FUNCTION__
); }))
;
1955 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1956 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1956, __extension__ __PRETTY_FUNCTION__
); }))
;
1957 if (outgrad_size > 0)
1958 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1958, __extension__ __PRETTY_FUNCTION__
); }))
; }
1959 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1959, __extension__ __PRETTY_FUNCTION__); }))
;
1960 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1960, __extension__ __PRETTY_FUNCTION__
); }))
;
1961 const int parameter_size = compiled_data->parameters->rnum;
1962 // If we need to accumulate the gradients now, do jit on accumulator.
1963 if (compiled_data->backward.count > 0)
1964 {
// First accumulation ever: compile the accumulator graph (this also moves the current
// gradient tensors into the accum_gradients slots).
1965 if (!compiled_data->backward.accum)
1966 _ccv_cnnp_model_multistage_jit_1(model);
1967 else if (compiled_data->backward.count == 1) {
1968 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1969 int i;
1970 for (i = 0; i < parameter_size * parallel_count; i++)
1971 {
1972 ccv_nnc_tensor_t* tensor;
1973 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
1974 }
// backward.tensor_arena is not set by the empty-graph path of the accumulator JIT, hence
// the check before re-binding.
1975 if (compiled_data->backward.tensor_arena)
1976 {
1977 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1978 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1979 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1980 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1981 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1982 }
1983 }
1984 }
1985 const int ingrad_size_per_p = model->output_size;
1986 const int outgrad_size_per_p = compiled_data->outgrad_size;
1987 int i, j;
// Set up the incoming gradient for every model output: either a caller-provided tensor or a
// tensor filled with 1.
1988 for (i = 0; i < ingrad_size_per_p; i++)
1989 {
1990 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1991 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1992 {
1993 // Set it to 1 if it is not specified.
1994 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1995 if (ingrad_tensor)
1996 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
// Same for the copies of the symbol that belong to the other parallel devices.
1997 for (j = 1; j < parallel_count; j++)
1998 {
1999 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2000 if (ingrad_tensor)
2001 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2002 }
2003 } else {
2004 // Make sure the length matches, in case it is an alias.
2005 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 2005, __extension__ __PRETTY_FUNCTION__
); }))
;
2006 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
// NOTE(review): only ingrads[i] was checked for null above; the per-device entries
// ingrads[i + ingrad_size_per_p * j] are bound unchecked — confirm callers always fill them.
2007 for (j = 1; j < parallel_count; j++)
2008 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2009 }
2010 }
2011 if (outgrad_size > 0)
2012 {
2013 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 2013, __extension__ __PRETTY_FUNCTION__
); }))
;
// Bind the caller's outgrad tensors; a null outgrads[i] skips that input on every device.
// NOTE(review): as with ingrads, the per-device entries outgrads[i + outgrad_size_per_p * j]
// are bound without their own null check.
2014 for (i = 0; i < outgrad_size_per_p; i++)
2015 if (outgrads[i])
2016 {
2017 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2018 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2019 for (j = 1; j < parallel_count; j++)
2020 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2021 }
2022 } else {
2023 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2024, __extension__ __PRETTY_FUNCTION__
); }))
2024 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2024, __extension__ __PRETTY_FUNCTION__
); }))
;
2025 }
2026 // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients.
2027 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
2028 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
2029 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
// The backward schedule starts from backward.from_ops; it is created on first use and kept.
2030 if (!compiled_data->backward.schedule)
2031 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2032 // Run the backward pass.
2033 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2034 // If we need to run accumulation round, do that now.
2035 if (compiled_data->backward.count > 0)
2036 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2037 // Update the count, this determines whether we need to accumulate or not.
2038 ++compiled_data->backward.count;
2039}
2040
2041// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2042// Particularly, this method compiles the parameter update graph.
//
// The update graph is a sub-graph of model->graph. It starts at the exec symbols that follow
// backward.tos and have no concrete exec in the already compiled graph, and ends at the
// update nodes of the parameters. When no such starting symbol exists the function returns
// without setting compiled_data->apply_gradients.graph.
2043static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2044{
2045 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2046 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2046, __extension__ __PRETTY_FUNCTION__
); }))
;
2047 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2048 const int parameter_size = compiled_data->parameters->rnum;
2049 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
// Both the parameter symbols and the updated_parameters symbols are bound to
// tensors.parameters, so the update reads and writes the same tensors.
2050 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2051 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2052 // Bind accumulated gradients.
// With more than one backward call the result lives in accum_gradients, otherwise in gradients.
2053 if (compiled_data->backward.count > 1)
2054 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2055 else
2056 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2057 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2058 int i, j;
// Collect, without duplicates, the symbols returned by ccv_nnc_graph_exec_symbol_to for each
// backward.tos entry that are not part of the compiled graph.
2059 for (i = 0; i < compiled_data->backward.to_size; i++)
2060 {
2061 const int* tos;
2062 int to_size;
2063 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2064 for (j = 0; j < to_size; j++)
2065 {
2066 // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply
2067 // gradients graph.
2068 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2069 .d = tos[j],
2070 .graph = model->graph,
2071 });
2072 if (!exec.graph)
2073 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2074 }
2075 }
2076 const int from_size = apply_gradients_from->rnum;
// Nothing to start from: release the temporaries and leave apply_gradients.graph untouched.
2077 if (from_size == 0)
2078 {
2079 ccv_array_free(apply_gradients_from);
2080 ccv_array_free(tensor_binds);
2081 return;
2082 }
// Turn the collected indices into exec symbols of model->graph.
2083 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2084 for (i = 0; i < from_size; i++)
2085 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2086 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2087 .graph = model->graph
2088 };
2089 ccv_array_free(apply_gradients_from);
2090 // It can only ends with updates on the parameters.
2091 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2092 for (i = 0; i < parameter_size; i++)
2093 {
// Parameters without an update node contribute no destination.
2094 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2095 continue;
2096 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2097 for (j = 1; j < parallel_count; j++)
2098 {
2099 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2100 ccv_array_push(tos, &copy);
2101 }
2102 }
2103 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2104 ccv_array_free(tos);
2105 ccv_array_free(tensor_binds);
2106 ccfreefree(froms);
2107 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
// Fill every saved_aux source tensor of the freshly compiled arena (and its per-device
// copies) with 0.
2108 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2109 {
2110 // Skip on no tensor.
2111 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2112 continue;
// NOTE(review): unlike `copy` below, `tensor` is passed on without a null check — confirm
// ccv_nnc_tensor_from_symbol cannot return 0 for a saved_aux source here.
2113 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2114 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2115 for (j = 1; j < parallel_count; j++)
2116 {
2117 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2118 if (copy)
2119 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2120 }
2121 }
2122 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2123}
2124
// Apply the gradients collected by ccv_cnnp_model_backward to the parameters.
//
// model:          the model; must be compiled in MULTISTAGE_MODE with a concrete graph (asserted).
// stream_context: forwarded to the run of the update graph.
//
// Does nothing when no backward pass happened since the last call (backward.count <= 0).
// Otherwise the update graph is compiled on first use, or re-bound to the right gradient
// tensors on later calls, then run, and backward.count is reset to 0.
2125void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2126{
2127 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2128 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2128, __extension__ __PRETTY_FUNCTION__); }))
;
2129 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2129, __extension__ __PRETTY_FUNCTION__
); }))
;
2130 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2131 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2131, __extension__ __PRETTY_FUNCTION__); }))
;
2132 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2132, __extension__ __PRETTY_FUNCTION__
); }))
;
2133 // Skip if there is no backward pass.
2134 if (compiled_data->backward.count <= 0)
2135 return;
2136 // Skip if there is no parameters.
2137 if (compiled_data->parameters->rnum == 0)
2138 {
// Still reset the count so that the next backward call does not try to accumulate.
2139 compiled_data->backward.count = 0;
2140 return;
2141 }
2142 if (!compiled_data->apply_gradients.graph)
2143 _ccv_cnnp_model_multistage_jit_2(model);
2144 else {
2145 const int parameter_size = compiled_data->parameters->rnum;
2146 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2147 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2148 if (compiled_data->backward.count > 1)
2149 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2150 else
2151 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2152 }
// The graph can still be unset here: the JIT above returns early when it finds nothing to
// compile.
2153 if (compiled_data->apply_gradients.graph)
2154 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2155 // Reset backward count to 0.
2156 compiled_data->backward.count = 0;
2157}
2158
2159void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2160{
2161 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2162 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2163 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2163, __extension__ __PRETTY_FUNCTION__
); }))
;
2164 const int tensors_init = !!compiled_data->tensors_init.v;
2165 if (!tensors_init)
2166 _ccv_cnnp_model_tensors_init(model, compiled_data);
2167 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2168 // Check if it is not fully allocated, if it is not, init_1.
2169 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2170 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2171 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2172 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2173 if (param_ref < 0)
2174 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2174
, __extension__ __PRETTY_FUNCTION__); }))
; }
2175 else
2176 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2176, __extension__ __PRETTY_FUNCTION__
); }))
; }
2177 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2178 ccv_array_free(parameter_indices);
2179 const int parameter_size = compiled_data->parameters->rnum;
2180 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2180
, __extension__ __PRETTY_FUNCTION__); }))
;
2181 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2181, __extension__ __PRETTY_FUNCTION__
); }))
;
2182 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2183 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2184 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2184, __extension__
__PRETTY_FUNCTION__); }))
;
2185 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2186 int i;
2187 for (i = 1; i < parallel_count; i++)
2188 {
2189 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2190 if (copy_tensor)
2191 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2192 }
2193 // Mark this symbol as init'ed.
2194 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2195 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2196 init_v[s >> 5] |= (1u << (s & 0x1f));
2197}
2198
2199void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2200{
2201 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2202 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2203 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2203, __extension__ __PRETTY_FUNCTION__
); }))
;
2204 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2204, __extension__ __PRETTY_FUNCTION__
); }))
;
2205 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2206 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2207 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2208 if (param_ref < 0)
2209 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2209
, __extension__ __PRETTY_FUNCTION__); }))
; }
2210 else
2211 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2211, __extension__ __PRETTY_FUNCTION__
); }))
; }
2212 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2213 ccv_array_free(parameter_indices);
2214 const int parameter_size = compiled_data->parameters->rnum;
2215 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2215
, __extension__ __PRETTY_FUNCTION__); }))
;
2216 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2216, __extension__ __PRETTY_FUNCTION__
); }))
;
2217 // We don't need to consider parallel_count, every parameter on each device is identical.
2218 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2219 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2219, __extension__
__PRETTY_FUNCTION__); }))
;
2220 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2221}
2222
2223ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2224{
2225 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2226 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2227 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2227, __extension__ __PRETTY_FUNCTION__
); }))
;
2228 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2228, __extension__ __PRETTY_FUNCTION__
); }))
;
2229 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2230 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2231 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2232 if (param_ref < 0)
2233 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2233
, __extension__ __PRETTY_FUNCTION__); }))
; }
2234 else
2235 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2235, __extension__ __PRETTY_FUNCTION__
); }))
; }
2236 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2237 ccv_array_free(parameter_indices);
2238 const int parameter_size = compiled_data->parameters->rnum;
2239 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2239
, __extension__ __PRETTY_FUNCTION__); }))
;
2240 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2240, __extension__ __PRETTY_FUNCTION__
); }))
;
2241 // We don't need to consider parallel_count, every parameter on each device is identical.
2242 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2243 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2243, __extension__
__PRETTY_FUNCTION__); }))
;
2244 return tensor->info;
2245}
2246
2247const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2248{
2249 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2250 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2251 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2251, __extension__ __PRETTY_FUNCTION__
); }))
;
2252 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2253 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2254 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2255 if (param_ref < 0)
2256 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2256
, __extension__ __PRETTY_FUNCTION__); }))
; }
2257 else
2258 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2258, __extension__ __PRETTY_FUNCTION__
); }))
; }
2259 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2260 ccv_array_free(parameter_indices);
2261 const int parameter_size = compiled_data->parameters->rnum;
2262 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2262
, __extension__ __PRETTY_FUNCTION__); }))
;
2263 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2263, __extension__ __PRETTY_FUNCTION__
); }))
;
2264 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2265}
2266
2267int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2268{
2269 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2269, __extension__ __PRETTY_FUNCTION__
); }))
;
2270 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2271 return compiled_data->parameters->rnum;
2272}
2273
2274ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2275{
2276 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2277 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2277, __extension__ __PRETTY_FUNCTION__); }))
;
2278 const int parameter_size = compiled_data->parameters->rnum;
2279 int i;
2280 for (i = 0; i < parameter_size; i++)
2281 {
2282 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2283 if (first(model, name, context))
2284 return ccv_cnnp_model_parameters(model, -1, i);
2285 }
2286 return 0;
2287}
2288
2289ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2290{
2291 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2292 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2292, __extension__ __PRETTY_FUNCTION__); }))
;
2293 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2294 const int parameter_size = compiled_data->parameters->rnum;
2295 int i;
2296 for (i = 0; i < parameter_size; i++)
2297 {
2298 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2299 if (filter(model, name, context))
2300 {
2301 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2302 ccv_array_push(parameters, &parameter);
2303 }
2304 }
2305 return parameters;
2306
2307}
2308
2309CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2310{
2311 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2312 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2312, __extension__ __PRETTY_FUNCTION__); }))
;
2313 const int tensors_init = !!compiled_data->tensors_init.v;
2314 if (!tensors_init) // If nothing initialized, we return parameter 0.
2315 return ccv_cnnp_model_parameters(model, -1, 0);
2316 const int parameter_size = compiled_data->parameters->rnum;
2317 int i;
2318 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2319 for (i = 0; i < parameter_size; i++)
2320 {
2321 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2322 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2323 return ccv_cnnp_model_parameters(model, -1, i);
2324 }
2325 return 0;
2326}
2327
2328static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2329{
2330 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2331 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2331, __extension__
__PRETTY_FUNCTION__); }))
;
2332 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2333 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2334 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2335 return to_parameter_indices;
2336}
2337
2338static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2339{
2340 // If the model is not compiled yet. Compile them now.
2341 if (!model->graph)
2342 {
2343 model->graph = ccv_nnc_symbolic_graph_new();
2344 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2344, __extension__ __PRETTY_FUNCTION__
); }))
;
2345 const int input_size = from_model->input_size;
2346 ccv_nnc_tensor_param_t input_params[input_size];
2347 int i;
2348 for (i = 0; i < input_size; i++)
2349 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2350 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2351 model->parallel_count = from_model->parallel_count;
2352 model->memory_compression = from_model->memory_compression;
2353 model->memory_reduction = from_model->memory_reduction;
2354 model->gradient_checkpointing = from_model->gradient_checkpointing;
2355 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2356 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2357 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2358 }
2359 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2360 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2360, __extension__ __PRETTY_FUNCTION__
); }))
;
2361 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2362 if (!to_tensors_init)
2363 {
2364 if (only_init_0)
2365 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2366 else
2367 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2368 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2369 // Check if it is not fully allocated, if it is not, init_1.
2370 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2371 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2371, __extension__ __PRETTY_FUNCTION__
); }))
;
2372 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2373 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2374 if (*from_param_ref < 0 && *param_ref >= 0)
2375 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2375, __extension__ __PRETTY_FUNCTION__
); }))
; }
2376 else if (*from_param_ref >= 0)
2377 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2377, __extension__ __PRETTY_FUNCTION__
); }))
; }
2378 if (*param_ref < 0 && *from_param_ref >= 0)
2379 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2379, __extension__ __PRETTY_FUNCTION__); }))
; }
2380 else if (*param_ref >= 0)
2381 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2381, __extension__ __PRETTY_FUNCTION__
); }))
; }
2382}
2383
2384void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2385{
2386 ccv_array_t* to_parameter_indices;
2387 int to_param_ref;
2388 ccv_array_t* from_parameter_indices;
2389 int from_param_ref;
2390 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2391 // Should be exactly the same tensor.
2392 if (to_param_ref < 0 && from_param_ref < 0)
2393 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2393, __extension__ __PRETTY_FUNCTION__
); }))
; }
2394 // To models.
2395 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2396 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2396, __extension__ __PRETTY_FUNCTION__
); }))
;
2397 // From models.
2398 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2399 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2400 const int to_parameter_size = to_compiled_data->parameters->rnum;
2401 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2402 int i, j;
2403 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2404 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2405 for (i = 0; i < rnum; i++)
2406 {
2407 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2408 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2408, __extension__ __PRETTY_FUNCTION__); }))
;
2409 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2409, __extension__ __PRETTY_FUNCTION__
); }))
;
2410 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2411 // If the original is not init'ed. We cannot copy from.
2412 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2413 continue;
2414 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2415 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2415, __extension__ __PRETTY_FUNCTION__); }))
;
2416 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2416, __extension__ __PRETTY_FUNCTION__
); }))
;
2417 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2418 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2418, __extension__
__PRETTY_FUNCTION__); }))
;
2419 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2420 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2420, __extension__
__PRETTY_FUNCTION__); }))
;
2421 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2422 for (j = 1; j < parallel_count; j++)
2423 {
2424 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2425 if (copy_tensor)
2426 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2427 }
2428 // Mark this symbol as init'ed.
2429 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2430 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2431 }
2432 ccv_array_free(to_parameter_indices);
2433 ccv_array_free(from_parameter_indices);
2434}
2435
// Instantiates a khash hash-table type named ccv_cnnp_parameter_id that maps C-string keys
// (kh_cstr_t) to int values, together with its static inline init / destroy / clear / get /
// resize / put / del helpers. Everything after the macro call on the next line is the
// preprocessor expansion of that macro as rendered in this annotated listing.
// Per the expansion: kh_put_* sets *ret to -1 when growing the table fails (and returns
// n_buckets, which is not a usable slot), 0 when the key is already present, 1 or 2 when a new
// slot was claimed. kh_put_* stores only the key; the caller assigns vals[] afterwards.
2436KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27
Taking true branch
28
Taking false branch
29
Calling 'kh_resize_ccv_cnnp_parameter_id'
30
Taking true branch
31
Assuming the condition is false
32
Taking false branch
33
'?' condition is true
34
Assuming 'new_flags' is non-null
35
Taking false branch
36
'?' condition is true
37
Taking true branch
38
Storing uninitialized value
39
Assuming 'new_keys' is non-null
40
Taking false branch
41
Taking true branch
42
Assuming 'new_vals' is non-null
43
Taking false branch
44
Taking true branch
45
Loop condition is false. Execution continues on line 2436
46
Taking false branch
47
Returning from 'kh_resize_ccv_cnnp_parameter_id'
48
Taking false branch
49
Assuming the condition is true
50
Taking true branch
51
Taking true branch
57
Assuming field 'n_occupied' is >= field 'upper_bound'
58
Taking true branch
59
Taking true branch
60
Calling 'kh_resize_ccv_cnnp_parameter_id'
61
Taking false branch
62
Assuming the condition is false
63
Taking false branch
64
'?' condition is true
65
Assuming 'new_flags' is non-null
66
Taking false branch
67
'?' condition is true
68
Taking false branch
69
Taking true branch
70
Loop condition is true. Entering loop body
71
Assuming the condition is false
72
Taking false branch
73
The value 1 is assigned to 'j'
74
Loop condition is true. Entering loop body
75
Assuming the condition is true
76
Taking true branch
77
Assigned value is garbage or undefined
2437
2438void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2439{
2440 ccv_array_t* to_parameter_indices;
2441 int to_param_ref;
2442 ccv_array_t* from_parameter_indices;
2443 int from_param_ref;
2444 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2445 // Should be exactly the same tensor.
2446 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2447 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2447, __extension__ __PRETTY_FUNCTION__
); }))
; }
2448 // To models.
2449 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2450 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2450, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2451 // From models.
2452 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2453 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2454 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2454, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count can share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2455 const int from_parameter_size = from_compiled_data->parameters->rnum;
2456 const int to_parameter_size = to_compiled_data->parameters->rnum;
2457 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2458 int i, j;
2459 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2460 char* updated_name = 0;
2461 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2462 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2463 for (i = 0; i < rnum; i++)
2464 {
2465 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2466 // Need to figure out how to use the renamer here.
2467 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2468 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2468, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2469 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2469, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2470 if (renamer
18.1
'renamer' is non-null
)
2471 {
2472 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2473 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2474 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2475 updated_name = (char*)ccmallocmalloc(1024);
2476 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2477 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2478 memcpy(updated_name, src_name, src_name_len);
2479 updated_name[src_name_len] = 0;
2480 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2481 continue; // Skip this.
2482 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2483 {
2484 // Nothing changed.
2485 } else {
2486 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2487 {
2488 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2489 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
54
Assuming 'j' is < 'from_parameter_size'
55
Loop condition is true. Entering loop body
2490 {
2491 int ret;
2492 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
52
Returning from 'kh_put_ccv_cnnp_parameter_id'
56
Calling 'kh_put_ccv_cnnp_parameter_id'
2493 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2493
, __extension__ __PRETTY_FUNCTION__); }))
;
53
Taking true branch
2494 kh_val(id_map, k)((id_map)->vals[k]) = j;
2495 }
2496 }
2497 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2498 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2499 continue;
2500 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2501 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2501, __extension__ __PRETTY_FUNCTION__); }))
;
2502 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2502, __extension__
__PRETTY_FUNCTION__); }))
;
2503 }
2504 }
2505 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2505, __extension__ __PRETTY_FUNCTION__); }))
;
2506 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2506, __extension__
__PRETTY_FUNCTION__); }))
;
2507 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2508 // If the original is not init'ed. We cannot share from.
2509 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2510 continue;
2511 for (j = 0; j < parallel_count; j++)
2512 {
2513 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2514 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2514, __extension__
__PRETTY_FUNCTION__); }))
;
2515 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2516 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2517 ccv_nnc_tensor_free(dest);
2518 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2519 }
2520 // Mark this symbol as init'ed.
2521 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2522 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2523 }
2524 ccv_array_free(to_parameter_indices);
2525 ccv_array_free(from_parameter_indices);
2526 if (id_map)
2527 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2528 if (updated_name)
2529 ccfreefree(updated_name);
2530 // Mark it as incomplete so we will call init_1.
2531 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2532 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2533 else // Remove the flag.
2534 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2535}
2536
2537ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2538{
2539 if (!compiled_data->stream_map)
2540 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2541 int ret = 0;
2542 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2543 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2543, __extension__ __PRETTY_FUNCTION__); }))
;
2544 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2545 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2546 if (ret != 0)
2547 {
2548 stream = ccv_nnc_stream_context_new(type);
2549 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2550 }
2551 return stream;
2552}
2553
2554void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2555{
2556 ccv_array_t* to_parameter_indices;
2557 int to_param_ref;
2558 ccv_array_t* from_parameter_indices;
2559 int from_param_ref;
2560 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2561 // Should be exactly the same tensor.
2562 if (to_param_ref < 0 && from_param_ref < 0)
2563 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2563, __extension__ __PRETTY_FUNCTION__
); }))
; }
2564 // To models.
2565 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2566 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2566, __extension__ __PRETTY_FUNCTION__
); }))
;
2567 // From models.
2568 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2569 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2570 const int to_parameter_size = to_compiled_data->parameters->rnum;
2571 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2572 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2572, __extension__ __PRETTY_FUNCTION__
); }))
;
2573 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2573, __extension__ __PRETTY_FUNCTION__
); }))
;
2574 int i, j;
2575 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2576 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2577 for (i = 0; i < aux_in_size; i++)
2578 inputs[i + 2] = aux_ins[i];
2579 for (i = 0; i < aux_out_size; i++)
2580 outputs[i + 1] = aux_outs[i];
2581 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2582 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2583 for (i = 0; i < rnum; i++)
2584 {
2585 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2586 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2586, __extension__ __PRETTY_FUNCTION__); }))
;
2587 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2587, __extension__ __PRETTY_FUNCTION__
); }))
;
2588 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2589 // If the original is not init'ed. We cannot copy from.
2590 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2591 continue;
2592 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2593 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2593, __extension__ __PRETTY_FUNCTION__); }))
;
2594 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2594, __extension__ __PRETTY_FUNCTION__
); }))
;
2595 if (parallel_count > 1)
2596 {
2597 ccv_nnc_stream_context_t* streams[parallel_count];
2598 ccv_nnc_stream_signal_t* signal;
2599 if (stream_context)
2600 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2601 for (j = 0; j < parallel_count; j++)
2602 {
2603 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2604 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2605 if (!dest || !src)
2606 {
2607 streams[j] = 0;
2608 continue;
2609 }
2610 // At the moment, can only handle them on the same device.
2611 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2611, __extension__ __PRETTY_FUNCTION__
); }))
;
2612 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2612, __extension__ __PRETTY_FUNCTION__
); }))
;
2613 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2614 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2615 int type = stream_type;
2616 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2617 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2618 // Wait signal to finish.
2619 if (stream_context)
2620 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2621 inputs[0] = outputs[0] = dest;
2622 inputs[1] = src;
2623 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2624 if (stream_context)
2625 {
2626 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2627 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2628 }
2629 streams[j] = stream_0;
2630 }
2631 // If this should be blocking, blocking it.
2632 if (!stream_context)
2633 for (j = 0; j < parallel_count; j++)
2634 if (streams[j])
2635 ccv_nnc_stream_context_wait(streams[j]);
2636 } else {
2637 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2638 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2638, __extension__
__PRETTY_FUNCTION__); }))
;
2639 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2640 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2640, __extension__
__PRETTY_FUNCTION__); }))
;
2641 inputs[0] = outputs[0] = dest;
2642 inputs[1] = src;
2643 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2644 }
2645 // Mark this symbol as init'ed.
2646 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2647 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2648 }
2649 ccv_array_free(to_parameter_indices);
2650 ccv_array_free(from_parameter_indices);
2651}
2652
2653void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2654{
2655 int to_param_ref;
2656 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2657 // To models.
2658 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2659 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2659, __extension__ __PRETTY_FUNCTION__
); }))
;
2660 // Tensor has to be inited already.
2661 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2661, __extension__ __PRETTY_FUNCTION__
); }))
;
2662 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2662, __extension__ __PRETTY_FUNCTION__
); }))
;
2663 // From models.
2664 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2665 const int to_parameter_size = to_compiled_data->parameters->rnum;
2666 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2667 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2667, __extension__ __PRETTY_FUNCTION__
); }))
;
2668 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2668, __extension__ __PRETTY_FUNCTION__
); }))
;
2669 int i, j;
2670 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2671 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2672 for (i = 0; i < aux_in_size; i++)
2673 inputs[i + 1] = aux_ins[i];
2674 for (i = 0; i < aux_out_size; i++)
2675 outputs[i + 1] = aux_outs[i];
2676 for (i = 0; i < rnum; i++)
2677 {
2678 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2679 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2679, __extension__ __PRETTY_FUNCTION__); }))
;
2680 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2680, __extension__ __PRETTY_FUNCTION__
); }))
;
2681 if (parallel_count > 1)
2682 {
2683 ccv_nnc_stream_context_t* streams[parallel_count];
2684 ccv_nnc_stream_signal_t* signal;
2685 if (stream_context)
2686 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2687 for (j = 0; j < parallel_count; j++)
2688 {
2689 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2690 if (!dest)
2691 {
2692 streams[j] = 0;
2693 continue;
2694 }
2695 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2696 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2697 int type = stream_type;
2698 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2699 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2700 // Wait signal to finish.
2701 if (stream_context)
2702 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2703 inputs[0] = outputs[0] = dest;
2704 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2705 if (stream_context)
2706 {
2707 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2708 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2709 }
2710 streams[j] = stream_0;
2711 }
2712 // If this should be blocking, blocking it.
2713 if (!stream_context)
2714 for (j = 0; j < parallel_count; j++)
2715 if (streams[j])
2716 ccv_nnc_stream_context_wait(streams[j]);
2717 } else {
2718 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2719 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2719, __extension__
__PRETTY_FUNCTION__); }))
;
2720 inputs[0] = outputs[0] = dest;
2721 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2722 }
2723 // No need to mark this symbol as init'ed, it is already.
2724 }
2725 ccv_array_free(to_parameter_indices);
2726}
2727
2728void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2729{
2730 int to_param_ref;
2731 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2732 // To models.
2733 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2734 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2734, __extension__ __PRETTY_FUNCTION__
); }))
;
2735 // Tensor has to be inited already.
2736 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2736, __extension__ __PRETTY_FUNCTION__
); }))
;
2737 ccv_nnc_tensor_t** tensor_gradients;
2738 if (to_compiled_data->backward.count > 1)
2739 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2740 else
2741 tensor_gradients = to_compiled_data->tensors.gradients;
2742 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 2742, __extension__ __PRETTY_FUNCTION__
); }))
;
2743 // From models.
2744 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2745 const int to_parameter_size = to_compiled_data->parameters->rnum;
2746 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2747 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2747, __extension__ __PRETTY_FUNCTION__
); }))
;
2748 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2748, __extension__ __PRETTY_FUNCTION__
); }))
;
2749 int i, j;
2750 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2751 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2752 for (i = 0; i < aux_in_size; i++)
2753 inputs[i + 1] = aux_ins[i];
2754 for (i = 0; i < aux_out_size; i++)
2755 outputs[i + 1] = aux_outs[i];
2756 for (i = 0; i < rnum; i++)
2757 {
2758 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2759 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2759, __extension__ __PRETTY_FUNCTION__); }))
;
2760 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2760, __extension__ __PRETTY_FUNCTION__
); }))
;
2761 if (parallel_count > 1)
2762 {
2763 ccv_nnc_stream_context_t* streams[parallel_count];
2764 ccv_nnc_stream_signal_t* signal;
2765 if (stream_context)
2766 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2767 for (j = 0; j < parallel_count; j++)
2768 {
2769 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2770 if (!dest)
2771 {
2772 streams[j] = 0;
2773 continue;
2774 }
2775 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2776 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2777 int type = stream_type;
2778 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2779 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2780 // Wait signal to finish.
2781 if (stream_context)
2782 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2783 inputs[0] = outputs[0] = dest;
2784 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2785 if (stream_context)
2786 {
2787 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2788 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2789 }
2790 streams[j] = stream_0;
2791 }
2792 // If this should be blocking, blocking it.
2793 if (!stream_context)
2794 for (j = 0; j < parallel_count; j++)
2795 if (streams[j])
2796 ccv_nnc_stream_context_wait(streams[j]);
2797 } else {
2798 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2799 if (!dest)
2800 continue;
2801 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2801, __extension__
__PRETTY_FUNCTION__); }))
;
2802 inputs[0] = outputs[0] = dest;
2803 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2804 }
2805 // No need to mark this symbol as init'ed, it is already.
2806 }
2807 ccv_array_free(to_parameter_indices);
2808}
2809
2810ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2811{
2812 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2813 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2813, __extension__ __PRETTY_FUNCTION__); }))
;
2814 return compiled_data->minimize.minimizer;
2815}
2816
2817void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2818{
2819 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2820 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2820, __extension__ __PRETTY_FUNCTION__); }))
;
2821 const int parameter_size = compiled_data->parameters->rnum;
2822 if (parameter_size == 0)
2823 return;
2824 if (reset)
2825 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 2825, __extension__ __PRETTY_FUNCTION__
); }))
; }
2826 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2827 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2828 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2829 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2830 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2831 // We update all parameters, at this point, we have one minimizer.
2832 if (set_parameters == 0 || set_parameter_size == 0)
2833 compiled_data->minimize.minimizer = minimizer;
2834 int i;
2835 if (set_parameters && set_parameter_size)
2836 {
2837 // I need to save what's the minimizer along with this.
2838 if (!compiled_data->minimize.parameters)
2839 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2840 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2841 set_minimizer_for_parameter->minimizer = minimizer;
2842 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2843 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2844 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2845 }
2846 // If reset is true, clear the parameters array.
2847 if (reset && compiled_data->minimize.parameters)
2848 {
2849 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2850 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2851 ccv_array_clear(compiled_data->minimize.parameters);
2852 }
2853 if (!compiled_data->update_nodes)
2854 return;
2855 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2856 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 2856, __extension__ __PRETTY_FUNCTION__); }))
;
2857 if (saved_aux_size > old_max_saved_aux_size)
2858 {
2859 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 2859, __extension__ __PRETTY_FUNCTION__
); }))
;
2860 // Reallocate first, move them around later.
2861 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2862 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2863 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2864 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
2865 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2866 }
2867 int flag = 0;
2868 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2869 if (set_parameters && set_parameter_size)
2870 {
2871 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2872 for (i = 0; i < set_parameter_size; i++)
2873 {
2874 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2875 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 2875, __extension__ __PRETTY_FUNCTION__
); }))
;
2876 const int old_rnum = parameter_indices->rnum;
2877 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2878 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2879 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 2879, __extension__ __PRETTY_FUNCTION__
); }))
;
2880 if (param_ref >= 0)
2881 {
2882 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2882, __extension__ __PRETTY_FUNCTION__
); }))
;
2883 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
2884 parameter_indices->rnum = old_rnum + 1;
2885 }
2886 }
2887 // We may have duplicated indices, but that is OK, we will set it twice.
2888 for (i = 0; i < parameter_indices->rnum; i++)
2889 {
2890 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
2891 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2892 flag = 1;
2893 }
2894 ccv_array_free(parameter_indices);
2895 } else {
2896 for (i = 0; i < parameter_size; i++)
2897 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2898 flag = 1;
2899 if (compiled_data->minimize.parameters)
2900 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2901 flag = 1;
2902 }
2903 if (flag)
2904 {
2905 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
2906 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2907 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2908 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2909 }
2910}
2911
2912void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2913{
2914 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2915 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2915, __extension__ __PRETTY_FUNCTION__); }))
;
2916 compiled_data->compile_params = compile_params;
2917}
2918
2919void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2920{
2921 if (model->graph && out_size > 0)
2922 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2923 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2924 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2925 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2926 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2927 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2928 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2929}
2930
2931void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2932{
2933 if (model->graph)
2934 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2935}
2936
2937static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2938{
2939 int i;
2940 const int parameter_size = compiled_data->parameters->rnum;
2941 ccv_array_free(compiled_data->parameters);
2942 if (compiled_data->parameter_flags)
2943 ccfreefree(compiled_data->parameter_flags);
2944 const int internal_size = compiled_data->internals->rnum;
2945 ccv_array_free(compiled_data->internals);
2946 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 2946, __extension__ __PRETTY_FUNCTION__
); }))
;
2947 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 2947, __extension__ __PRETTY_FUNCTION__
); }))
;
2948 for (i = 0; i < parameter_size; i++)
2949 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
2950 ccv_array_free(compiled_data->ids.parameters);
2951 for (i = 0; i < internal_size; i++)
2952 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
2953 ccv_array_free(compiled_data->ids.internals);
2954 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2955 if (compiled_data->tensors.parameters)
2956 {
2957 for (i = 0; i < parameter_size * parallel_count; i++)
2958 // If it is not marked as not belonging, we can free it.
2959 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2960 if (compiled_data->tensors.parameters[i])
2961 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2962 for (i = 0; i < internal_size * parallel_count; i++)
2963 if (compiled_data->tensors.internals[i])
2964 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2965 ccfreefree(compiled_data->tensors.parameters);
2966 }
2967 if (compiled_data->tensors.gradients)
2968 {
2969 for (i = 0; i < parameter_size * parallel_count; i++)
2970 {
2971 if (compiled_data->tensors.gradients[i])
2972 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2973 if (compiled_data->tensors.accum_gradients[i])
2974 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2975 }
2976 ccfreefree(compiled_data->tensors.gradients);
2977 }
2978 if (compiled_data->minimize.parameters)
2979 {
2980 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2981 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2982 ccv_array_free(compiled_data->minimize.parameters);
2983 }
2984 if (compiled_data->rewindables)
2985 ccv_array_free(compiled_data->rewindables);
2986 if (compiled_data->tensors_init.v)
2987 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
2988 if (compiled_data->evaluate.tos)
2989 ccfreefree(compiled_data->evaluate.tos);
2990 compiled_data->evaluate.tos = 0;
2991 if (compiled_data->stream_map)
2992 {
2993 khiter_t k;
2994 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
2995 {
2996 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
2997 continue;
2998 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2999 ccv_nnc_stream_context_free(stream);
3000 }
3001 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
3002 }
3003 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3004 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3005 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3006 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3007 if (compiled_data->gradient_checkpoints)
3008 {
3009 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3010 {
3011 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
3012 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 3012, __extension__ __PRETTY_FUNCTION__
); }))
;
3013 ccfreefree(checkpoint->inputs);
3014 ccv_array_free(checkpoint->tensor_symbols);
3015 }
3016 ccv_array_free(compiled_data->gradient_checkpoints);
3017 }
3018 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3019 ccfreefree(compiled_data);
3020}
3021
3022void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3023{
3024 if (model->isa->deinit)
3025 model->isa->deinit(model);
3026 if (model->io)
3027 {
3028 int i;
3029 for (i = 0; i < model->io->rnum; i++)
3030 {
3031 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
3032 if (model_io->outgoings)
3033 ccv_array_free(model_io->outgoings);
3034 if (model_io->incomings)
3035 ccv_array_free(model_io->incomings);
3036 if (model_io->dependencies)
3037 ccv_array_free(model_io->dependencies);
3038 ccfreefree(model_io);
3039 }
3040 ccv_array_free(model->io);
3041 }
3042 if (model->parameter_indices)
3043 ccv_array_free(model->parameter_indices);
3044 if (model->inputs)
3045 ccfreefree(model->inputs);
3046 if (model->graph)
3047 ccv_nnc_symbolic_graph_free(model->graph);
3048 if (model->compiled_data)
3049 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3050 if (model->name)
3051 ccfreefree(model->name);
3052 ccfreefree(model);
3053}
3054
3055void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3056{
3057 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3058 if (!compiled_data)
3059 return;
3060 if (compiled_data->graph)
3061 ccv_nnc_graph_cancel(compiled_data->graph);
3062 if (compiled_data->apply_gradients.graph)
3063 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3064}