Bug Summary

File:nnc/ccv_cnnp_model.c
Warning:line 2498, column 25
Array access (via field 'vals') results in a null pointer dereference

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-11-08-093000-160011-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6#include "_ccv_nnc_graph.h"
7
8// MARK - Level-5 API
9
10ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
11{
12 if (!model->io)
13 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
14 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
15 model_io->param_ref = 0;
16 model_io->param_sel = 0;
17 model_io->visit = 0;
18 model_io->model = model;
19 model_io->dependencies = 0;
20 model_io->dependents = 0;
21 model_io->outgoings = 0;
22 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
23 ccv_array_push(model->io, &model_io);
24 if (input_size > 0)
25 {
26 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
27 ccv_array_resize(model_io->incomings, input_size);
28 int i;
29 memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t
)(model_io->incomings)->rsize * (size_t)(0)))
, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
30 for (i = 0; i < input_size; i++)
31 {
32 if (!inputs[i]->outgoings)
33 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
34 ccv_array_push(inputs[i]->outgoings, &model_io);
35 }
36 } else {
37 model_io->incomings = 0;
38 }
39 return model_io;
40}
41
42void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
43{
44 assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__
({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0"
, "ccv_cnnp_model.c", 44, __extension__ __PRETTY_FUNCTION__);
}))
;
45 if (!model_io->dependencies)
46 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
47 int i, j;
48 for (i = 0; i < dependency_size; i++)
49 {
50 int flag = 0;
51 // Check if it is already exist or not.
52 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
53 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t
)(model_io->dependencies)->rsize * (size_t)(j)))
== dependencies[i])
54 flag = 1;
55 if (flag)
56 continue;
57 ccv_array_push(model_io->dependencies, dependencies + i);
58 ++dependencies[i]->dependents;
59 }
60}
61
62int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
63{
64 return model->output_size;
65}
66
67int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
68{
69 // If the model is compiled, it is default to 1 unless it is not.
70 if (model->compiled_data)
71 return model->is_trainable >= 0 ? model->is_trainable : 1;
72 return model->is_trainable;
73}
74
75ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
76{
77 if (!model->io)
78 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
79 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s));
80 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
81 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
82 model_io->visit = 0;
83 model_io->model = model;
84 model_io->outputs = 0;
85 model_io->dependencies = 0;
86 model_io->dependents = 0;
87 model_io->incomings = 0;
88 model_io->outgoings = 0;
89 ccv_array_push(model->io, &model_io);
90 return model_io;
91}
92
93void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
94{
95 model->notify_hook.func = func;
96 model->notify_hook.context = context;
97}
98
99void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
100{
101 if (model->notify_hook.func)
102 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
103 if (model->isa->notify)
104 model->isa->notify(model, tag, payload);
105}
106
107static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
108{
109 int i, j;
110 for (i = 0; i < graph_exec_symbol_size; i++)
111 {
112 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
113 // Check whether this tensor symbol has any duplicate.
114 for (j = i + 1; j < graph_exec_symbol_size;)
115 {
116 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
117 // If there is a same tensor symbol, remove it.
118 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
119 {
120 if (j + 1 < graph_exec_symbol_size)
121 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
122 --graph_exec_symbol_size;
123 continue;
124 }
125 ++j;
126 }
127 }
128 return graph_exec_symbol_size;
129}
130
131void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
132{
133 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
134 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
135 int i;
136 if (!model->parameter_indices)
137 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
138 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
139 {
140 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data
)) + (size_t)(add_to_array_context->symbols)->rsize * (
size_t)(i)))
;
141 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
142 {
143 // Only add to parameter_indices if it is trainable.
144 if (add_to_array_context->prefix == 't')
145 ccv_array_add_unique_int(model->parameter_indices, i);
146 // Found it, return, don't add it.
147 return;
148 }
149 }
150 // Only add to parameter_indices if it is trainable.
151 if (add_to_array_context->prefix == 't')
152 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
153 // This is a new one, no need to add_unique_int, it is unique.
154 ccv_array_push(add_to_array_context->symbols, &symbol);
155 if (add_to_array_context->trainables)
156 ccv_array_push(add_to_array_context->trainables, &is_trainable);
157 char id[2048];
158 id[0] = add_to_array_context->prefix;
159 id[1] = '-';
160 int total_len = 2;
161 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
162 {
163 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences
)->data)) + (size_t)(add_to_array_context->sequence->
sequences)->rsize * (size_t)(i)))
;
164 int len;
165 if (name->name && name->name[0] != '\0')
166 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
167 else
168 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
169 total_len += len;
170 if (total_len >= 2047)
171 break;
172 }
173 if (total_len < 2047)
174 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
175 assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__
({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048"
, "ccv_cnnp_model.c", 175, __extension__ __PRETTY_FUNCTION__)
; }))
;
176 char *heap_id = (char*)ccmallocmalloc(total_len + 1);
177 memcpy(heap_id, id, total_len + 1);
178 ccv_array_push(add_to_array_context->ids, &heap_id);
179 ++add_to_array_context->sequence->it;
180}
181
182static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
183{
184 compiled_data->f = compiled_data->fits + output_size;
185 compiled_data->xpu_alloc.mp_hdr = -1;
186 compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str();
187 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc();
188 compiled_data->gradient_checkpoints = gradient_checkpoints;
189}
190
191static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
192{
193 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 193, __extension__ __PRETTY_FUNCTION__); }))
;
194 model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
195 int i;
196 for (i = 0; i < input_size; i++)
197 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
198 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
199 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
200 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
201 ccv_cnnp_model_sequence_t model_sequence = {
202 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
203 };
204 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
205 .sequence = &model_sequence,
206 .prefix = 't',
207 .symbols = parameters,
208 .ids = parameter_ids,
209 .trainables = parameter_trainables,
210 };
211 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
212 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
213 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
214 .sequence = &model_sequence,
215 .prefix = 'r',
216 .symbols = internals,
217 .ids = internal_ids,
218 .trainables = 0,
219 };
220 ccv_cnnp_model_build_data_t build_data = {
221 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
222 .model_sequence = &model_sequence,
223 .add_to_array = ccv_cnnp_model_add_to_array,
224 .parameters = parameters,
225 .context = {
226 .add_to_parameter = &add_to_parameter_context,
227 .add_to_output = &add_to_output_context,
228 },
229 .gradient_checkpoints = 0,
230 };
231 model->data = &build_data;
232 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
233 for (i = 0; i < model->output_size; i++)
234 {
235 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
236 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
237 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
238 continue;
239 // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method
240 // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be
241 // honest, because we cannot handle cases of alias is part of the original tensor but bind differently).
242 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
243 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
244 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, &output, 1, model->outputs + i, 1, "contiguous");
245 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
246 }
247 model->data = 0;
248 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
249 if (model_sequence.sequences)
250 ccv_array_free(model_sequence.sequences);
251 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
252 int not_trainables = 0;
253 // Assert no parameter is alias.
254 for (i = 0; i < parameters->rnum; i++)
255 {
256 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
257 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
258 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 258, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
259 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
== 0)
260 not_trainables = 1;
261 }
262 assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables->
rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables
->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum"
, "ccv_cnnp_model.c", 262, __extension__ __PRETTY_FUNCTION__)
; }))
;
263 uint64_t* parameter_flags = 0;
264 if (not_trainables)
265 {
266 parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
267 for (i = 0; i < parameter_trainables->rnum; i++)
268 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
)
269 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
270 }
271 ccv_array_free(parameter_trainables);
272 // Assert no internal is alias.
273 for (i = 0; i < internals->rnum; i++)
274 {
275 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals
)->rsize * (size_t)(i)))
;
276 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
277 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 277, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
278 }
279 const int output_size = model->output_size;
280 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
281 const int parameters_rnum = parameters->rnum;
282 if (input_size > 0)
283 {
284 ccv_array_resize(parameters, parameters_rnum + input_size);
285 memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(parameters_rnum)))
, model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
286 }
287 ccv_nnc_symbolic_graph_simplify(model->graph,
288 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
289 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
290 CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
291 CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
,
292 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters_rnum + input_size,
293 model->outputs, output_size,
294 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
295 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
296 // Size it down.
297 parameters->rnum = parameters_rnum;
298 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
299 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
300 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
301 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 301, __extension__ __PRETTY_FUNCTION__)
; }))
;
302 compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
303 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
304 compiled_data->loss = loss;
305 if (loss.cmd == CCV_NNC_NOOP)
306 {
307 // If no loss function provided, there is no fits.
308 for (i = 0; i < output_size; i++)
309 {
310 compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
311 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
312 if (alias_to.d < 0)
313 compiled_data->f[i] = model->outputs[i];
314 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
315 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
316 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
317 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
318 int j;
319 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
320 { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if (
ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c"
, 320, __extension__ __PRETTY_FUNCTION__); }))
; } // There is no ofs.
321 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
322 }
323 }
324 } else {
325 for (i = 0; i < output_size; i++)
326 {
327 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
328 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
329 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
330 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit}
, (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 -1)
, TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, (
1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 -1)
, 0);
331 }
332 }
333 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
334 ccv_nnc_symbolic_graph_simplify(model->graph,
335 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, // Only do Ops fusion, in this way, we can fuse the loss function.
336 0, 0, // No need to provide binds at this point.
337 compiled_data->f, model->output_size,
338 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
339 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
340 // If inputs are from GPU, stream type is GPU.
341 compiled_data->parameters = parameters;
342 compiled_data->parameter_flags = parameter_flags;
343 compiled_data->internals = internals;
344 compiled_data->ids.parameters = parameter_ids;
345 compiled_data->ids.internals = internal_ids;
346 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
347}
348
349static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
350{
351 ccv_array_t* const stack = (ccv_array_t*)context;
352 ccv_array_push(stack, &symbol.d);
353}
354
355static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
356{
357 const ccv_nnc_tensor_symbol_t src_symbol = {
358 .d = src_index,
359 .graph = src_graph
360 };
361 const ccv_nnc_tensor_symbol_t dest_symbol = {
362 .d = dest_index,
363 .graph = dest_graph
364 };
365 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
366 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
367 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
368 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
369 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
370 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
371}
372
373static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
374{
375 const ccv_nnc_tensor_symbol_t src_symbol = {
376 .d = src_index,
377 .graph = src_graph
378 };
379 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
380 const ccv_nnc_tensor_symbol_t dest_symbol = {
381 .d = dest_index,
382 .graph = dest_graph
383 };
384 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
385 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
386}
387
388static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
389static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
390
391typedef struct {
392 int parallel_count;
393 ccv_nnc_symbolic_graph_t* graph;
394 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
395} ccv_nnc_graph_exec_update_t;
396
397static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
398{
399 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
400 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
401 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
402 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
403 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
404 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
405 const int parallel_count = graph_exec_update->parallel_count;
406 int i;
407 for (i = 1; i < parallel_count; i++)
408 {
409 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
410 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
411 {
412 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
413 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
414 }
415 }
416}
417
418void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
419{
420 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 420, __extension__ __PRETTY_FUNCTION__); }))
;
421 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 421, __extension__ __PRETTY_FUNCTION__)
; }))
;
422 assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if
(!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c"
, 422, __extension__ __PRETTY_FUNCTION__); }))
;
423 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
424 init->graph = ccv_nnc_symbolic_graph_new();
425 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
426 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
427 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
428 init->parallel_count = model->parallel_count;
429 init->memory_compression = model->memory_compression;
430 init->memory_reduction = model->memory_reduction;
431 init->gradient_checkpointing = model->gradient_checkpointing;
432 init->compiled_data->stream_type = model->compiled_data->stream_type;
433 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
434 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
435 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
436 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
437 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
438 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0);
439 int i, j;
440 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
441 for (i = 0; i < compiled_data->parameters->rnum; i++)
442 {
443 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
444 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 444, __extension__ __PRETTY_FUNCTION__)
; }))
;
445 }
446 for (i = 0; i < compiled_data->internals->rnum; i++)
447 {
448 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
449 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 449, __extension__ __PRETTY_FUNCTION__)
; }))
;
450 }
451 // Update inputs.
452 assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size)
? 1 : 0), __extension__ ({ if (model->input_size == init->
input_size) ; else __assert_fail ("model->input_size == init->input_size"
, "ccv_cnnp_model.c", 452, __extension__ __PRETTY_FUNCTION__)
; }))
;
453 for (i = 0; i < model->input_size; i++)
454 if (model->inputs[i].d >= 0)
455 {
456 assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0"
, "ccv_cnnp_model.c", 456, __extension__ __PRETTY_FUNCTION__)
; }))
;
457 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
458 }
459 // Update outputs.
460 assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size
) ? 1 : 0), __extension__ ({ if (model->output_size == init
->output_size) ; else __assert_fail ("model->output_size == init->output_size"
, "ccv_cnnp_model.c", 460, __extension__ __PRETTY_FUNCTION__)
; }))
;
461 for (i = 0; i < model->output_size; i++)
462 {
463 if (model->outputs[i].d >= 0)
464 {
465 assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->outputs[i].d >= 0) ; else __assert_fail (
"init->outputs[i].d >= 0", "ccv_cnnp_model.c", 465, __extension__
__PRETTY_FUNCTION__); }))
;
466 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
467 }
468 if (model->outputs[i].d != model->compiled_data->f[i].d)
469 {
470 assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data
->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[
i].d != init->compiled_data->f[i].d) ; else __assert_fail
("init->outputs[i].d != init->compiled_data->f[i].d"
, "ccv_cnnp_model.c", 470, __extension__ __PRETTY_FUNCTION__)
; }))
;
471 if (model->compiled_data->f[i].d >= 0)
472 {
473 assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ?
1 : 0), __extension__ ({ if (init->compiled_data->f[i]
.d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0"
, "ccv_cnnp_model.c", 473, __extension__ __PRETTY_FUNCTION__)
; }))
;
474 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
475 }
476 }
477 }
478 // Go through the graph to set tensor on matching symbols
479 for (i = 0; i < stack->rnum; i++)
480 {
481 const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize
* (size_t)(i)))
;
482 // If exceed range, skip.
483 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
484 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
485 continue;
486 const ccv_nnc_graph_exec_symbol_t src_symbol = {
487 .d = d,
488 .graph = init->graph
489 };
490 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
491 .d = d,
492 .graph = model->graph
493 };
494 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
495 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
496 // If the name doesn't match, skip.
497 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
498 continue;
499 // Now get all the inputs and outputs, if matches, set them.
500 const int* src_inputs;
501 int src_input_size;
502 const int* src_outputs;
503 int src_output_size;
504 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
505 const int* dest_inputs;
506 int dest_input_size;
507 const int* dest_outputs;
508 int dest_output_size;
509 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
510 // We may have unmatched input / output size because this is the minimizer and it has
511 // different saved_aux (for example, when we shrunk with CMD_NOOP).
512 if (src_input_size != dest_input_size)
513 continue;
514 if (src_output_size != dest_output_size)
515 continue;
516 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
517 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
518 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
519 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
520 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
521 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
522 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
523 // a new exec symbol.
524 for (j = 0; j < src_input_size; j++)
525 if (src_inputs[j] >= 0)
526 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
527 for (j = 0; j < src_output_size; j++)
528 if (src_outputs[j] >= 0)
529 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
530 }
531 ccv_array_free(stack);
532 // After this, we get all tensors in the model graph resolved through tensor_auto.
533 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0);
534 // Verify symbols we get matches.
535 const int parameter_size = compiled_data->parameters->rnum;
536 for (i = 0; i < parameter_size; i++)
537 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->parameters)->data)) + (size_t)(compiled_data
->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (
((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data->
parameters)->data)) + (size_t)(compiled_data->parameters
)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d"
, "ccv_cnnp_model.c", 537, __extension__ __PRETTY_FUNCTION__)
; }))
; }
538 const int internal_size = compiled_data->internals->rnum;
539 for (i = 0; i < internal_size; i++)
540 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->internals)->data)) + (size_t)(compiled_data
->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->internals)->
data)) + (size_t)(init->compiled_data->internals)->rsize
* (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((compiled_data->internals)->data)) +
(size_t)(compiled_data->internals)->rsize * (size_t)(i
))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init
->compiled_data->internals)->data)) + (size_t)(init->
compiled_data->internals)->rsize * (size_t)(i))))->d
) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d"
, "ccv_cnnp_model.c", 540, __extension__ __PRETTY_FUNCTION__)
; }))
; }
541 // Go through compiled data.
542 if (compiled_data->tensor_arena)
543 {
544 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
545 if (flag == 0 && compiled_data->graph_exec_arena)
546 {
547 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
548 // Since we will reinit, if we previously set is_test, we need to set it again.
549 if (compiled_data->is_test)
550 {
551 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
552 ccv_nnc_graph_exec_update_t update = {
553 .parallel_count = parallel_count,
554 .graph = model->graph,
555 .graph_exec_arena = compiled_data->graph_exec_arena,
556 };
557 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
558 }
559 } else
560 // Free-up tensor arena & graph exec arena.
561 _ccv_cnnp_compiled_data_graph_free(compiled_data);
562 }
563 // There are other compiled graphs, for accum and apply gradients.
564 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
565 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
566 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
567 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
568 // That is why we don't update these compiled graphs at all this point.
569 // Free the model, we've already "absorbed" it.
570 ccv_cnnp_model_free(init);
571}
572
573void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
574{
575 assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model->
input_size == 0) ? 1 : 0), __extension__ ({ if (input_size ==
model->input_size || model->input_size == 0) ; else __assert_fail
("input_size == model->input_size || model->input_size == 0"
, "ccv_cnnp_model.c", 575, __extension__ __PRETTY_FUNCTION__)
; }))
;
576 if (model->input_size == 0)
577 model->input_size = input_size;
578 if (!model->graph) // The graph is not compiled yet.
579 {
580 model->graph = ccv_nnc_symbolic_graph_new();
581 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
582 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 582, __extension__ __PRETTY_FUNCTION__)
; }))
;
583 int i, flag = 0;
584 for (i = 0; !flag && i < input_size; i++)
585 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY);
586 // If inputs are from GPU, stream type is GPU.
587 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
588 model->compiled_data->minimize.minimizer = minimizer;
589 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
590 } else {
591 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
592 // And then absorb the "new model" to the old one.
593 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
594 ccv_cnnp_model_absorb(model, init, inputs, input_size);
595 // Reset minimizer.
596 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
597 }
598}
599
600ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
601{
602 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
603 new_model->is_trainable = is_trainable;
604 return new_model;
605}
606
607void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
608{
609 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 609, __extension__ __PRETTY_FUNCTION__); }))
;
610 assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0
), __extension__ ({ if (output_size == model->output_size)
; else __assert_fail ("output_size == model->output_size"
, "ccv_cnnp_model.c", 610, __extension__ __PRETTY_FUNCTION__)
; }))
;
611 ccv_nnc_symbolic_graph_t* const graph = model->graph;
612 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0);
613 int i;
614 for (i = 0; i < output_size; i++)
615 {
616 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL"
, "ccv_cnnp_model.c", 616, __extension__ __PRETTY_FUNCTION__)
; }))
;
617 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
618 }
619}
620
621void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
622{
623 if (workspace_size == model->workspace_size)
624 return;
625 model->workspace_size = workspace_size;
626 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
627 if (compiled_data && compiled_data->graph)
628 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0);
629}
630
631size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
632{
633 return model->workspace_size;
634}
635
636void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
637{
638 if (parallel == 0)
639 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
640 else
641 model->parallel_count = parallel;
642 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
643 if (compiled_data)
644 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 644, __extension__ __PRETTY_FUNCTION__)
; }))
; }
645}
646
647void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
648{
649 model->max_stream_count = max_stream_count;
650 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
651 if (compiled_data)
652 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 652, __extension__ __PRETTY_FUNCTION__)
; }))
; }
653}
654
655void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
656{
657 model->memory_compression = memory_compression;
658 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
659 if (compiled_data)
660 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 660, __extension__ __PRETTY_FUNCTION__)
; }))
; }
661}
662
663void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
664{
665 model->memory_reduction = memory_reduction;
666 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
667 if (compiled_data)
668 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 668, __extension__ __PRETTY_FUNCTION__)
; }))
; }
669}
670
671void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
672{
673 model->gradient_checkpointing = gradient_checkpointing;
674}
675
676int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
677{
678 return model->gradient_checkpointing;
679}
680
681typedef struct {
682 int parallel_count;
683 ccv_nnc_symbolic_graph_t* graph;
684 ccv_cnnp_compiled_data_t* compiled_data;
685 ccv_nnc_tensor_arena_t* tensor_arena;
686} ccv_nnc_tensor_init_states_t;
687
688static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
689{
690 int i;
691 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
692 for (i = 0; i < compiled_data->parameters->rnum; i++)
693 {
694 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
695 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
696 return 1;
697 }
698 for (i = 0; i < compiled_data->internals->rnum; i++)
699 {
700 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
701 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
702 return 1;
703 }
704 return 0;
705}
706
707static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
708{
709 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
710 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
711 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
712 if (!output_tensor)
713 return;
714 const int d = output_symbol.d;
715 assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data->
tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states
->compiled_data->tensors_init.size) ; else __assert_fail
("d < tensor_init_states->compiled_data->tensors_init.size"
, "ccv_cnnp_model.c", 715, __extension__ __PRETTY_FUNCTION__)
; }))
;
716 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data
->tensors_init.v) & ~(uintptr_t)1))
;
717 if (init_v[d >> 5] & (1u << (d & 0x1f)))
718 return;
719 init_v[d >> 5] |= (1u << (d & 0x1f));
720 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
721 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
722 const int parallel_count = tensor_init_states->parallel_count;
723 int i;
724 for (i = 1; i < parallel_count; i++)
725 {
726 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
727 if (copy)
728 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
729 }
730}
731
732// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
733// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
734static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
735{
736 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 736, __extension__ __PRETTY_FUNCTION__); }))
;
737 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 737, __extension__ __PRETTY_FUNCTION__)
; }))
;
738 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
739 assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__
({ if (compiled_data->rewindables) ; else __assert_fail (
"compiled_data->rewindables", "ccv_cnnp_model.c", 739, __extension__
__PRETTY_FUNCTION__); }))
;
740 int i;
741 for (i = 0; i < compiled_data->rewindables->rnum; i++)
742 {
743 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) +
(size_t)(compiled_data->rewindables)->rsize * (size_t)
(i)))
;
744 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
745 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
746 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
747 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
748 }
749 ccv_array_clear(compiled_data->rewindables);
750 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
751}
752
753static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
754{
755 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
756 .type = CCV_CNNP_REWIND_TENSOR,
757 .tensor = symbol
758 };
759 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
760 ccv_array_push(rewind_symbols, &rewind_symbol);
761}
762
763static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
764{
765 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
766 .type = CCV_CNNP_REWIND_TENSOR,
767 .tensor = symbol
768 };
769 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
770 ccv_array_push(rewind_symbols, &rewind_symbol);
771}
772
773static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
774{
775 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
776 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
777 .graph_exec = symbol
778 };
779 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
780 ccv_array_push(rewind_symbols, &rewind_symbol);
781}
782
783static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
784{
785 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
786 if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0))
787 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
788 int i;
789 for (i = 1; i < parallel_count; i++)
790 {
791 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
792 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
793 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
794 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
795 }
796}
797
798static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
799{
800 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 800, __extension__ __PRETTY_FUNCTION__); }))
;
801 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 801, __extension__ __PRETTY_FUNCTION__); }))
;
802 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
803 int i;
804 for (i = 1; i < parallel_count; i++)
805 {
806 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
807 if (copy_symbol.graph)
808 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
809 }
810 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
811 if (graph_exec_arena)
812 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
813 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
814 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
815 if (gradient_graph_exec_arena)
816 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
817}
818
819static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
820{
821 int this_parameter_flag = 0;
822 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
823 return this_parameter_flag;
824 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
825 int j, k;
826 // For no-op, we can preserve previous saved_aux_size.
827 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
828 {
829 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
830 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
831 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
832 // make sure some model parameters don't update if we don't want them to.
833 int old_saved_aux_size;
834 if (old_minimizer.cmd == CCV_NNC_NOOP)
835 {
836 int input_size;
837 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
838 if (input_size < 2) // This is not legit.
839 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
840 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
841 old_saved_aux_size = input_size - 2;
842 } else
843 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
844 if (old_saved_aux_size != saved_aux_size)
845 {
846 this_parameter_flag = 1;
847 if (saved_aux_size > old_saved_aux_size)
848 {
849 // Allocate new tensor symbols.
850 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
851 for (j = old_saved_aux_size; j < saved_aux_size; j++)
852 {
853 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
854 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
855 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
856 for (k = 1; k < parallel_count; k++)
857 {
858 ccv_nnc_tensor_param_t dev_info = info;
859 if (k != device_id)
860 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) &
0xfff) << 8))
;
861 else
862 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) &
0xfff) << 8))
;
863 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
864 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
865 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
866 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
867 }
868 }
869 } else {
870 for (j = saved_aux_size; j < old_saved_aux_size; j++)
871 {
872 for (k = 1; k < parallel_count; k++)
873 {
874 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
875 if (src_copy.d >= 0)
876 {
877 ccv_nnc_tensor_symbol_free(graph, src_copy);
878 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
879 }
880 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
881 if (dest_copy.d >= 0)
882 {
883 ccv_nnc_tensor_symbol_free(graph, dest_copy);
884 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
885 }
886 }
887 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
888 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
889 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
890 }
891 }
892 }
893 }
894 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
895 if (this_parameter_flag)
896 {
897 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
898 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
899 const int* inputs = 0;
900 int input_size = 0;
901 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
902 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 902, __extension__ __PRETTY_FUNCTION__)
; }))
;
903 update_inputs[0].d = inputs[0];
904 update_inputs[0].graph = graph;
905 update_inputs[1].d = inputs[1];
906 update_inputs[1].graph = graph;
907 update_outputs[0] = updated_parameters[parameter_indice];
908 for (j = 0; j < saved_aux_size; j++)
909 {
910 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
911 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
912 }
913 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
914 for (k = 1; k < parallel_count; k++)
915 {
916 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
917 assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if
(copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c"
, 917, __extension__ __PRETTY_FUNCTION__); }))
;
918 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
919 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 919, __extension__ __PRETTY_FUNCTION__)
; }))
;
920 update_inputs[0].d = inputs[0];
921 update_inputs[0].graph = graph;
922 update_inputs[1].d = inputs[1];
923 update_inputs[1].graph = graph;
924 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
925 for (j = 0; j < saved_aux_size; j++)
926 {
927 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
928 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
929 }
930 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
931 }
932 }
933 return this_parameter_flag;
934}
935
936typedef struct {
937 int parameter_size;
938 ccv_nnc_cmd_t minimizer;
939 ccv_cnnp_model_io_t parameters[1];
940} ccv_cnnp_set_minimizer_for_parameter_t;
941
942static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
943{
944 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
945 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 945, __extension__ __PRETTY_FUNCTION__); }))
;
946 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
947 // We update all parameters, at this point, we have one minimizer.
948 const int parameter_size = compiled_data->parameters->rnum;
949 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
950 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
951 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 951, __extension__ __PRETTY_FUNCTION__); }))
;
952 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
953 ccv_array_t* const parameters = compiled_data->minimize.parameters;
954 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
955 int i, j, flag = 0;
956 for (i = 0; i < parameters->rnum; i++)
957 {
958 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
959 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
960 {
961 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
962 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 962, __extension__ __PRETTY_FUNCTION__)
; }))
;
963 const int old_rnum = parameter_indices->rnum;
964 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
965 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
966 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 966, __extension__ __PRETTY_FUNCTION__)
; }))
;
967 if (param_ref >= 0)
968 {
969 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 969, __extension__ __PRETTY_FUNCTION__)
; }))
;
970 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
971 parameter_indices->rnum = old_rnum + 1;
972 }
973 }
974 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
975 // We may have duplicated indices, but that is OK, we will set it twice.
976 for (j = 0; j < parameter_indices->rnum; j++)
977 {
978 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
979 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 979, __extension__ __PRETTY_FUNCTION__)
; }))
;
980 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
981 flag = 1;
982 }
983 ccv_array_clear(parameter_indices);
984 }
985 ccv_array_free(parameter_indices);
986 return flag;
987}
988
989static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
990{
991 if (new_saved_aux_size == old_saved_aux_size)
992 return;
993 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 993, __extension__ __PRETTY_FUNCTION__)
; }))
;
994 int i, j;
995 for (i = parameter_size - 1; i >= 0; i--)
996 {
997 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
998 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
999 for (j = old_saved_aux_size - 1; j >= 0; j--)
1000 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1001 }
1002}
1003
1004static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1005{
1006 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1007 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1007, __extension__ __PRETTY_FUNCTION__); }))
;
1008 if (!compiled_data->rewindables)
1009 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1010 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1011 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1012 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1013}
1014
1015static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1016{
1017 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1018 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1018, __extension__ __PRETTY_FUNCTION__
); }))
;
1019 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1019, __extension__ __PRETTY_FUNCTION__
); }))
;
1020 const int evaluate_to_size = compiled_data->evaluate.to_size;
1021 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 1021, __extension__ __PRETTY_FUNCTION__
); }))
;
1022 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1023 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1024 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1025 int i, j;
1026 const int output_size = model->output_size;
1027 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 1027, __extension__ __PRETTY_FUNCTION__
); }))
;
1028 if (fits)
1029 for (i = 0; i < output_size; i++)
1030 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1031 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1032 const int parameter_size = compiled_data->parameters->rnum;
1033 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1034 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1035 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1036 int parameter_size_maybe_more = parameter_size;
1037 compiled_data->disable_outgrad = disable_outgrad;
1038 int outgrad_size;
1039 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1040 outgrad_size = 0;
1041 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1042 outgrad_size = model->input_size;
1043 else {
1044 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1044, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1045 outgrad_size = 0;
1046 for (i = 0; i < model->input_size; i++)
1047 if (!(disable_outgrad & ((uint64_t)1 << i)))
1048 ++outgrad_size;
1049 }
1050 compiled_data->outgrad_size = outgrad_size;
1051 parameter_size_maybe_more += outgrad_size;
1052 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1053 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1054 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1055 compiled_data->backward.to_size = parameter_size_maybe_more;
1056 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
;
1057 if (compiled_data->parameter_flags)
1058 {
1059 parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1060 for (i = 0; i < parameter_size; i++)
1061 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1062 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1063 else
1064 parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1065 }
1066 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1067 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1068 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1069 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1070 else { // Compute minimize with gradients including selected inputs.
1071 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 1071, __extension__ __PRETTY_FUNCTION__
); }))
;
1072 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1072, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1073 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 1073, __extension__ __PRETTY_FUNCTION__
); }))
;
1074 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1075 j = 0;
1076 for (i = 0; i < model->input_size; i++)
1077 if (!(disable_outgrad & ((uint64_t)1 << i)))
1078 outgrads[j++] = model->inputs[i];
1079 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1080 }
1081 if (compiled_data->parameter_flags)
1082 ccfreefree(parameters);
1083 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1084 if (compiled_data->minimize.parameters)
1085 _ccv_cnnp_apply_parameters_with_minimizer(model);
1086 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1087 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1088 for (i = 0; i < output_size; i++)
1089 {
1090 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1091 // Init this to 1 so we can backprop.
1092 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1093 }
1094 compiled_data->backward.to_size = 0;
1095 for (i = 0; i < parameter_size_maybe_more; i++)
1096 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1097 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1098 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1099 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1100 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1101 {
1102 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1103 continue;
1104 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1105 const int* tos;
1106 int to_size;
1107 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1108 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1109 {
1110 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1111 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1112 int flag = 0;
1113 const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = (
destination_count - i); (_a > _b) ? _a : _b; })
;
1114 for (j = i - 1; !flag && j >= 0; j--)
1115 if (j + outgrad_destination_start < destination_count)
1116 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1117 if (!flag) // Only if we cannot find it, we add it.
1118 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1119 }
1120 }
1121 if (parallel_count > 1)
1122 {
1123 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1124 0, 0,
1125 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1126 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1127 0, 0, 0,
1128 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1129 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1130 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1131 for (i = 0; i < evaluate_to_size; i++)
1132 for (j = 1; j < parallel_count; j++)
1133 {
1134 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1135 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1136 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1137 }
1138 const int backward_to_size = compiled_data->backward.to_size;
1139 for (i = 0; i < backward_to_size; i++)
1140 for (j = 1; j < parallel_count; j++)
1141 {
1142 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1143 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1144 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1145 }
1146 }
1147 // Only use memory compression if we are in gradient parameter mode.
1148 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1149 {
1150 if (model->memory_compression)
1151 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1152 if (model->memory_reduction)
1153 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1154 }
1155 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1156 compiled_data->gradient_mode = gradient_mode;
1157}
1158
1159void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1160{
1161 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1161, __extension__ __PRETTY_FUNCTION__
); }))
;
1162 const int parameter_size = compiled_data->parameters->rnum;
1163 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1164 const int internal_size = compiled_data->internals->rnum;
1165 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1166 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1167 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1168 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1169}
1170
1171int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1172{
1173 int i, j;
1174 const int parameter_size = compiled_data->parameters->rnum;
1175 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1176 const int internal_size = compiled_data->internals->rnum;
1177 for (i = 0; i < parameter_size; i++)
1178 {
1179 // parameters has to be allocated all together.
1180 if (compiled_data->tensors.parameters[i])
1181 {
1182 for (j = 1; j < parallel_count; j++)
1183 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1183, __extension__ __PRETTY_FUNCTION__
); }))
; }
1184 continue;
1185 }
1186 return 1;
1187 }
1188 for (i = 0; i < internal_size; i++)
1189 {
1190 if (!compiled_data->tensors.internals[i])
1191 return 1;
1192 for (j = 1; j < parallel_count; j++)
1193 if (!compiled_data->tensors.internals[i + j * internal_size])
1194 return 1;
1195 }
1196 return 0;
1197}
1198
1199void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1200{
1201 int i, j;
1202 const int parameter_size = compiled_data->parameters->rnum;
1203 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1204 const int internal_size = compiled_data->internals->rnum;
1205 for (i = 0; i < parameter_size; i++)
1206 {
1207 // parameters has to be allocated all together.
1208 if (compiled_data->tensors.parameters[i])
1209 {
1210 for (j = 1; j < parallel_count; j++)
1211 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1211, __extension__ __PRETTY_FUNCTION__
); }))
; }
1212 continue;
1213 }
1214 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1215 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1216 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1217 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1218 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1219 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1220 for (j = 1; j < parallel_count; j++)
1221 {
1222 if (j != device_id)
1223 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1224 else
1225 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1226 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1227 }
1228 }
1229 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1230 for (i = 0; i < internal_size; i++)
1231 {
1232 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1233 const int d = retained.d;
1234 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1235 continue;
1236 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1237 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1238 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1239 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1240 if (!compiled_data->tensors.internals[i])
1241 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1242 for (j = 1; j < parallel_count; j++)
1243 {
1244 if (j != device_id)
1245 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1246 else
1247 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1248 if (!compiled_data->tensors.internals[i + j * internal_size])
1249 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1250 }
1251 }
1252 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
; // Remove 1 if any.
1253}
1254
1255static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1256{
1257 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1258 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1259}
1260
1261static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1262{
1263 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1263, __extension__ __PRETTY_FUNCTION__
); }))
;
1264 int i, j;
1265 for (i = 0; i < tensor_size; i++)
1266 {
1267 if (!tensors[i])
1268 continue;
1269 const int d = tensor_symbols[i].d;
1270 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1271 continue;
1272 for (j = 1; j < parallel_count; j++)
1273 if (tensors[i + j * tensor_size])
1274 {
1275 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1276 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size]
) & ~(uintptr_t)1))
;
1277 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1278 }
1279 }
1280}
1281
1282static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1283{
1284 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1284, __extension__ __PRETTY_FUNCTION__
); }))
;
1285 int i, j;
1286 for (i = 0; i < tensor_size; i++)
1287 {
1288 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1289 for (j = 1; j < parallel_count; j++)
1290 {
1291 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1292 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1293 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1294 { // We shouldn't allocate this, free it up.
1295 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1296 tensors[i + j * tensor_size] = 0;
1297 }
1298 }
1299 }
1300}
1301
1302static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1303{
1304 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1304, __extension__ __PRETTY_FUNCTION__
); }))
;
1305 int i, j;
1306 for (i = 0; i < tensor_size; i++)
1307 {
1308 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1309 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1310 continue;
1311 if (graph)
1312 {
1313 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1314 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1315 tensor_symbol = alias_to;
1316 }
1317 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1318 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1319 {
1320 const ccv_nnc_tensor_bind_t retained_bind = {
1321 .symbol = tensor_symbol,
1322 .tensor = tensor
1323 };
1324 ccv_array_push(tensor_binds, &retained_bind);
1325 }
1326 for (j = 1; j < parallel_count; j++)
1327 {
1328 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1329 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1330 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1331 {
1332 const ccv_nnc_tensor_bind_t bind = {
1333 .symbol = copy,
1334 .tensor = tensors[i + j * tensor_size]
1335 };
1336 ccv_array_push(tensor_binds, &bind);
1337 }
1338 }
1339 }
1340}
1341
1342static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1343{
1344 if (compiled_data->graph)
1345 ccv_nnc_graph_free(compiled_data->graph);
1346 compiled_data->graph = 0;
1347 compiled_data->is_test = 0;
1348 if (compiled_data->tensor_arena)
1349 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1350 compiled_data->tensor_arena = 0;
1351 if (compiled_data->graph_exec_arena)
1352 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1353 compiled_data->graph_exec_arena = 0;
1354 if (compiled_data->backward.from_ops)
1355 ccfreefree(compiled_data->backward.from_ops);
1356 compiled_data->backward.from_ops = 0;
1357 if (compiled_data->evaluate.schedule)
1358 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1359 compiled_data->evaluate.schedule = 0;
1360 if (compiled_data->backward.schedule)
1361 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1362 compiled_data->backward.schedule = 0;
1363}
1364
1365static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1366{
1367 if (compiled_data->gradients)
1368 ccfreefree(compiled_data->gradients);
1369 compiled_data->gradients = 0;
1370 if (compiled_data->updated_parameters)
1371 ccfreefree(compiled_data->updated_parameters);
1372 compiled_data->updated_parameters = 0;
1373 compiled_data->update_nodes = 0;
1374 compiled_data->saved_aux = 0;
1375}
1376
1377static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1378{
1379 if (compiled_data->backward.gradients)
1380 ccfreefree(compiled_data->backward.gradients);
1381 compiled_data->backward.gradients = 0;
1382 if (compiled_data->backward.accum)
1383 ccv_nnc_graph_free(compiled_data->backward.accum);
1384 compiled_data->backward.accum = 0;
1385 if (compiled_data->backward.tensor_arena)
1386 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1387 compiled_data->backward.tensor_arena = 0;
1388 if (compiled_data->backward.graph_exec_arena)
1389 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1390 compiled_data->backward.graph_exec_arena = 0;
1391}
1392
1393static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1394{
1395 if (compiled_data->apply_gradients.graph)
1396 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1397 compiled_data->apply_gradients.graph = 0;
1398 if (compiled_data->apply_gradients.tensor_arena)
1399 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1400 compiled_data->apply_gradients.tensor_arena = 0;
1401 if (compiled_data->apply_gradients.graph_exec_arena)
1402 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1403 compiled_data->apply_gradients.graph_exec_arena = 0;
1404}
1405
1406// Compile the graph to run ccv_cnnp_model_fit
1407static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1408{
1409 int i, j;
1410 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1411 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1411, __extension__ __PRETTY_FUNCTION__
); }))
;
1412 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1413 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1414 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1414, __extension__ __PRETTY_FUNCTION__
); }))
;
1415 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1415
, __extension__ __PRETTY_FUNCTION__); }))
;
1416 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1416, __extension__ __PRETTY_FUNCTION__
); }))
;
1417 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1418 {
1419 _ccv_cnnp_model_set_rewindables(model);
1420 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1421 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1422 _ccv_cnnp_model_rewind_graph(model);
1423 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1424 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1425 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1426 }
1427 const int tensors_init = !!compiled_data->tensors_init.v;
1428 if (!tensors_init)
1429 _ccv_cnnp_model_tensors_init(model, compiled_data);
1430 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1431 // Check if it is not fully allocated, if it is not, init_1.
1432 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1433 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1434 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1434, __extension__ __PRETTY_FUNCTION__); }))
;
1435 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1435, __extension__ __PRETTY_FUNCTION__); }))
;
1436 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1436
, __extension__ __PRETTY_FUNCTION__); }))
;
1437 const int input_size_per_p = input_size / parallel_count;
1438 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1439 const int output_size_per_p = output_size / parallel_count;
1440 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1441 const int fit_size_per_p = fit_size / parallel_count;
1442 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1443 const int parameter_size = compiled_data->parameters->rnum;
1444 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1445 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1446 const int internal_size = compiled_data->internals->rnum;
1447 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1448 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1449 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1450 ccv_array_free(tensor_binds);
1451 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1452 if (tensors_init && parallel_count > 1)
1453 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1454 // If tensor is not init'ed, we need to init states first.
1455 if (_ccv_cnnp_any_to_init(compiled_data))
1456 {
1457 ccv_nnc_tensor_init_states_t tensor_init_states = {
1458 .parallel_count = parallel_count,
1459 .graph = model->graph,
1460 .compiled_data = compiled_data,
1461 .tensor_arena = compiled_data->tensor_arena
1462 };
1463 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1464 }
1465 compiled_data->is_test = 0;
1466 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1467 // No need to set because it is default to training mode.
1468 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1469 for (i = 0; i < saved_aux_size * parameter_size; i++)
1470 {
1471 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1472 continue;
1473 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1474 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1475 for (j = 1; j < parallel_count; j++)
1476 {
1477 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1478 if (copy)
1479 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1480 }
1481 }
1482 const int evaluate_to_size = compiled_data->evaluate.to_size;
1483 compiled_data->evaluate.to_op_size = 0;
1484 for (i = 0; i < evaluate_to_size; i++)
1485 {
1486 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1487 if (to.graph)
1488 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1489 }
1490 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1491 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1492}
1493
1494ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1495{
1496 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1497 if (!compiled_data || !compiled_data->graph)
1498 return 0;
1499 return ccv_nnc_graph_default_stream(compiled_data->graph);
1500}
1501
1502uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1503{
1504 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1505 if (!compiled_data || !compiled_data->tensor_arena)
1506 return 0;
1507 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1508}
1509
1510static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1511{
1512 int i, j;
1513 for (i = 0; i < tensor_size; i++)
1514 {
1515 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1516 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1517 continue;
1518 if (graph)
1519 {
1520 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1521 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1522 tensor_symbol = alias_to;
1523 }
1524 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1525 for (j = 1; j < parallel_count; j++)
1526 {
1527 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1528 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1529 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1530 }
1531 }
1532}
1533
1534void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1535{
1536 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1537 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1537, __extension__ __PRETTY_FUNCTION__); }))
;
1538 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1539 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1539, __extension__ __PRETTY_FUNCTION__
); }))
;
1540 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1540, __extension__ __PRETTY_FUNCTION__
); }))
;
1541 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1541
, __extension__ __PRETTY_FUNCTION__); }))
;
1542 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1542, __extension__ __PRETTY_FUNCTION__); }))
;
1543 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1544 {
1545 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1546 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1547 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1548 // Compile the symbolic graph down only when needed.
1549 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1550 } else {
1551 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1551, __extension__ __PRETTY_FUNCTION__); }))
;
1552 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1552, __extension__ __PRETTY_FUNCTION__); }))
;
1553 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1553
, __extension__ __PRETTY_FUNCTION__); }))
;
1554 const int input_size_per_p = input_size / parallel_count;
1555 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1556 const int output_size_per_p = output_size / parallel_count;
1557 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1558 const int fit_size_per_p = fit_size / parallel_count;
1559 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1560 }
1561 if (compiled_data->is_test)
1562 {
1563 compiled_data->is_test = 0;
1564 ccv_nnc_graph_exec_update_t update = {
1565 .parallel_count = parallel_count,
1566 .graph = model->graph,
1567 .graph_exec_arena = compiled_data->graph_exec_arena,
1568 };
1569 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1570 }
1571 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1572}
1573
1574// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1575static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1576{
1577 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1578 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1579 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1580 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1580, __extension__ __PRETTY_FUNCTION__
); }))
;
1581 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1581, __extension__ __PRETTY_FUNCTION__
); }))
;
1582 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1583 // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel.
1584 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1585 {
1586 const int evaluate_to_size = compiled_data->evaluate.to_size;
1587 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1588 _ccv_cnnp_model_set_rewindables(model);
1589 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1590 0, 0,
1591 0, 0, 0,
1592 0, 0, 0,
1593 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1594 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1595 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1596 int i, j;
1597 for (i = 0; i < evaluate_to_size; i++)
1598 for (j = 1; j < parallel_count; j++)
1599 {
1600 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1601 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1602 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1603 }
1604 }
1605 const int tensors_init = !!compiled_data->tensors_init.v;
1606 if (!tensors_init)
1607 _ccv_cnnp_model_tensors_init(model, compiled_data);
1608 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1609 // Check if it is not fully allocated, if it is not, init_1.
1610 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1611 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1612 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1612, __extension__ __PRETTY_FUNCTION__); }))
;
1613 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1613, __extension__ __PRETTY_FUNCTION__); }))
;
1614 const int input_size_per_p = input_size / parallel_count;
1615 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1616 const int output_size_per_p = output_size / parallel_count;
1617 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1618 const int parameter_size = compiled_data->parameters->rnum;
1619 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1620 const int internal_size = compiled_data->internals->rnum;
1621 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1622 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1623 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1624 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1625 ccv_array_free(tensor_binds);
1626 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1627 // If tensor is not init'ed, we need to init states first.
1628 if (tensors_init && parallel_count > 1)
1629 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1630 if (_ccv_cnnp_any_to_init(compiled_data))
1631 {
1632 ccv_nnc_tensor_init_states_t tensor_init_states = {
1633 .parallel_count = parallel_count,
1634 .graph = model->graph,
1635 .compiled_data = compiled_data,
1636 .tensor_arena = compiled_data->tensor_arena
1637 };
1638 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1639 }
1640 compiled_data->is_test = 1;
1641 ccv_nnc_graph_exec_update_t update = {
1642 .parallel_count = parallel_count,
1643 .graph = model->graph,
1644 .graph_exec_arena = compiled_data->graph_exec_arena,
1645 };
1646 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1647 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1648 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1649}
1650
1651static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1652{
1653 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1653, __extension__ __PRETTY_FUNCTION__
); }))
;
1654 const int parameter_size = compiled_data->parameters->rnum;
1655 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1656 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1657 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1658 int i, j;
1659 for (i = 0; i < parameter_size; i++)
1660 {
1661 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1662 {
1663 compiled_data->tensors.gradients[i] = 0;
1664 compiled_data->tensors.accum_gradients[i] = 0;
1665 for (j = 1; j < parallel_count; j++)
1666 {
1667 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1668 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1669 }
1670 continue;
1671 }
1672 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1673 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1674 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1675 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1676 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1677 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1678 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1679 for (j = 1; j < parallel_count; j++)
1680 {
1681 if (j != device_id)
1682 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1683 else
1684 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1685 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1686 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1687 }
1688 }
1689}
1690
1691static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1692{
1693 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1694 return 1;
1695 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1696 return 0;
1697 int i;
1698 for (i = 0; i < input_size; i++)
1699 if (!(disable_outgrad & ((uint64_t)1 << i)))
1700 return 0;
1701 return 1;
1702}
1703
1704// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1705// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1706static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1707{
1708 int i, j;
1709 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1710 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1711 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1711, __extension__ __PRETTY_FUNCTION__
); }))
;
1712 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1713 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1714 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1714, __extension__ __PRETTY_FUNCTION__
); }))
;
1715 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1715, __extension__ __PRETTY_FUNCTION__
); }))
;
1716 // There shouldn't be a loss function if we evaluate with multistage jit.
1717 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1717, __extension__ __PRETTY_FUNCTION__
); }))
;
1718 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1719 {
1720 _ccv_cnnp_model_set_rewindables(model);
1721 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1722 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1723 _ccv_cnnp_model_rewind_graph(model);
1724 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1725 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1726 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1727 }
1728 const int tensors_init = !!compiled_data->tensors_init.v;
1729 if (!tensors_init)
1730 _ccv_cnnp_model_tensors_init(model, compiled_data);
1731 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1732 // Check if it is not fully allocated, if it is not, init_1.
1733 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1734 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1735 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1735, __extension__ __PRETTY_FUNCTION__); }))
;
1736 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1736, __extension__ __PRETTY_FUNCTION__); }))
;
1737 const int input_size_per_p = input_size / parallel_count;
1738 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1739 const int output_size_per_p = output_size / parallel_count;
1740 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1741 const int parameter_size = compiled_data->parameters->rnum;
1742 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1743 const int internal_size = compiled_data->internals->rnum;
1744 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1745 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1746 if (!compiled_data->tensors.gradients)
1747 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1748 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1749 if (compiled_data->backward.to_size > 0)
1750 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1751 else
1752 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1753 ccv_array_free(tensor_binds);
1754 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1755 if (tensors_init && parallel_count > 1)
1756 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1757 // If tensor is not init'ed, we need to init states first.
1758 if (_ccv_cnnp_any_to_init(compiled_data))
1759 {
1760 ccv_nnc_tensor_init_states_t tensor_init_states = {
1761 .parallel_count = parallel_count,
1762 .graph = model->graph,
1763 .compiled_data = compiled_data,
1764 .tensor_arena = compiled_data->tensor_arena
1765 };
1766 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1767 }
1768 compiled_data->is_test = is_test;
1769 ccv_nnc_graph_exec_update_t update = {
1770 .parallel_count = parallel_count,
1771 .graph = model->graph,
1772 .graph_exec_arena = compiled_data->graph_exec_arena,
1773 };
1774 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1775 const int evaluate_to_size = compiled_data->evaluate.to_size;
1776 compiled_data->evaluate.to_op_size = 0;
1777 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1778 for (i = 0; i < evaluate_to_size; i++)
1779 {
1780 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1781 if (to_op.graph)
1782 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1783 const int* tos;
1784 int to_size;
1785 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1786 for (j = 0; j < to_size; j++)
1787 {
1788 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1789 .d = tos[j],
1790 .graph = model->graph
1791 });
1792 if (to_op.graph)
1793 ccv_array_add_unique_int(backward_from, to_op.d);
1794 }
1795 }
1796 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1796, __extension__
__PRETTY_FUNCTION__); }))
;
1797 compiled_data->backward.from_op_size = backward_from->rnum;
1798 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1799 for (i = 0; i < backward_from->rnum; i++)
1800 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1801 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1802 .graph = compiled_data->graph,
1803 };
1804 // If there are any set node (to set some tensors to 0) inserted through backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find out these nodes and explicitly adding them to backward.from_ops.
1805 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)->
data)) + (size_t)(compiled_data->graph->exec_info)->
rsize * (size_t)(0)))
;
1806 const int exec_info_size = compiled_data->graph->exec_info->rnum;
1807 uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t));
1808 const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data
)) + (size_t)(compiled_data->graph->sources)->rsize *
(size_t)(0)))
;
1809 const int source_size = compiled_data->graph->sources->rnum;
1810 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_
++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_].
d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == compiled_data->graph
) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[
_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void
*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t
)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); --
_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_
[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size
); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_
].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0)
, __extension__ ({ if (_incomings_[(compiled_data->evaluate
.to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1810, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(compiled_data->evaluate.to_ops
)[_i_].d].c > 0) continue; _visit_->node[_visit_->size
].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_
->node[_visit_->size].term = ((_incomings_[(compiled_data
->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if (
_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof (
(_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__
({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail
("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c"
, 1810, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
1811 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1812 visited[(idx >> 5)] |= (1u << (idx & 31));
1813 } ccv_nnc_graph_visit_endfor} }
1814 ccv_nnc_graph_visit_free(visit);
1815 const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)->
data)) + (size_t)(compiled_data->graph->destinations)->
rsize * (size_t)(0)))
;
1816 const int destination_size = compiled_data->graph->destinations->rnum;
1817 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } int _exist_size_[2] = { (compiled_data->backward
.from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[
_idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (compiled_data->backward.from_op_size)
; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(compiled_data->backward.from_ops)[_i_
].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail
("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c"
, 1817, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_
][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (
_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == compiled_data->graph)
; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (compiled_data->backward.from_op_size); _i_++
) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_
].graph == compiled_data->graph) ? 1 : 0), __extension__ (
{ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (compiled_data->backward.from_ops
)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data
->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 6 && _d_ < (destination_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1817, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1818 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1819 visited[(idx >> 5)] |= (1u << (idx & 31));
1820 } ccv_nnc_graph_visit_endfor} }
1821 ccv_nnc_graph_visit_free(visit);
1822 visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_]
= (sources)[_i_].d; } int _exist_size_[2] = { (source_size),
0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue;
_incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0]
[_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == compiled_data->graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data
->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == compiled_data->graph) ; else
__assert_fail ("(sources)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info
)[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum
== 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[
_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(destination_size)) { _exists_[_p_][_i_] = d; continue; } } else
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if
(_incomings_[d].c == 0 && _incomings_[d].r == 6 &&
_d_ < (destination_size)) { ((void) sizeof ((_exist_size_
[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == compiled_data->
graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph
== compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_cnnp_model.c", 1822, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
1823 // Find any missing nodes to be added as source. Right now, these are only set nodes.
1824 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1825 if (!(visited[(idx >> 5)] & (1u << (idx & 31))))
1826 {
1827 assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ? 1 : 0), __extension__ ({ if (exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD
) ; else __assert_fail ("exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD"
, "ccv_cnnp_model.c", 1827, __extension__ __PRETTY_FUNCTION__
); }))
;
1828 if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for empty out the tensor set function, not for the set grad to 1 one.
1829 ccv_array_add_unique_int(backward_from, idx);
1830 }
1831 } ccv_nnc_graph_visit_endfor} }
1832 ccv_nnc_graph_visit_free(visit);
1833 ccfreefree(visited);
1834 if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this.
1835 {
1836 compiled_data->backward.from_op_size = backward_from->rnum;
1837 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1838 for (i = 0; i < backward_from->rnum; i++)
1839 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1840 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1841 .graph = compiled_data->graph,
1842 };
1843 }
1844 ccv_array_free(backward_from);
1845 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1846 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1847}
1848
1849void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1850{
1851 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1852 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1852, __extension__ __PRETTY_FUNCTION__); }))
;
1853 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1854 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1854, __extension__ __PRETTY_FUNCTION__
); }))
;
1855 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1855, __extension__ __PRETTY_FUNCTION__
); }))
;
1856 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1856, __extension__ __PRETTY_FUNCTION__); }))
;
1857 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1858 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1859 if (!compiled_data->graph || mode_mismatch)
1860 {
1861 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1862 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad.
1863 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1864 if (params.requires_grad)
1865 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1866 else
1867 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1868 } else {
1869 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1870 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1870, __extension__ __PRETTY_FUNCTION__); }))
;
1871 const int input_size_per_p = input_size / parallel_count;
1872 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1873 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1873, __extension__ __PRETTY_FUNCTION__); }))
;
1874 const int output_size_per_p = output_size / parallel_count;
1875 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1876 }
1877 if (compiled_data->is_test != params.is_test)
1878 {
1879 compiled_data->is_test = params.is_test;
1880 ccv_nnc_graph_exec_update_t update = {
1881 .parallel_count = parallel_count,
1882 .graph = model->graph,
1883 .graph_exec_arena = compiled_data->graph_exec_arena,
1884 };
1885 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1886 }
1887}
1888
1889void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1890{
1891 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1892 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1892, __extension__ __PRETTY_FUNCTION__); }))
;
1893 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1894 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1895 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1896 else {
1897 if (!compiled_data->evaluate.schedule)
1898 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1899 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1900 }
1901}
1902
1903// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1904// Particularly, this method compiles the accumulator graph.
1905static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1906{
1907 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1908 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1908, __extension__ __PRETTY_FUNCTION__); }))
;
1909 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1909, __extension__ __PRETTY_FUNCTION__
); }))
;
1910 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1911 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1912 const int parameter_size = compiled_data->parameters->rnum;
1913 int i, j;
1914 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1915 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1916 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1917 for (i = 0; i < parameter_size; i++)
1918 for (j = 0; j < parallel_count; j++)
1919 if (compiled_data->tensors.gradients[i + j * parameter_size])
1920 {
1921 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1922 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
1923 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1924 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1925 ccv_nnc_tensor_symbol_t inputs[2];
1926 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1927 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1928 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1929 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1930 } else {
1931 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1932 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1933 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1934 }
1935 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1936 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1937 {
1938 ccv_nnc_symbolic_graph_free(accum);
1939 // Create empty graph.
1940 compiled_data->backward.accum = ccv_nnc_graph_new();
1941 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1942 return;
1943 }
1944 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1945 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1946 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1947 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1948 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1949 ccv_nnc_symbolic_graph_free(accum);
1950 ccv_array_free(tensor_binds);
1951 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1952}
1953
1954void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1955{
1956 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1957 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1957, __extension__ __PRETTY_FUNCTION__); }))
;
1958 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1958, __extension__ __PRETTY_FUNCTION__
); }))
;
1959 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1960 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1960, __extension__ __PRETTY_FUNCTION__
); }))
;
1961 if (outgrad_size > 0)
1962 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1962, __extension__ __PRETTY_FUNCTION__
); }))
; }
1963 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1963, __extension__ __PRETTY_FUNCTION__); }))
;
1964 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1964, __extension__ __PRETTY_FUNCTION__
); }))
;
1965 const int parameter_size = compiled_data->parameters->rnum;
1966 // If we need to accumulate the gradients now, do jit on accumulator.
1967 if (compiled_data->backward.count > 0)
1968 {
1969 if (!compiled_data->backward.accum)
1970 _ccv_cnnp_model_multistage_jit_1(model);
1971 else if (compiled_data->backward.count == 1) {
1972 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1973 int i;
1974 for (i = 0; i < parameter_size * parallel_count; i++)
1975 {
1976 ccv_nnc_tensor_t* tensor;
1977 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
1978 }
1979 if (compiled_data->backward.tensor_arena)
1980 {
1981 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1982 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1983 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1984 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1985 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1986 }
1987 }
1988 }
1989 const int ingrad_size_per_p = model->output_size;
1990 const int outgrad_size_per_p = compiled_data->outgrad_size;
1991 int i, j;
1992 for (i = 0; i < ingrad_size_per_p; i++)
1993 {
1994 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1995 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1996 {
1997 // Set it to 1 if it is not specified.
1998 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1999 if (ingrad_tensor)
2000 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2001 for (j = 1; j < parallel_count; j++)
2002 {
2003 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
2004 if (ingrad_tensor)
2005 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2006 }
2007 } else {
2008 // Make sure the length matches, in case it is an alias.
2009 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 2009, __extension__ __PRETTY_FUNCTION__
); }))
;
2010 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
2011 for (j = 1; j < parallel_count; j++)
2012 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
2013 }
2014 }
2015 if (outgrad_size > 0)
2016 {
2017 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 2017, __extension__ __PRETTY_FUNCTION__
); }))
;
2018 for (i = 0; i < outgrad_size_per_p; i++)
2019 if (outgrads[i])
2020 {
2021 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
2022 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
2023 for (j = 1; j < parallel_count; j++)
2024 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
2025 }
2026 } else {
2027 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2028, __extension__ __PRETTY_FUNCTION__
); }))
2028 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 2028, __extension__ __PRETTY_FUNCTION__
); }))
;
2029 }
2030 // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients.
2031 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
2032 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
2033 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2034 if (!compiled_data->backward.schedule)
2035 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
2036 // Run the backward pass.
2037 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
2038 // If we need to run accumulation round, do that now.
2039 if (compiled_data->backward.count > 0)
2040 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2041 // Update the count, this determines whether we need to accumulate or not.
2042 ++compiled_data->backward.count;
2043}
2044
2045// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2046// Particularly, this method compiles the parameter update graph.
2047static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2048{
2049 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2050 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2050, __extension__ __PRETTY_FUNCTION__
); }))
;
2051 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2052 const int parameter_size = compiled_data->parameters->rnum;
2053 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2054 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2055 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2056 // Bind accumulated gradients.
2057 if (compiled_data->backward.count > 1)
2058 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2059 else
2060 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2061 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2062 int i, j;
2063 for (i = 0; i < compiled_data->backward.to_size; i++)
2064 {
2065 const int* tos;
2066 int to_size;
2067 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2068 for (j = 0; j < to_size; j++)
2069 {
2070 // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply
2071 // gradients graph.
2072 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2073 .d = tos[j],
2074 .graph = model->graph,
2075 });
2076 if (!exec.graph)
2077 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2078 }
2079 }
2080 const int from_size = apply_gradients_from->rnum;
2081 if (from_size == 0)
2082 {
2083 ccv_array_free(apply_gradients_from);
2084 ccv_array_free(tensor_binds);
2085 return;
2086 }
2087 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2088 for (i = 0; i < from_size; i++)
2089 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2090 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2091 .graph = model->graph
2092 };
2093 ccv_array_free(apply_gradients_from);
2094 // It can only ends with updates on the parameters.
2095 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2096 for (i = 0; i < parameter_size; i++)
2097 {
2098 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2099 continue;
2100 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2101 for (j = 1; j < parallel_count; j++)
2102 {
2103 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2104 ccv_array_push(tos, &copy);
2105 }
2106 }
2107 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2108 ccv_array_free(tos);
2109 ccv_array_free(tensor_binds);
2110 ccfreefree(froms);
2111 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2112 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2113 {
2114 // Skip on no tensor.
2115 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2116 continue;
2117 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2118 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2119 for (j = 1; j < parallel_count; j++)
2120 {
2121 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2122 if (copy)
2123 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2124 }
2125 }
2126 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2127}
2128
2129void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2130{
2131 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2132 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2132, __extension__ __PRETTY_FUNCTION__); }))
;
2133 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2133, __extension__ __PRETTY_FUNCTION__
); }))
;
2134 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2135 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2135, __extension__ __PRETTY_FUNCTION__); }))
;
2136 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2136, __extension__ __PRETTY_FUNCTION__
); }))
;
2137 // Skip if there is no backward pass.
2138 if (compiled_data->backward.count <= 0)
2139 return;
2140 // Skip if there is no parameters.
2141 if (compiled_data->parameters->rnum == 0)
2142 {
2143 compiled_data->backward.count = 0;
2144 return;
2145 }
2146 if (!compiled_data->apply_gradients.graph)
2147 _ccv_cnnp_model_multistage_jit_2(model);
2148 else {
2149 const int parameter_size = compiled_data->parameters->rnum;
2150 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2151 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2152 if (compiled_data->backward.count > 1)
2153 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2154 else
2155 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2156 }
2157 if (compiled_data->apply_gradients.graph)
2158 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2159 // Reset backward count to 0.
2160 compiled_data->backward.count = 0;
2161}
2162
2163void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2164{
2165 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2166 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2167 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2167, __extension__ __PRETTY_FUNCTION__
); }))
;
2168 const int tensors_init = !!compiled_data->tensors_init.v;
2169 if (!tensors_init)
2170 _ccv_cnnp_model_tensors_init(model, compiled_data);
2171 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2172 // Check if it is not fully allocated, if it is not, init_1.
2173 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2174 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2175 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2176 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2177 if (param_ref < 0)
2178 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2178
, __extension__ __PRETTY_FUNCTION__); }))
; }
2179 else
2180 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2180, __extension__ __PRETTY_FUNCTION__
); }))
; }
2181 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2182 ccv_array_free(parameter_indices);
2183 const int parameter_size = compiled_data->parameters->rnum;
2184 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2184
, __extension__ __PRETTY_FUNCTION__); }))
;
2185 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2185, __extension__ __PRETTY_FUNCTION__
); }))
;
2186 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2187 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2188 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2188, __extension__
__PRETTY_FUNCTION__); }))
;
2189 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2190 int i;
2191 for (i = 1; i < parallel_count; i++)
2192 {
2193 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2194 if (copy_tensor)
2195 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2196 }
2197 // Mark this symbol as init'ed.
2198 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2199 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2200 init_v[s >> 5] |= (1u << (s & 0x1f));
2201}
2202
2203void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2204{
2205 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2206 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2207 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2207, __extension__ __PRETTY_FUNCTION__
); }))
;
2208 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2208, __extension__ __PRETTY_FUNCTION__
); }))
;
2209 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2210 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2211 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2212 if (param_ref < 0)
2213 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2213
, __extension__ __PRETTY_FUNCTION__); }))
; }
2214 else
2215 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2215, __extension__ __PRETTY_FUNCTION__
); }))
; }
2216 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2217 ccv_array_free(parameter_indices);
2218 const int parameter_size = compiled_data->parameters->rnum;
2219 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2219
, __extension__ __PRETTY_FUNCTION__); }))
;
2220 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2220, __extension__ __PRETTY_FUNCTION__
); }))
;
2221 // We don't need to consider parallel_count, every parameter on each device is identical.
2222 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2223 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2223, __extension__
__PRETTY_FUNCTION__); }))
;
2224 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2225}
2226
2227ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2228{
2229 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2230 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2231 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2231, __extension__ __PRETTY_FUNCTION__
); }))
;
2232 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2232, __extension__ __PRETTY_FUNCTION__
); }))
;
2233 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2234 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2235 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2236 if (param_ref < 0)
2237 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2237
, __extension__ __PRETTY_FUNCTION__); }))
; }
2238 else
2239 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2239, __extension__ __PRETTY_FUNCTION__
); }))
; }
2240 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2241 ccv_array_free(parameter_indices);
2242 const int parameter_size = compiled_data->parameters->rnum;
2243 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2243
, __extension__ __PRETTY_FUNCTION__); }))
;
2244 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2244, __extension__ __PRETTY_FUNCTION__
); }))
;
2245 // We don't need to consider parallel_count, every parameter on each device is identical.
2246 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2247 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2247, __extension__
__PRETTY_FUNCTION__); }))
;
2248 return tensor->info;
2249}
2250
2251const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2252{
2253 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2254 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2255 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2255, __extension__ __PRETTY_FUNCTION__
); }))
;
2256 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2257 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2258 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2259 if (param_ref < 0)
2260 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2260
, __extension__ __PRETTY_FUNCTION__); }))
; }
2261 else
2262 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2262, __extension__ __PRETTY_FUNCTION__
); }))
; }
2263 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2264 ccv_array_free(parameter_indices);
2265 const int parameter_size = compiled_data->parameters->rnum;
2266 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2266
, __extension__ __PRETTY_FUNCTION__); }))
;
2267 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2267, __extension__ __PRETTY_FUNCTION__
); }))
;
2268 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2269}
2270
2271int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2272{
2273 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2273, __extension__ __PRETTY_FUNCTION__
); }))
;
2274 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2275 return compiled_data->parameters->rnum;
2276}
2277
2278ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2279{
2280 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2281 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2281, __extension__ __PRETTY_FUNCTION__); }))
;
2282 const int parameter_size = compiled_data->parameters->rnum;
2283 int i;
2284 for (i = 0; i < parameter_size; i++)
2285 {
2286 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2287 if (first(model, name, context))
2288 return ccv_cnnp_model_parameters(model, -1, i);
2289 }
2290 return 0;
2291}
2292
2293ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2294{
2295 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2296 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2296, __extension__ __PRETTY_FUNCTION__); }))
;
2297 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2298 const int parameter_size = compiled_data->parameters->rnum;
2299 int i;
2300 for (i = 0; i < parameter_size; i++)
2301 {
2302 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2303 if (filter(model, name, context))
2304 {
2305 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2306 ccv_array_push(parameters, &parameter);
2307 }
2308 }
2309 return parameters;
2310
2311}
2312
2313CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2314{
2315 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2316 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2316, __extension__ __PRETTY_FUNCTION__); }))
;
2317 const int tensors_init = !!compiled_data->tensors_init.v;
2318 if (!tensors_init) // If nothing initialized, we return parameter 0.
2319 return ccv_cnnp_model_parameters(model, -1, 0);
2320 const int parameter_size = compiled_data->parameters->rnum;
2321 int i;
2322 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2323 for (i = 0; i < parameter_size; i++)
2324 {
2325 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2326 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2327 return ccv_cnnp_model_parameters(model, -1, i);
2328 }
2329 return 0;
2330}
2331
2332static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2333{
2334 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2335 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2335, __extension__
__PRETTY_FUNCTION__); }))
;
2336 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2337 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2338 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2339 return to_parameter_indices;
2340}
2341
2342static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2343{
2344 // If the model is not compiled yet. Compile them now.
2345 if (!model->graph)
2346 {
2347 model->graph = ccv_nnc_symbolic_graph_new();
2348 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2348, __extension__ __PRETTY_FUNCTION__
); }))
;
2349 const int input_size = from_model->input_size;
2350 ccv_nnc_tensor_param_t input_params[input_size];
2351 int i;
2352 for (i = 0; i < input_size; i++)
2353 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2354 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2355 model->parallel_count = from_model->parallel_count;
2356 model->memory_compression = from_model->memory_compression;
2357 model->memory_reduction = from_model->memory_reduction;
2358 model->gradient_checkpointing = from_model->gradient_checkpointing;
2359 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2360 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2361 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2362 }
2363 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2364 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2364, __extension__ __PRETTY_FUNCTION__
); }))
;
2365 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2366 if (!to_tensors_init)
2367 {
2368 if (only_init_0)
2369 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2370 else
2371 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2372 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2373 // Check if it is not fully allocated, if it is not, init_1.
2374 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2375 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2375, __extension__ __PRETTY_FUNCTION__
); }))
;
2376 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2377 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2378 if (*from_param_ref < 0 && *param_ref >= 0)
2379 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2379, __extension__ __PRETTY_FUNCTION__
); }))
; }
2380 else if (*from_param_ref >= 0)
2381 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2381, __extension__ __PRETTY_FUNCTION__
); }))
; }
2382 if (*param_ref < 0 && *from_param_ref >= 0)
2383 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2383, __extension__ __PRETTY_FUNCTION__); }))
; }
2384 else if (*param_ref >= 0)
2385 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2385, __extension__ __PRETTY_FUNCTION__
); }))
; }
2386}
2387
2388void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2389{
2390 ccv_array_t* to_parameter_indices;
2391 int to_param_ref;
2392 ccv_array_t* from_parameter_indices;
2393 int from_param_ref;
2394 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2395 // Should be exactly the same tensor.
2396 if (to_param_ref < 0 && from_param_ref < 0)
2397 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2397, __extension__ __PRETTY_FUNCTION__
); }))
; }
2398 // To models.
2399 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2400 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2400, __extension__ __PRETTY_FUNCTION__
); }))
;
2401 // From models.
2402 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2403 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2404 const int to_parameter_size = to_compiled_data->parameters->rnum;
2405 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2406 int i, j;
2407 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2408 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2409 for (i = 0; i < rnum; i++)
2410 {
2411 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2412 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2412, __extension__ __PRETTY_FUNCTION__); }))
;
2413 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2413, __extension__ __PRETTY_FUNCTION__
); }))
;
2414 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2415 // If the original is not init'ed. We cannot copy from.
2416 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2417 continue;
2418 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2419 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2419, __extension__ __PRETTY_FUNCTION__); }))
;
2420 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2420, __extension__ __PRETTY_FUNCTION__
); }))
;
2421 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2422 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2422, __extension__
__PRETTY_FUNCTION__); }))
;
2423 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2424 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2424, __extension__
__PRETTY_FUNCTION__); }))
;
2425 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2426 for (j = 1; j < parallel_count; j++)
2427 {
2428 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2429 if (copy_tensor)
2430 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2431 }
2432 // Mark this symbol as init'ed.
2433 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2434 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2435 }
2436 ccv_array_free(to_parameter_indices);
2437 ccv_array_free(from_parameter_indices);
2438}
2439
2440KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
25
Null pointer value stored to field 'vals'
30
Taking true branch
31
Taking false branch
32
Calling 'kh_resize_ccv_cnnp_parameter_id'
33
Taking true branch
34
Assuming the condition is false
35
Taking false branch
36
'?' condition is true
37
Assuming 'new_flags' is null
38
Taking true branch
39
Returning without writing to 'h->vals'
40
Returning from 'kh_resize_ccv_cnnp_parameter_id'
41
Taking true branch
42
Returning without writing to 'h->vals'
2441
2442void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2443{
2444 ccv_array_t* to_parameter_indices;
2445 int to_param_ref;
2446 ccv_array_t* from_parameter_indices;
2447 int from_param_ref;
2448 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2449 // Should be exactly the same tensor.
2450 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2451 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2451, __extension__ __PRETTY_FUNCTION__
); }))
; }
2452 // To models.
2453 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2454 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2454, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2455 // From models.
2456 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2457 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2458 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2458, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count can share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2459 const int from_parameter_size = from_compiled_data->parameters->rnum;
2460 const int to_parameter_size = to_compiled_data->parameters->rnum;
2461 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2462 int i, j;
2463 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2464 char* updated_name = 0;
2465 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2466 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2467 for (i = 0; i < rnum; i++)
2468 {
2469 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2470 // Need to figure out how to use the renamer here.
2471 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2472 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2472, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2473 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2473, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2474 if (renamer
18.1
'renamer' is non-null
)
2475 {
2476 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2477 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2478 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2479 updated_name = (char*)ccmallocmalloc(1024);
2480 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2481 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2482 memcpy(updated_name, src_name, src_name_len);
2483 updated_name[src_name_len] = 0;
2484 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2485 continue; // Skip this.
2486 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2487 {
2488 // Nothing changed.
2489 } else {
2490 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2491 {
2492 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
24
Calling 'kh_init_ccv_cnnp_parameter_id'
26
Returning from 'kh_init_ccv_cnnp_parameter_id'
2493 for (j = 0; j < from_parameter_size; j++)
27
Assuming 'j' is < 'from_parameter_size'
28
Loop condition is true. Entering loop body
2494 {
2495 int ret;
2496 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
29
Calling 'kh_put_ccv_cnnp_parameter_id'
43
Returning from 'kh_put_ccv_cnnp_parameter_id'
2497 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2497
, __extension__ __PRETTY_FUNCTION__); }))
;
44
Taking true branch
2498 kh_val(id_map, k)((id_map)->vals[k]) = j;
45
Array access (via field 'vals') results in a null pointer dereference
2499 }
2500 }
2501 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2502 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2503 continue;
2504 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2505 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2505, __extension__ __PRETTY_FUNCTION__); }))
;
2506 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2506, __extension__
__PRETTY_FUNCTION__); }))
;
2507 }
2508 }
2509 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2509, __extension__ __PRETTY_FUNCTION__); }))
;
2510 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2510, __extension__
__PRETTY_FUNCTION__); }))
;
2511 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2512 // If the original is not init'ed. We cannot share from.
2513 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2514 continue;
2515 for (j = 0; j < parallel_count; j++)
2516 {
2517 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2518 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2518, __extension__
__PRETTY_FUNCTION__); }))
;
2519 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2520 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2521 ccv_nnc_tensor_free(dest);
2522 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2523 }
2524 // Mark this symbol as init'ed.
2525 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2526 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2527 }
2528 ccv_array_free(to_parameter_indices);
2529 ccv_array_free(from_parameter_indices);
2530 if (id_map)
2531 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2532 if (updated_name)
2533 ccfreefree(updated_name);
2534 // Mark it as incomplete so we will call init_1.
2535 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2536 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2537 else // Remove the flag.
2538 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2539}
2540
2541ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2542{
2543 if (!compiled_data->stream_map)
2544 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2545 int ret = 0;
2546 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2547 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2547, __extension__ __PRETTY_FUNCTION__); }))
;
2548 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2549 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2550 if (ret != 0)
2551 {
2552 stream = ccv_nnc_stream_context_new(type);
2553 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2554 }
2555 return stream;
2556}
2557
2558void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2559{
2560 ccv_array_t* to_parameter_indices;
2561 int to_param_ref;
2562 ccv_array_t* from_parameter_indices;
2563 int from_param_ref;
2564 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2565 // Should be exactly the same tensor.
2566 if (to_param_ref < 0 && from_param_ref < 0)
2567 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2567, __extension__ __PRETTY_FUNCTION__
); }))
; }
2568 // To models.
2569 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2570 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2570, __extension__ __PRETTY_FUNCTION__
); }))
;
2571 // From models.
2572 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2573 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2574 const int to_parameter_size = to_compiled_data->parameters->rnum;
2575 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2576 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2576, __extension__ __PRETTY_FUNCTION__
); }))
;
2577 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2577, __extension__ __PRETTY_FUNCTION__
); }))
;
2578 int i, j;
2579 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2580 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2581 for (i = 0; i < aux_in_size; i++)
2582 inputs[i + 2] = aux_ins[i];
2583 for (i = 0; i < aux_out_size; i++)
2584 outputs[i + 1] = aux_outs[i];
2585 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2586 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2587 for (i = 0; i < rnum; i++)
2588 {
2589 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2590 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2590, __extension__ __PRETTY_FUNCTION__); }))
;
2591 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2591, __extension__ __PRETTY_FUNCTION__
); }))
;
2592 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2593 // If the original is not init'ed. We cannot copy from.
2594 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2595 continue;
2596 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2597 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2597, __extension__ __PRETTY_FUNCTION__); }))
;
2598 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2598, __extension__ __PRETTY_FUNCTION__
); }))
;
2599 if (parallel_count > 1)
2600 {
2601 ccv_nnc_stream_context_t* streams[parallel_count];
2602 ccv_nnc_stream_signal_t* signal;
2603 if (stream_context)
2604 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2605 for (j = 0; j < parallel_count; j++)
2606 {
2607 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2608 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2609 if (!dest || !src)
2610 {
2611 streams[j] = 0;
2612 continue;
2613 }
2614 // At the moment, can only handle them on the same device.
2615 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2615, __extension__ __PRETTY_FUNCTION__
); }))
;
2616 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2616, __extension__ __PRETTY_FUNCTION__
); }))
;
2617 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2618 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2619 int type = stream_type;
2620 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2621 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2622 // Wait signal to finish.
2623 if (stream_context)
2624 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2625 inputs[0] = outputs[0] = dest;
2626 inputs[1] = src;
2627 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2628 if (stream_context)
2629 {
2630 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2631 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2632 }
2633 streams[j] = stream_0;
2634 }
2635 // If this should be blocking, blocking it.
2636 if (!stream_context)
2637 for (j = 0; j < parallel_count; j++)
2638 if (streams[j])
2639 ccv_nnc_stream_context_wait(streams[j]);
2640 } else {
2641 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2642 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2642, __extension__
__PRETTY_FUNCTION__); }))
;
2643 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2644 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2644, __extension__
__PRETTY_FUNCTION__); }))
;
2645 inputs[0] = outputs[0] = dest;
2646 inputs[1] = src;
2647 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2648 }
2649 // Mark this symbol as init'ed.
2650 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2651 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2652 }
2653 ccv_array_free(to_parameter_indices);
2654 ccv_array_free(from_parameter_indices);
2655}
2656
2657void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2658{
2659 int to_param_ref;
2660 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2661 // To models.
2662 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2663 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2663, __extension__ __PRETTY_FUNCTION__
); }))
;
2664 // Tensor has to be inited already.
2665 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2665, __extension__ __PRETTY_FUNCTION__
); }))
;
2666 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2666, __extension__ __PRETTY_FUNCTION__
); }))
;
2667 // From models.
2668 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2669 const int to_parameter_size = to_compiled_data->parameters->rnum;
2670 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2671 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2671, __extension__ __PRETTY_FUNCTION__
); }))
;
2672 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2672, __extension__ __PRETTY_FUNCTION__
); }))
;
2673 int i, j;
2674 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2675 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2676 for (i = 0; i < aux_in_size; i++)
2677 inputs[i + 1] = aux_ins[i];
2678 for (i = 0; i < aux_out_size; i++)
2679 outputs[i + 1] = aux_outs[i];
2680 for (i = 0; i < rnum; i++)
2681 {
2682 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2683 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2683, __extension__ __PRETTY_FUNCTION__); }))
;
2684 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2684, __extension__ __PRETTY_FUNCTION__
); }))
;
2685 if (parallel_count > 1)
2686 {
2687 ccv_nnc_stream_context_t* streams[parallel_count];
2688 ccv_nnc_stream_signal_t* signal;
2689 if (stream_context)
2690 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2691 for (j = 0; j < parallel_count; j++)
2692 {
2693 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2694 if (!dest)
2695 {
2696 streams[j] = 0;
2697 continue;
2698 }
2699 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2700 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2701 int type = stream_type;
2702 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2703 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2704 // Wait signal to finish.
2705 if (stream_context)
2706 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2707 inputs[0] = outputs[0] = dest;
2708 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2709 if (stream_context)
2710 {
2711 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2712 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2713 }
2714 streams[j] = stream_0;
2715 }
2716 // If this should be blocking, blocking it.
2717 if (!stream_context)
2718 for (j = 0; j < parallel_count; j++)
2719 if (streams[j])
2720 ccv_nnc_stream_context_wait(streams[j]);
2721 } else {
2722 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2723 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2723, __extension__
__PRETTY_FUNCTION__); }))
;
2724 inputs[0] = outputs[0] = dest;
2725 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2726 }
2727 // No need to mark this symbol as init'ed, it is already.
2728 }
2729 ccv_array_free(to_parameter_indices);
2730}
2731
2732void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2733{
2734 int to_param_ref;
2735 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2736 // To models.
2737 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2738 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2738, __extension__ __PRETTY_FUNCTION__
); }))
;
2739 // Tensor has to be inited already.
2740 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2740, __extension__ __PRETTY_FUNCTION__
); }))
;
2741 ccv_nnc_tensor_t** tensor_gradients;
2742 if (to_compiled_data->backward.count > 1)
2743 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2744 else
2745 tensor_gradients = to_compiled_data->tensors.gradients;
2746 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 2746, __extension__ __PRETTY_FUNCTION__
); }))
;
2747 // From models.
2748 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2749 const int to_parameter_size = to_compiled_data->parameters->rnum;
2750 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2751 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2751, __extension__ __PRETTY_FUNCTION__
); }))
;
2752 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2752, __extension__ __PRETTY_FUNCTION__
); }))
;
2753 int i, j;
2754 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2755 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2756 for (i = 0; i < aux_in_size; i++)
2757 inputs[i + 1] = aux_ins[i];
2758 for (i = 0; i < aux_out_size; i++)
2759 outputs[i + 1] = aux_outs[i];
2760 for (i = 0; i < rnum; i++)
2761 {
2762 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2763 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2763, __extension__ __PRETTY_FUNCTION__); }))
;
2764 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2764, __extension__ __PRETTY_FUNCTION__
); }))
;
2765 if (parallel_count > 1)
2766 {
2767 ccv_nnc_stream_context_t* streams[parallel_count];
2768 ccv_nnc_stream_signal_t* signal;
2769 if (stream_context)
2770 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2771 for (j = 0; j < parallel_count; j++)
2772 {
2773 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2774 if (!dest)
2775 {
2776 streams[j] = 0;
2777 continue;
2778 }
2779 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2780 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2781 int type = stream_type;
2782 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2783 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2784 // Wait signal to finish.
2785 if (stream_context)
2786 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2787 inputs[0] = outputs[0] = dest;
2788 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2789 if (stream_context)
2790 {
2791 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2792 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2793 }
2794 streams[j] = stream_0;
2795 }
2796 // If this should be blocking, blocking it.
2797 if (!stream_context)
2798 for (j = 0; j < parallel_count; j++)
2799 if (streams[j])
2800 ccv_nnc_stream_context_wait(streams[j]);
2801 } else {
2802 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2803 if (!dest)
2804 continue;
2805 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2805, __extension__
__PRETTY_FUNCTION__); }))
;
2806 inputs[0] = outputs[0] = dest;
2807 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2808 }
2809 // No need to mark this symbol as init'ed, it is already.
2810 }
2811 ccv_array_free(to_parameter_indices);
2812}
2813
2814ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2815{
2816 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2817 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2817, __extension__ __PRETTY_FUNCTION__); }))
;
2818 return compiled_data->minimize.minimizer;
2819}
2820
2821void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2822{
2823 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2824 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2824, __extension__ __PRETTY_FUNCTION__); }))
;
2825 const int parameter_size = compiled_data->parameters->rnum;
2826 if (parameter_size == 0)
2827 return;
2828 if (reset)
2829 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 2829, __extension__ __PRETTY_FUNCTION__
); }))
; }
2830 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2831 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2832 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2833 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2834 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2835 // We update all parameters, at this point, we have one minimizer.
2836 if (set_parameters == 0 || set_parameter_size == 0)
2837 compiled_data->minimize.minimizer = minimizer;
2838 int i;
2839 if (set_parameters && set_parameter_size)
2840 {
2841 // I need to save what's the minimizer along with this.
2842 if (!compiled_data->minimize.parameters)
2843 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2844 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2845 set_minimizer_for_parameter->minimizer = minimizer;
2846 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2847 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2848 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2849 }
2850 // If reset is true, clear the parameters array.
2851 if (reset && compiled_data->minimize.parameters)
2852 {
2853 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2854 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2855 ccv_array_clear(compiled_data->minimize.parameters);
2856 }
2857 if (!compiled_data->update_nodes)
2858 return;
2859 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2860 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 2860, __extension__ __PRETTY_FUNCTION__); }))
;
2861 if (saved_aux_size > old_max_saved_aux_size)
2862 {
2863 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 2863, __extension__ __PRETTY_FUNCTION__
); }))
;
2864 // Reallocate first, move them around later.
2865 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2866 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2867 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2868 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
2869 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2870 }
2871 int flag = 0;
2872 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2873 if (set_parameters && set_parameter_size)
2874 {
2875 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2876 for (i = 0; i < set_parameter_size; i++)
2877 {
2878 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2879 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 2879, __extension__ __PRETTY_FUNCTION__
); }))
;
2880 const int old_rnum = parameter_indices->rnum;
2881 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2882 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2883 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 2883, __extension__ __PRETTY_FUNCTION__
); }))
;
2884 if (param_ref >= 0)
2885 {
2886 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2886, __extension__ __PRETTY_FUNCTION__
); }))
;
2887 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
2888 parameter_indices->rnum = old_rnum + 1;
2889 }
2890 }
2891 // We may have duplicated indices, but that is OK, we will set it twice.
2892 for (i = 0; i < parameter_indices->rnum; i++)
2893 {
2894 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
2895 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2896 flag = 1;
2897 }
2898 ccv_array_free(parameter_indices);
2899 } else {
2900 for (i = 0; i < parameter_size; i++)
2901 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2902 flag = 1;
2903 if (compiled_data->minimize.parameters)
2904 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2905 flag = 1;
2906 }
2907 if (flag)
2908 {
2909 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
2910 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2911 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2912 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2913 }
2914}
2915
2916void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2917{
2918 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2919 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2919, __extension__ __PRETTY_FUNCTION__); }))
;
2920 compiled_data->compile_params = compile_params;
2921}
2922
2923void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2924{
2925 if (model->graph && out_size > 0)
2926 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2927 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2928 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2929 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2930 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2931 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2932 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2933}
2934
2935void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2936{
2937 if (model->graph)
2938 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2939}
2940
2941static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2942{
2943 int i;
2944 const int parameter_size = compiled_data->parameters->rnum;
2945 ccv_array_free(compiled_data->parameters);
2946 if (compiled_data->parameter_flags)
2947 ccfreefree(compiled_data->parameter_flags);
2948 const int internal_size = compiled_data->internals->rnum;
2949 ccv_array_free(compiled_data->internals);
2950 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 2950, __extension__ __PRETTY_FUNCTION__
); }))
;
2951 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 2951, __extension__ __PRETTY_FUNCTION__
); }))
;
2952 for (i = 0; i < parameter_size; i++)
2953 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
2954 ccv_array_free(compiled_data->ids.parameters);
2955 for (i = 0; i < internal_size; i++)
2956 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
2957 ccv_array_free(compiled_data->ids.internals);
2958 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2959 if (compiled_data->tensors.parameters)
2960 {
2961 for (i = 0; i < parameter_size * parallel_count; i++)
2962 // If it is not marked as not belonging, we can free it.
2963 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2964 if (compiled_data->tensors.parameters[i])
2965 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2966 for (i = 0; i < internal_size * parallel_count; i++)
2967 if (compiled_data->tensors.internals[i])
2968 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2969 ccfreefree(compiled_data->tensors.parameters);
2970 }
2971 if (compiled_data->tensors.gradients)
2972 {
2973 for (i = 0; i < parameter_size * parallel_count; i++)
2974 {
2975 if (compiled_data->tensors.gradients[i])
2976 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2977 if (compiled_data->tensors.accum_gradients[i])
2978 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2979 }
2980 ccfreefree(compiled_data->tensors.gradients);
2981 }
2982 if (compiled_data->minimize.parameters)
2983 {
2984 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2985 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2986 ccv_array_free(compiled_data->minimize.parameters);
2987 }
2988 if (compiled_data->rewindables)
2989 ccv_array_free(compiled_data->rewindables);
2990 if (compiled_data->tensors_init.v)
2991 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
2992 if (compiled_data->evaluate.tos)
2993 ccfreefree(compiled_data->evaluate.tos);
2994 compiled_data->evaluate.tos = 0;
2995 if (compiled_data->stream_map)
2996 {
2997 khiter_t k;
2998 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
2999 {
3000 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
3001 continue;
3002 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
3003 ccv_nnc_stream_context_free(stream);
3004 }
3005 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
3006 }
3007 _ccv_cnnp_compiled_data_graph_free(compiled_data);
3008 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
3009 _ccv_cnnp_compiled_data_backward_free(compiled_data);
3010 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
3011 if (compiled_data->gradient_checkpoints)
3012 {
3013 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
3014 {
3015 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
3016 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 3016, __extension__ __PRETTY_FUNCTION__
); }))
;
3017 ccfreefree(checkpoint->inputs);
3018 ccv_array_free(checkpoint->tensor_symbols);
3019 }
3020 ccv_array_free(compiled_data->gradient_checkpoints);
3021 }
3022 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
3023 ccfreefree(compiled_data);
3024}
3025
3026void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
3027{
3028 if (model->isa->deinit)
3029 model->isa->deinit(model);
3030 if (model->io)
3031 {
3032 int i;
3033 for (i = 0; i < model->io->rnum; i++)
3034 {
3035 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
3036 if (model_io->outgoings)
3037 ccv_array_free(model_io->outgoings);
3038 if (model_io->incomings)
3039 ccv_array_free(model_io->incomings);
3040 if (model_io->dependencies)
3041 ccv_array_free(model_io->dependencies);
3042 ccfreefree(model_io);
3043 }
3044 ccv_array_free(model->io);
3045 }
3046 if (model->parameter_indices)
3047 ccv_array_free(model->parameter_indices);
3048 if (model->inputs)
3049 ccfreefree(model->inputs);
3050 if (model->graph)
3051 ccv_nnc_symbolic_graph_free(model->graph);
3052 if (model->compiled_data)
3053 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3054 if (model->name)
3055 ccfreefree(model->name);
3056 ccfreefree(model);
3057}
3058
3059void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3060{
3061 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3062 if (!compiled_data)
3063 return;
3064 if (compiled_data->graph)
3065 ccv_nnc_graph_cancel(compiled_data->graph);
3066 if (compiled_data->apply_gradients.graph)
3067 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3068}