Bug Summary

File:nnc/ccv_cnnp_model.c
Warning:line 2399, column 1
1st function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-10-21-122132-109054-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6
7// MARK - Level-5 API
8
9ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
10{
11 if (!model->io)
12 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
13 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
14 model_io->param_ref = 0;
15 model_io->param_sel = 0;
16 model_io->visit = 0;
17 model_io->model = model;
18 model_io->dependencies = 0;
19 model_io->dependents = 0;
20 model_io->outgoings = 0;
21 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
22 ccv_array_push(model->io, &model_io);
23 if (input_size > 0)
24 {
25 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
26 ccv_array_resize(model_io->incomings, input_size);
27 int i;
28 memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t
)(model_io->incomings)->rsize * (size_t)(0)))
, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
29 for (i = 0; i < input_size; i++)
30 {
31 if (!inputs[i]->outgoings)
32 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
33 ccv_array_push(inputs[i]->outgoings, &model_io);
34 }
35 } else {
36 model_io->incomings = 0;
37 }
38 return model_io;
39}
40
41void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
42{
43 assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__
({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0"
, "ccv_cnnp_model.c", 43, __extension__ __PRETTY_FUNCTION__);
}))
;
44 if (!model_io->dependencies)
45 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
46 int i, j;
47 for (i = 0; i < dependency_size; i++)
48 {
49 int flag = 0;
50 // Check if it is already exist or not.
51 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
52 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t
)(model_io->dependencies)->rsize * (size_t)(j)))
== dependencies[i])
53 flag = 1;
54 if (flag)
55 continue;
56 ccv_array_push(model_io->dependencies, dependencies + i);
57 ++dependencies[i]->dependents;
58 }
59}
60
61int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
62{
63 return model->output_size;
64}
65
66int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
67{
68 // If the model is compiled, it is default to 1 unless it is not.
69 if (model->compiled_data)
70 return model->is_trainable >= 0 ? model->is_trainable : 1;
71 return model->is_trainable;
72}
73
74ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
75{
76 if (!model->io)
77 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
78 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s));
79 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
80 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
81 model_io->visit = 0;
82 model_io->model = model;
83 model_io->outputs = 0;
84 model_io->dependencies = 0;
85 model_io->dependents = 0;
86 model_io->incomings = 0;
87 model_io->outgoings = 0;
88 ccv_array_push(model->io, &model_io);
89 return model_io;
90}
91
92void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
93{
94 model->notify_hook.func = func;
95 model->notify_hook.context = context;
96}
97
98void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
99{
100 if (model->notify_hook.func)
101 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
102 if (model->isa->notify)
103 model->isa->notify(model, tag, payload);
104}
105
106static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
107{
108 int i, j;
109 for (i = 0; i < graph_exec_symbol_size; i++)
110 {
111 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
112 // Check whether this tensor symbol has any duplicate.
113 for (j = i + 1; j < graph_exec_symbol_size;)
114 {
115 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
116 // If there is a same tensor symbol, remove it.
117 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
118 {
119 if (j + 1 < graph_exec_symbol_size)
120 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
121 --graph_exec_symbol_size;
122 continue;
123 }
124 ++j;
125 }
126 }
127 return graph_exec_symbol_size;
128}
129
130void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
131{
132 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
133 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
134 int i;
135 if (!model->parameter_indices)
136 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
137 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
138 {
139 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data
)) + (size_t)(add_to_array_context->symbols)->rsize * (
size_t)(i)))
;
140 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
141 {
142 // Only add to parameter_indices if it is trainable.
143 if (add_to_array_context->prefix == 't')
144 ccv_array_add_unique_int(model->parameter_indices, i);
145 // Found it, return, don't add it.
146 return;
147 }
148 }
149 // Only add to parameter_indices if it is trainable.
150 if (add_to_array_context->prefix == 't')
151 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
152 // This is a new one, no need to add_unique_int, it is unique.
153 ccv_array_push(add_to_array_context->symbols, &symbol);
154 if (add_to_array_context->trainables)
155 ccv_array_push(add_to_array_context->trainables, &is_trainable);
156 char id[2048];
157 id[0] = add_to_array_context->prefix;
158 id[1] = '-';
159 int total_len = 2;
160 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
161 {
162 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences
)->data)) + (size_t)(add_to_array_context->sequence->
sequences)->rsize * (size_t)(i)))
;
163 int len;
164 if (name->name && name->name[0] != '\0')
165 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
166 else
167 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
168 total_len += len;
169 if (total_len >= 2047)
170 break;
171 }
172 if (total_len < 2047)
173 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
174 assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__
({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048"
, "ccv_cnnp_model.c", 174, __extension__ __PRETTY_FUNCTION__)
; }))
;
175 char *heap_id = (char*)ccmallocmalloc(total_len + 1);
176 memcpy(heap_id, id, total_len + 1);
177 ccv_array_push(add_to_array_context->ids, &heap_id);
178 ++add_to_array_context->sequence->it;
179}
180
181static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
182{
183 compiled_data->f = compiled_data->fits + output_size;
184 compiled_data->xpu_alloc.mp_hdr = -1;
185 compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str();
186 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc();
187 compiled_data->gradient_checkpoints = gradient_checkpoints;
188}
189
190static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
191{
192 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 192, __extension__ __PRETTY_FUNCTION__); }))
;
193 model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
194 int i;
195 for (i = 0; i < input_size; i++)
196 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
197 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
198 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
199 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
200 ccv_cnnp_model_sequence_t model_sequence = {
201 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
202 };
203 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
204 .sequence = &model_sequence,
205 .prefix = 't',
206 .symbols = parameters,
207 .ids = parameter_ids,
208 .trainables = parameter_trainables,
209 };
210 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
211 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
212 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
213 .sequence = &model_sequence,
214 .prefix = 'r',
215 .symbols = internals,
216 .ids = internal_ids,
217 .trainables = 0,
218 };
219 ccv_cnnp_model_build_data_t build_data = {
220 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
221 .model_sequence = &model_sequence,
222 .add_to_array = ccv_cnnp_model_add_to_array,
223 .parameters = parameters,
224 .context = {
225 .add_to_parameter = &add_to_parameter_context,
226 .add_to_output = &add_to_output_context,
227 },
228 .gradient_checkpoints = 0,
229 };
230 model->data = &build_data;
231 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
232 for (i = 0; i < model->output_size; i++)
233 {
234 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
235 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
236 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
237 continue;
238 // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method
239 // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be
240 // honest, because we cannot handle cases of alias is part of the original tensor but bind differently).
241 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
242 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
243 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, &output, 1, model->outputs + i, 1, "contiguous");
244 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
245 }
246 model->data = 0;
247 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
248 if (model_sequence.sequences)
249 ccv_array_free(model_sequence.sequences);
250 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
251 int not_trainables = 0;
252 // Assert no parameter is alias.
253 for (i = 0; i < parameters->rnum; i++)
254 {
255 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
256 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
257 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 257, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
258 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
== 0)
259 not_trainables = 1;
260 }
261 assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables->
rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables
->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum"
, "ccv_cnnp_model.c", 261, __extension__ __PRETTY_FUNCTION__)
; }))
;
262 uint64_t* parameter_flags = 0;
263 if (not_trainables)
264 {
265 parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
266 for (i = 0; i < parameter_trainables->rnum; i++)
267 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
)
268 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
269 }
270 ccv_array_free(parameter_trainables);
271 // Assert no internal is alias.
272 for (i = 0; i < internals->rnum; i++)
273 {
274 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals
)->rsize * (size_t)(i)))
;
275 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
276 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 276, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
277 }
278 const int output_size = model->output_size;
279 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
280 const int parameters_rnum = parameters->rnum;
281 if (input_size > 0)
282 {
283 ccv_array_resize(parameters, parameters_rnum + input_size);
284 memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(parameters_rnum)))
, model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
285 }
286 ccv_nnc_symbolic_graph_simplify(model->graph,
287 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
288 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
289 CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
290 CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
,
291 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters_rnum + input_size,
292 model->outputs, output_size,
293 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
294 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
295 // Size it down.
296 parameters->rnum = parameters_rnum;
297 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
298 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
299 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
300 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 300, __extension__ __PRETTY_FUNCTION__)
; }))
;
301 compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
302 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
303 compiled_data->loss = loss;
304 if (loss.cmd == CCV_NNC_NOOP)
305 {
306 // If no loss function provided, there is no fits.
307 for (i = 0; i < output_size; i++)
308 {
309 compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
310 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
311 if (alias_to.d < 0)
312 compiled_data->f[i] = model->outputs[i];
313 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
314 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
315 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
316 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
317 int j;
318 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
319 { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if (
ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c"
, 319, __extension__ __PRETTY_FUNCTION__); }))
; } // There is no ofs.
320 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
321 }
322 }
323 } else {
324 for (i = 0; i < output_size; i++)
325 {
326 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
327 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
328 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
329 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit}
, (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 -1)
, TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, (
1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 -1)
, 0);
330 }
331 }
332 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
333 ccv_nnc_symbolic_graph_simplify(model->graph,
334 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, // Only do Ops fusion, in this way, we can fuse the loss function.
335 0, 0, // No need to provide binds at this point.
336 compiled_data->f, model->output_size,
337 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
338 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
339 // If inputs are from GPU, stream type is GPU.
340 compiled_data->parameters = parameters;
341 compiled_data->parameter_flags = parameter_flags;
342 compiled_data->internals = internals;
343 compiled_data->ids.parameters = parameter_ids;
344 compiled_data->ids.internals = internal_ids;
345 ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph);
346}
347
348static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
349{
350 ccv_array_t* const stack = (ccv_array_t*)context;
351 ccv_array_push(stack, &symbol.d);
352}
353
354static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
355{
356 const ccv_nnc_tensor_symbol_t src_symbol = {
357 .d = src_index,
358 .graph = src_graph
359 };
360 const ccv_nnc_tensor_symbol_t dest_symbol = {
361 .d = dest_index,
362 .graph = dest_graph
363 };
364 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
365 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
366 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
367 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
368 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
369 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
370}
371
372static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
373{
374 const ccv_nnc_tensor_symbol_t src_symbol = {
375 .d = src_index,
376 .graph = src_graph
377 };
378 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
379 const ccv_nnc_tensor_symbol_t dest_symbol = {
380 .d = dest_index,
381 .graph = dest_graph
382 };
383 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
384 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
385}
386
387static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
388static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
389
390typedef struct {
391 int parallel_count;
392 ccv_nnc_symbolic_graph_t* graph;
393 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
394} ccv_nnc_graph_exec_update_t;
395
396static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
397{
398 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
399 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
400 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
401 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
402 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
403 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
404 const int parallel_count = graph_exec_update->parallel_count;
405 int i;
406 for (i = 1; i < parallel_count; i++)
407 {
408 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
409 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
410 {
411 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
412 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
413 }
414 }
415}
416
417void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
418{
419 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 419, __extension__ __PRETTY_FUNCTION__); }))
;
420 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 420, __extension__ __PRETTY_FUNCTION__)
; }))
;
421 assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if
(!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c"
, 421, __extension__ __PRETTY_FUNCTION__); }))
;
422 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
423 init->graph = ccv_nnc_symbolic_graph_new();
424 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
425 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
426 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
427 init->parallel_count = model->parallel_count;
428 init->memory_compression = model->memory_compression;
429 init->memory_reduction = model->memory_reduction;
430 init->gradient_checkpointing = model->gradient_checkpointing;
431 init->compiled_data->stream_type = model->compiled_data->stream_type;
432 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
433 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
434 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
435 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
436 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
437 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0);
438 int i, j;
439 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
440 for (i = 0; i < compiled_data->parameters->rnum; i++)
441 {
442 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
443 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 443, __extension__ __PRETTY_FUNCTION__)
; }))
;
444 }
445 for (i = 0; i < compiled_data->internals->rnum; i++)
446 {
447 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
448 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 448, __extension__ __PRETTY_FUNCTION__)
; }))
;
449 }
450 // Update inputs.
451 assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size)
? 1 : 0), __extension__ ({ if (model->input_size == init->
input_size) ; else __assert_fail ("model->input_size == init->input_size"
, "ccv_cnnp_model.c", 451, __extension__ __PRETTY_FUNCTION__)
; }))
;
452 for (i = 0; i < model->input_size; i++)
453 if (model->inputs[i].d >= 0)
454 {
455 assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0"
, "ccv_cnnp_model.c", 455, __extension__ __PRETTY_FUNCTION__)
; }))
;
456 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
457 }
458 // Update outputs.
459 assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size
) ? 1 : 0), __extension__ ({ if (model->output_size == init
->output_size) ; else __assert_fail ("model->output_size == init->output_size"
, "ccv_cnnp_model.c", 459, __extension__ __PRETTY_FUNCTION__)
; }))
;
460 for (i = 0; i < model->output_size; i++)
461 {
462 if (model->outputs[i].d >= 0)
463 {
464 assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->outputs[i].d >= 0) ; else __assert_fail (
"init->outputs[i].d >= 0", "ccv_cnnp_model.c", 464, __extension__
__PRETTY_FUNCTION__); }))
;
465 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
466 }
467 if (model->outputs[i].d != model->compiled_data->f[i].d)
468 {
469 assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data
->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[
i].d != init->compiled_data->f[i].d) ; else __assert_fail
("init->outputs[i].d != init->compiled_data->f[i].d"
, "ccv_cnnp_model.c", 469, __extension__ __PRETTY_FUNCTION__)
; }))
;
470 if (model->compiled_data->f[i].d >= 0)
471 {
472 assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ?
1 : 0), __extension__ ({ if (init->compiled_data->f[i]
.d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0"
, "ccv_cnnp_model.c", 472, __extension__ __PRETTY_FUNCTION__)
; }))
;
473 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
474 }
475 }
476 }
477 // Go through the graph to set tensor on matching symbols
478 for (i = 0; i < stack->rnum; i++)
479 {
480 const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize
* (size_t)(i)))
;
481 // If exceed range, skip.
482 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
483 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
484 continue;
485 const ccv_nnc_graph_exec_symbol_t src_symbol = {
486 .d = d,
487 .graph = init->graph
488 };
489 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
490 .d = d,
491 .graph = model->graph
492 };
493 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
494 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
495 // If the name doesn't match, skip.
496 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
497 continue;
498 // Now get all the inputs and outputs, if matches, set them.
499 const int* src_inputs;
500 int src_input_size;
501 const int* src_outputs;
502 int src_output_size;
503 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
504 const int* dest_inputs;
505 int dest_input_size;
506 const int* dest_outputs;
507 int dest_output_size;
508 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
509 // We may have unmatched input / output size because this is the minimizer and it has
510 // different saved_aux (for example, when we shrunk with CMD_NOOP).
511 if (src_input_size != dest_input_size)
512 continue;
513 if (src_output_size != dest_output_size)
514 continue;
515 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
516 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
517 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
518 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
519 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
520 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
521 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
522 // a new exec symbol.
523 for (j = 0; j < src_input_size; j++)
524 if (src_inputs[j] >= 0)
525 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
526 for (j = 0; j < src_output_size; j++)
527 if (src_outputs[j] >= 0)
528 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
529 }
530 ccv_array_free(stack);
531 // After this, we get all tensors in the model graph resolved through tensor_auto.
532 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0);
533 // Verify symbols we get matches.
534 const int parameter_size = compiled_data->parameters->rnum;
535 for (i = 0; i < parameter_size; i++)
536 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->parameters)->data)) + (size_t)(compiled_data
->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (
((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data->
parameters)->data)) + (size_t)(compiled_data->parameters
)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d"
, "ccv_cnnp_model.c", 536, __extension__ __PRETTY_FUNCTION__)
; }))
; }
537 const int internal_size = compiled_data->internals->rnum;
538 for (i = 0; i < internal_size; i++)
539 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->internals)->data)) + (size_t)(compiled_data
->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->internals)->
data)) + (size_t)(init->compiled_data->internals)->rsize
* (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((compiled_data->internals)->data)) +
(size_t)(compiled_data->internals)->rsize * (size_t)(i
))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init
->compiled_data->internals)->data)) + (size_t)(init->
compiled_data->internals)->rsize * (size_t)(i))))->d
) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d"
, "ccv_cnnp_model.c", 539, __extension__ __PRETTY_FUNCTION__)
; }))
; }
540 // Go through compiled data.
541 if (compiled_data->tensor_arena)
542 {
543 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
544 if (flag == 0 && compiled_data->graph_exec_arena)
545 {
546 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
547 // Since we will reinit, if we previously set is_test, we need to set it again.
548 if (compiled_data->is_test)
549 {
550 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
551 ccv_nnc_graph_exec_update_t update = {
552 .parallel_count = parallel_count,
553 .graph = model->graph,
554 .graph_exec_arena = compiled_data->graph_exec_arena,
555 };
556 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
557 }
558 } else
559 // Free-up tensor arena & graph exec arena.
560 _ccv_cnnp_compiled_data_graph_free(compiled_data);
561 }
562 // There are other compiled graphs, for accum and apply gradients.
563 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
564 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
565 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
566 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
567 // That is why we don't update these compiled graphs at all this point.
568 // Free the model, we've already "absorbed" it.
569 ccv_cnnp_model_free(init);
570}
571
572void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
573{
574 assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model->
input_size == 0) ? 1 : 0), __extension__ ({ if (input_size ==
model->input_size || model->input_size == 0) ; else __assert_fail
("input_size == model->input_size || model->input_size == 0"
, "ccv_cnnp_model.c", 574, __extension__ __PRETTY_FUNCTION__)
; }))
;
575 if (model->input_size == 0)
576 model->input_size = input_size;
577 if (!model->graph) // The graph is not compiled yet.
578 {
579 model->graph = ccv_nnc_symbolic_graph_new();
580 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
581 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 581, __extension__ __PRETTY_FUNCTION__)
; }))
;
582 int i, flag = 0;
583 for (i = 0; !flag && i < input_size; i++)
584 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY);
585 // If inputs are from GPU, stream type is GPU.
586 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
587 model->compiled_data->minimize.minimizer = minimizer;
588 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
589 } else {
590 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
591 // And then absorb the "new model" to the old one.
592 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
593 ccv_cnnp_model_absorb(model, init, inputs, input_size);
594 // Reset minimizer.
595 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
596 }
597}
598
599ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
600{
601 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
602 new_model->is_trainable = is_trainable;
603 return new_model;
604}
605
606void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
607{
608 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 608, __extension__ __PRETTY_FUNCTION__); }))
;
609 assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0
), __extension__ ({ if (output_size == model->output_size)
; else __assert_fail ("output_size == model->output_size"
, "ccv_cnnp_model.c", 609, __extension__ __PRETTY_FUNCTION__)
; }))
;
610 ccv_nnc_symbolic_graph_t* const graph = model->graph;
611 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0);
612 int i;
613 for (i = 0; i < output_size; i++)
614 {
615 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL"
, "ccv_cnnp_model.c", 615, __extension__ __PRETTY_FUNCTION__)
; }))
;
616 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
617 }
618}
619
620void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
621{
622 if (workspace_size == model->workspace_size)
623 return;
624 model->workspace_size = workspace_size;
625 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
626 if (compiled_data && compiled_data->graph)
627 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0);
628}
629
630size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
631{
632 return model->workspace_size;
633}
634
635void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
636{
637 if (parallel == 0)
638 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
639 else
640 model->parallel_count = parallel;
641 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
642 if (compiled_data)
643 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 643, __extension__ __PRETTY_FUNCTION__)
; }))
; }
644}
645
646void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
647{
648 model->max_stream_count = max_stream_count;
649 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
650 if (compiled_data)
651 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 651, __extension__ __PRETTY_FUNCTION__)
; }))
; }
652}
653
654void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
655{
656 model->memory_compression = memory_compression;
657 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
658 if (compiled_data)
659 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 659, __extension__ __PRETTY_FUNCTION__)
; }))
; }
660}
661
662void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
663{
664 model->memory_reduction = memory_reduction;
665 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
666 if (compiled_data)
667 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 667, __extension__ __PRETTY_FUNCTION__)
; }))
; }
668}
669
670void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
671{
672 model->gradient_checkpointing = gradient_checkpointing;
673}
674
675int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
676{
677 return model->gradient_checkpointing;
678}
679
680typedef struct {
681 int parallel_count;
682 ccv_nnc_symbolic_graph_t* graph;
683 ccv_cnnp_compiled_data_t* compiled_data;
684 ccv_nnc_tensor_arena_t* tensor_arena;
685} ccv_nnc_tensor_init_states_t;
686
687static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
688{
689 int i;
690 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
691 for (i = 0; i < compiled_data->parameters->rnum; i++)
692 {
693 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
694 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
695 return 1;
696 }
697 for (i = 0; i < compiled_data->internals->rnum; i++)
698 {
699 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
700 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
701 return 1;
702 }
703 return 0;
704}
705
706static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
707{
708 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
709 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
710 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
711 if (!output_tensor)
712 return;
713 const int d = output_symbol.d;
714 assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data->
tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states
->compiled_data->tensors_init.size) ; else __assert_fail
("d < tensor_init_states->compiled_data->tensors_init.size"
, "ccv_cnnp_model.c", 714, __extension__ __PRETTY_FUNCTION__)
; }))
;
715 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data
->tensors_init.v) & ~(uintptr_t)1))
;
716 if (init_v[d >> 5] & (1u << (d & 0x1f)))
717 return;
718 init_v[d >> 5] |= (1u << (d & 0x1f));
719 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
720 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
721 const int parallel_count = tensor_init_states->parallel_count;
722 int i;
723 for (i = 1; i < parallel_count; i++)
724 {
725 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
726 if (copy)
727 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
728 }
729}
730
731// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
732// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
733static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
734{
735 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 735, __extension__ __PRETTY_FUNCTION__); }))
;
736 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 736, __extension__ __PRETTY_FUNCTION__)
; }))
;
737 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
738 assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__
({ if (compiled_data->rewindables) ; else __assert_fail (
"compiled_data->rewindables", "ccv_cnnp_model.c", 738, __extension__
__PRETTY_FUNCTION__); }))
;
739 int i;
740 for (i = 0; i < compiled_data->rewindables->rnum; i++)
741 {
742 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) +
(size_t)(compiled_data->rewindables)->rsize * (size_t)
(i)))
;
743 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
744 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
745 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
746 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
747 }
748 ccv_array_clear(compiled_data->rewindables);
749 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
750}
751
752static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
753{
754 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
755 .type = CCV_CNNP_REWIND_TENSOR,
756 .tensor = symbol
757 };
758 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
759 ccv_array_push(rewind_symbols, &rewind_symbol);
760}
761
762static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
763{
764 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
765 .type = CCV_CNNP_REWIND_TENSOR,
766 .tensor = symbol
767 };
768 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
769 ccv_array_push(rewind_symbols, &rewind_symbol);
770}
771
772static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
773{
774 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
775 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
776 .graph_exec = symbol
777 };
778 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
779 ccv_array_push(rewind_symbols, &rewind_symbol);
780}
781
782static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
783{
784 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
785 if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0))
786 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
787 int i;
788 for (i = 1; i < parallel_count; i++)
789 {
790 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
791 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
792 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
793 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
794 }
795}
796
797static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
798{
799 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 799, __extension__ __PRETTY_FUNCTION__); }))
;
800 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 800, __extension__ __PRETTY_FUNCTION__); }))
;
801 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
802 int i;
803 for (i = 1; i < parallel_count; i++)
804 {
805 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
806 if (copy_symbol.graph)
807 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
808 }
809 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
810 if (graph_exec_arena)
811 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
812 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
813 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
814 if (gradient_graph_exec_arena)
815 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
816}
817
818static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
819{
820 int this_parameter_flag = 0;
821 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
822 return this_parameter_flag;
823 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
824 int j, k;
825 // For no-op, we can preserve previous saved_aux_size.
826 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
827 {
828 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
829 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
830 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
831 // make sure some model parameters don't update if we don't want them to.
832 int old_saved_aux_size;
833 if (old_minimizer.cmd == CCV_NNC_NOOP)
834 {
835 int input_size;
836 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
837 if (input_size < 2) // This is not legit.
838 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
839 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
840 old_saved_aux_size = input_size - 2;
841 } else
842 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
843 if (old_saved_aux_size != saved_aux_size)
844 {
845 this_parameter_flag = 1;
846 if (saved_aux_size > old_saved_aux_size)
847 {
848 // Allocate new tensor symbols.
849 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
850 for (j = old_saved_aux_size; j < saved_aux_size; j++)
851 {
852 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
853 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
854 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
855 for (k = 1; k < parallel_count; k++)
856 {
857 ccv_nnc_tensor_param_t dev_info = info;
858 if (k != device_id)
859 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) &
0xfff) << 8))
;
860 else
861 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) &
0xfff) << 8))
;
862 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
863 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
864 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
865 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
866 }
867 }
868 } else {
869 for (j = saved_aux_size; j < old_saved_aux_size; j++)
870 {
871 for (k = 1; k < parallel_count; k++)
872 {
873 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
874 if (src_copy.d >= 0)
875 {
876 ccv_nnc_tensor_symbol_free(graph, src_copy);
877 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
878 }
879 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
880 if (dest_copy.d >= 0)
881 {
882 ccv_nnc_tensor_symbol_free(graph, dest_copy);
883 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
884 }
885 }
886 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
887 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
888 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
889 }
890 }
891 }
892 }
893 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
894 if (this_parameter_flag)
895 {
896 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
897 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
898 const int* inputs = 0;
899 int input_size = 0;
900 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
901 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 901, __extension__ __PRETTY_FUNCTION__)
; }))
;
902 update_inputs[0].d = inputs[0];
903 update_inputs[0].graph = graph;
904 update_inputs[1].d = inputs[1];
905 update_inputs[1].graph = graph;
906 update_outputs[0] = updated_parameters[parameter_indice];
907 for (j = 0; j < saved_aux_size; j++)
908 {
909 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
910 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
911 }
912 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
913 for (k = 1; k < parallel_count; k++)
914 {
915 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
916 assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if
(copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c"
, 916, __extension__ __PRETTY_FUNCTION__); }))
;
917 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
918 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 918, __extension__ __PRETTY_FUNCTION__)
; }))
;
919 update_inputs[0].d = inputs[0];
920 update_inputs[0].graph = graph;
921 update_inputs[1].d = inputs[1];
922 update_inputs[1].graph = graph;
923 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
924 for (j = 0; j < saved_aux_size; j++)
925 {
926 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
927 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
928 }
929 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
930 }
931 }
932 return this_parameter_flag;
933}
934
935typedef struct {
936 int parameter_size;
937 ccv_nnc_cmd_t minimizer;
938 ccv_cnnp_model_io_t parameters[1];
939} ccv_cnnp_set_minimizer_for_parameter_t;
940
941static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
942{
943 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
944 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 944, __extension__ __PRETTY_FUNCTION__); }))
;
945 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
946 // We update all parameters, at this point, we have one minimizer.
947 const int parameter_size = compiled_data->parameters->rnum;
948 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
949 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
950 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 950, __extension__ __PRETTY_FUNCTION__); }))
;
951 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
952 ccv_array_t* const parameters = compiled_data->minimize.parameters;
953 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
954 int i, j, flag = 0;
955 for (i = 0; i < parameters->rnum; i++)
956 {
957 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
958 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
959 {
960 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
961 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 961, __extension__ __PRETTY_FUNCTION__)
; }))
;
962 const int old_rnum = parameter_indices->rnum;
963 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
964 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
965 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 965, __extension__ __PRETTY_FUNCTION__)
; }))
;
966 if (param_ref >= 0)
967 {
968 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 968, __extension__ __PRETTY_FUNCTION__)
; }))
;
969 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
970 parameter_indices->rnum = old_rnum + 1;
971 }
972 }
973 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
974 // We may have duplicated indices, but that is OK, we will set it twice.
975 for (j = 0; j < parameter_indices->rnum; j++)
976 {
977 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
978 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 978, __extension__ __PRETTY_FUNCTION__)
; }))
;
979 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
980 flag = 1;
981 }
982 ccv_array_clear(parameter_indices);
983 }
984 ccv_array_free(parameter_indices);
985 return flag;
986}
987
988static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
989{
990 if (new_saved_aux_size == old_saved_aux_size)
991 return;
992 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 992, __extension__ __PRETTY_FUNCTION__)
; }))
;
993 int i, j;
994 for (i = parameter_size - 1; i >= 0; i--)
995 {
996 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
997 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
998 for (j = old_saved_aux_size - 1; j >= 0; j--)
999 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
1000 }
1001}
1002
1003static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1004{
1005 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1006 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1006, __extension__ __PRETTY_FUNCTION__); }))
;
1007 if (!compiled_data->rewindables)
1008 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1009 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1010 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1011 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1012}
1013
1014static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1015{
1016 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1017 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1017, __extension__ __PRETTY_FUNCTION__
); }))
;
1018 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1018, __extension__ __PRETTY_FUNCTION__
); }))
;
1019 const int evaluate_to_size = compiled_data->evaluate.to_size;
1020 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 1020, __extension__ __PRETTY_FUNCTION__
); }))
;
1021 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1022 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1023 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1024 int i, j;
1025 const int output_size = model->output_size;
1026 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 1026, __extension__ __PRETTY_FUNCTION__
); }))
;
1027 if (fits)
1028 for (i = 0; i < output_size; i++)
1029 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1030 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1031 const int parameter_size = compiled_data->parameters->rnum;
1032 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1033 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1034 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1035 int parameter_size_maybe_more = parameter_size;
1036 compiled_data->disable_outgrad = disable_outgrad;
1037 int outgrad_size;
1038 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1039 outgrad_size = 0;
1040 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1041 outgrad_size = model->input_size;
1042 else {
1043 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1043, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1044 outgrad_size = 0;
1045 for (i = 0; i < model->input_size; i++)
1046 if (!(disable_outgrad & ((uint64_t)1 << i)))
1047 ++outgrad_size;
1048 }
1049 compiled_data->outgrad_size = outgrad_size;
1050 parameter_size_maybe_more += outgrad_size;
1051 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1052 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1053 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1054 compiled_data->backward.to_size = parameter_size_maybe_more;
1055 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
;
1056 if (compiled_data->parameter_flags)
1057 {
1058 parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1059 for (i = 0; i < parameter_size; i++)
1060 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1061 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1062 else
1063 parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1064 }
1065 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1066 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1067 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1068 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1069 else { // Compute minimize with gradients including selected inputs.
1070 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 1070, __extension__ __PRETTY_FUNCTION__
); }))
;
1071 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1071, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1072 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 1072, __extension__ __PRETTY_FUNCTION__
); }))
;
1073 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1074 j = 0;
1075 for (i = 0; i < model->input_size; i++)
1076 if (!(disable_outgrad & ((uint64_t)1 << i)))
1077 outgrads[j++] = model->inputs[i];
1078 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1079 }
1080 if (compiled_data->parameter_flags)
1081 ccfreefree(parameters);
1082 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1083 if (compiled_data->minimize.parameters)
1084 _ccv_cnnp_apply_parameters_with_minimizer(model);
1085 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1086 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1087 for (i = 0; i < output_size; i++)
1088 {
1089 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1090 // Init this to 1 so we can backprop.
1091 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1092 }
1093 compiled_data->backward.to_size = 0;
1094 for (i = 0; i < parameter_size_maybe_more; i++)
1095 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1096 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1097 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1098 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1099 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1100 {
1101 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1102 continue;
1103 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1104 const int* tos;
1105 int to_size;
1106 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1107 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1108 {
1109 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1110 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1111 int flag = 0;
1112 const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = (
destination_count - i); (_a > _b) ? _a : _b; })
;
1113 for (j = i - 1; !flag && j >= 0; j--)
1114 if (j + outgrad_destination_start < destination_count)
1115 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1116 if (!flag) // Only if we cannot find it, we add it.
1117 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1118 }
1119 }
1120 if (parallel_count > 1)
1121 {
1122 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1123 0, 0,
1124 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1125 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1126 0, 0, 0,
1127 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1128 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1129 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1130 for (i = 0; i < evaluate_to_size; i++)
1131 for (j = 1; j < parallel_count; j++)
1132 {
1133 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1134 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1135 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1136 }
1137 const int backward_to_size = compiled_data->backward.to_size;
1138 for (i = 0; i < backward_to_size; i++)
1139 for (j = 1; j < parallel_count; j++)
1140 {
1141 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1142 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1143 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1144 }
1145 }
1146 // Only use memory compression if we are in gradient parameter mode.
1147 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1148 {
1149 if (model->memory_compression)
1150 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1151 if (model->memory_reduction)
1152 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1153 }
1154 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1155 compiled_data->gradient_mode = gradient_mode;
1156}
1157
1158void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1159{
1160 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1160, __extension__ __PRETTY_FUNCTION__
); }))
;
1161 const int parameter_size = compiled_data->parameters->rnum;
1162 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1163 const int internal_size = compiled_data->internals->rnum;
1164 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1165 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1166 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1167 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1168}
1169
1170int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1171{
1172 int i, j;
1173 const int parameter_size = compiled_data->parameters->rnum;
1174 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1175 const int internal_size = compiled_data->internals->rnum;
1176 for (i = 0; i < parameter_size; i++)
1177 {
1178 // parameters has to be allocated all together.
1179 if (compiled_data->tensors.parameters[i])
1180 {
1181 for (j = 1; j < parallel_count; j++)
1182 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1182, __extension__ __PRETTY_FUNCTION__
); }))
; }
1183 continue;
1184 }
1185 return 1;
1186 }
1187 for (i = 0; i < internal_size; i++)
1188 {
1189 if (!compiled_data->tensors.internals[i])
1190 return 1;
1191 for (j = 1; j < parallel_count; j++)
1192 if (!compiled_data->tensors.internals[i + j * internal_size])
1193 return 1;
1194 }
1195 return 0;
1196}
1197
1198void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1199{
1200 int i, j;
1201 const int parameter_size = compiled_data->parameters->rnum;
1202 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1203 const int internal_size = compiled_data->internals->rnum;
1204 for (i = 0; i < parameter_size; i++)
1205 {
1206 // parameters has to be allocated all together.
1207 if (compiled_data->tensors.parameters[i])
1208 {
1209 for (j = 1; j < parallel_count; j++)
1210 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1210, __extension__ __PRETTY_FUNCTION__
); }))
; }
1211 continue;
1212 }
1213 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1214 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1215 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1216 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1217 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1218 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1219 for (j = 1; j < parallel_count; j++)
1220 {
1221 if (j != device_id)
1222 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1223 else
1224 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1225 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1226 }
1227 }
1228 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1229 for (i = 0; i < internal_size; i++)
1230 {
1231 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1232 const int d = retained.d;
1233 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1234 continue;
1235 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1236 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1237 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1238 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1239 if (!compiled_data->tensors.internals[i])
1240 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1241 for (j = 1; j < parallel_count; j++)
1242 {
1243 if (j != device_id)
1244 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1245 else
1246 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1247 if (!compiled_data->tensors.internals[i + j * internal_size])
1248 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1249 }
1250 }
1251 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
; // Remove 1 if any.
1252}
1253
1254static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1255{
1256 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1257 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1258}
1259
1260static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1261{
1262 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1262, __extension__ __PRETTY_FUNCTION__
); }))
;
1263 int i, j;
1264 for (i = 0; i < tensor_size; i++)
1265 {
1266 if (!tensors[i])
1267 continue;
1268 const int d = tensor_symbols[i].d;
1269 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1270 continue;
1271 for (j = 1; j < parallel_count; j++)
1272 if (tensors[i + j * tensor_size])
1273 {
1274 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1275 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size]
) & ~(uintptr_t)1))
;
1276 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1277 }
1278 }
1279}
1280
1281static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1282{
1283 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1283, __extension__ __PRETTY_FUNCTION__
); }))
;
1284 int i, j;
1285 for (i = 0; i < tensor_size; i++)
1286 {
1287 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1288 for (j = 1; j < parallel_count; j++)
1289 {
1290 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1291 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1292 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1293 { // We shouldn't allocate this, free it up.
1294 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1295 tensors[i + j * tensor_size] = 0;
1296 }
1297 }
1298 }
1299}
1300
1301static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1302{
1303 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1303, __extension__ __PRETTY_FUNCTION__
); }))
;
1304 int i, j;
1305 for (i = 0; i < tensor_size; i++)
1306 {
1307 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1308 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1309 continue;
1310 if (graph)
1311 {
1312 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1313 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1314 tensor_symbol = alias_to;
1315 }
1316 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1317 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1318 {
1319 const ccv_nnc_tensor_bind_t retained_bind = {
1320 .symbol = tensor_symbol,
1321 .tensor = tensor
1322 };
1323 ccv_array_push(tensor_binds, &retained_bind);
1324 }
1325 for (j = 1; j < parallel_count; j++)
1326 {
1327 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1328 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1329 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1330 {
1331 const ccv_nnc_tensor_bind_t bind = {
1332 .symbol = copy,
1333 .tensor = tensors[i + j * tensor_size]
1334 };
1335 ccv_array_push(tensor_binds, &bind);
1336 }
1337 }
1338 }
1339}
1340
1341static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1342{
1343 if (compiled_data->graph)
1344 ccv_nnc_graph_free(compiled_data->graph);
1345 compiled_data->graph = 0;
1346 compiled_data->is_test = 0;
1347 if (compiled_data->tensor_arena)
1348 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1349 compiled_data->tensor_arena = 0;
1350 if (compiled_data->graph_exec_arena)
1351 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1352 compiled_data->graph_exec_arena = 0;
1353 if (compiled_data->backward.from_ops)
1354 ccfreefree(compiled_data->backward.from_ops);
1355 compiled_data->backward.from_ops = 0;
1356 if (compiled_data->evaluate.schedule)
1357 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1358 compiled_data->evaluate.schedule = 0;
1359 if (compiled_data->backward.schedule)
1360 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1361 compiled_data->backward.schedule = 0;
1362}
1363
1364static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1365{
1366 if (compiled_data->gradients)
1367 ccfreefree(compiled_data->gradients);
1368 compiled_data->gradients = 0;
1369 if (compiled_data->updated_parameters)
1370 ccfreefree(compiled_data->updated_parameters);
1371 compiled_data->updated_parameters = 0;
1372 compiled_data->update_nodes = 0;
1373 compiled_data->saved_aux = 0;
1374}
1375
1376static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1377{
1378 if (compiled_data->backward.gradients)
1379 ccfreefree(compiled_data->backward.gradients);
1380 compiled_data->backward.gradients = 0;
1381 if (compiled_data->backward.accum)
1382 ccv_nnc_graph_free(compiled_data->backward.accum);
1383 compiled_data->backward.accum = 0;
1384 if (compiled_data->backward.tensor_arena)
1385 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1386 compiled_data->backward.tensor_arena = 0;
1387 if (compiled_data->backward.graph_exec_arena)
1388 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1389 compiled_data->backward.graph_exec_arena = 0;
1390}
1391
1392static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1393{
1394 if (compiled_data->apply_gradients.graph)
1395 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1396 compiled_data->apply_gradients.graph = 0;
1397 if (compiled_data->apply_gradients.tensor_arena)
1398 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1399 compiled_data->apply_gradients.tensor_arena = 0;
1400 if (compiled_data->apply_gradients.graph_exec_arena)
1401 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1402 compiled_data->apply_gradients.graph_exec_arena = 0;
1403}
1404
1405// Compile the graph to run ccv_cnnp_model_fit
1406static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1407{
1408 int i, j;
1409 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1410 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1410, __extension__ __PRETTY_FUNCTION__
); }))
;
1411 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1412 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1413 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1413, __extension__ __PRETTY_FUNCTION__
); }))
;
1414 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1414
, __extension__ __PRETTY_FUNCTION__); }))
;
1415 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1415, __extension__ __PRETTY_FUNCTION__
); }))
;
1416 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1417 {
1418 _ccv_cnnp_model_set_rewindables(model);
1419 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1420 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1421 _ccv_cnnp_model_rewind_graph(model);
1422 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1423 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1424 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1425 }
1426 const int tensors_init = !!compiled_data->tensors_init.v;
1427 if (!tensors_init)
1428 _ccv_cnnp_model_tensors_init(model, compiled_data);
1429 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1430 // Check if it is not fully allocated, if it is not, init_1.
1431 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1432 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1433 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1433, __extension__ __PRETTY_FUNCTION__); }))
;
1434 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1434, __extension__ __PRETTY_FUNCTION__); }))
;
1435 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1435
, __extension__ __PRETTY_FUNCTION__); }))
;
1436 const int input_size_per_p = input_size / parallel_count;
1437 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1438 const int output_size_per_p = output_size / parallel_count;
1439 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1440 const int fit_size_per_p = fit_size / parallel_count;
1441 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1442 const int parameter_size = compiled_data->parameters->rnum;
1443 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1444 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1445 const int internal_size = compiled_data->internals->rnum;
1446 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1447 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1448 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1449 ccv_array_free(tensor_binds);
1450 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1451 if (tensors_init && parallel_count > 1)
1452 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1453 // If tensor is not init'ed, we need to init states first.
1454 if (_ccv_cnnp_any_to_init(compiled_data))
1455 {
1456 ccv_nnc_tensor_init_states_t tensor_init_states = {
1457 .parallel_count = parallel_count,
1458 .graph = model->graph,
1459 .compiled_data = compiled_data,
1460 .tensor_arena = compiled_data->tensor_arena
1461 };
1462 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1463 }
1464 compiled_data->is_test = 0;
1465 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1466 // No need to set because it is default to training mode.
1467 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1468 for (i = 0; i < saved_aux_size * parameter_size; i++)
1469 {
1470 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1471 continue;
1472 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1473 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1474 for (j = 1; j < parallel_count; j++)
1475 {
1476 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1477 if (copy)
1478 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1479 }
1480 }
1481 const int evaluate_to_size = compiled_data->evaluate.to_size;
1482 compiled_data->evaluate.to_op_size = 0;
1483 for (i = 0; i < evaluate_to_size; i++)
1484 {
1485 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1486 if (to.graph)
1487 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1488 }
1489 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1490 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1491}
1492
1493ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1494{
1495 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1496 if (!compiled_data || !compiled_data->graph)
1497 return 0;
1498 return ccv_nnc_graph_default_stream(compiled_data->graph);
1499}
1500
1501uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1502{
1503 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1504 if (!compiled_data || !compiled_data->tensor_arena)
1505 return 0;
1506 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1507}
1508
1509static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1510{
1511 int i, j;
1512 for (i = 0; i < tensor_size; i++)
1513 {
1514 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1515 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1516 continue;
1517 if (graph)
1518 {
1519 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1520 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1521 tensor_symbol = alias_to;
1522 }
1523 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1524 for (j = 1; j < parallel_count; j++)
1525 {
1526 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1527 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1528 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1529 }
1530 }
1531}
1532
1533void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1534{
1535 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1536 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1536, __extension__ __PRETTY_FUNCTION__); }))
;
1537 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1538 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1538, __extension__ __PRETTY_FUNCTION__
); }))
;
1539 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1539, __extension__ __PRETTY_FUNCTION__
); }))
;
1540 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1540
, __extension__ __PRETTY_FUNCTION__); }))
;
1541 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1541, __extension__ __PRETTY_FUNCTION__); }))
;
1542 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1543 {
1544 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1545 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1546 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1547 // Compile the symbolic graph down only when needed.
1548 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1549 } else {
1550 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1550, __extension__ __PRETTY_FUNCTION__); }))
;
1551 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1551, __extension__ __PRETTY_FUNCTION__); }))
;
1552 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1552
, __extension__ __PRETTY_FUNCTION__); }))
;
1553 const int input_size_per_p = input_size / parallel_count;
1554 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1555 const int output_size_per_p = output_size / parallel_count;
1556 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1557 const int fit_size_per_p = fit_size / parallel_count;
1558 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1559 }
1560 if (compiled_data->is_test)
1561 {
1562 compiled_data->is_test = 0;
1563 ccv_nnc_graph_exec_update_t update = {
1564 .parallel_count = parallel_count,
1565 .graph = model->graph,
1566 .graph_exec_arena = compiled_data->graph_exec_arena,
1567 };
1568 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1569 }
1570 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1571}
1572
1573// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1574static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1575{
1576 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1577 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1578 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1579 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1579, __extension__ __PRETTY_FUNCTION__
); }))
;
1580 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1580, __extension__ __PRETTY_FUNCTION__
); }))
;
1581 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1582 // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel.
1583 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1584 {
1585 const int evaluate_to_size = compiled_data->evaluate.to_size;
1586 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1587 _ccv_cnnp_model_set_rewindables(model);
1588 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1589 0, 0,
1590 0, 0, 0,
1591 0, 0, 0,
1592 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1593 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1594 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1595 int i, j;
1596 for (i = 0; i < evaluate_to_size; i++)
1597 for (j = 1; j < parallel_count; j++)
1598 {
1599 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1600 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1601 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1602 }
1603 }
1604 const int tensors_init = !!compiled_data->tensors_init.v;
1605 if (!tensors_init)
1606 _ccv_cnnp_model_tensors_init(model, compiled_data);
1607 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1608 // Check if it is not fully allocated, if it is not, init_1.
1609 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1610 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1611 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1611, __extension__ __PRETTY_FUNCTION__); }))
;
1612 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1612, __extension__ __PRETTY_FUNCTION__); }))
;
1613 const int input_size_per_p = input_size / parallel_count;
1614 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1615 const int output_size_per_p = output_size / parallel_count;
1616 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1617 const int parameter_size = compiled_data->parameters->rnum;
1618 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1619 const int internal_size = compiled_data->internals->rnum;
1620 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1621 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1622 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1623 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1624 ccv_array_free(tensor_binds);
1625 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1626 // If tensor is not init'ed, we need to init states first.
1627 if (tensors_init && parallel_count > 1)
1628 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1629 if (_ccv_cnnp_any_to_init(compiled_data))
1630 {
1631 ccv_nnc_tensor_init_states_t tensor_init_states = {
1632 .parallel_count = parallel_count,
1633 .graph = model->graph,
1634 .compiled_data = compiled_data,
1635 .tensor_arena = compiled_data->tensor_arena
1636 };
1637 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1638 }
1639 compiled_data->is_test = 1;
1640 ccv_nnc_graph_exec_update_t update = {
1641 .parallel_count = parallel_count,
1642 .graph = model->graph,
1643 .graph_exec_arena = compiled_data->graph_exec_arena,
1644 };
1645 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1646 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1647 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1648}
1649
1650static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1651{
1652 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1652, __extension__ __PRETTY_FUNCTION__
); }))
;
1653 const int parameter_size = compiled_data->parameters->rnum;
1654 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1655 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1656 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1657 int i, j;
1658 for (i = 0; i < parameter_size; i++)
1659 {
1660 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1661 {
1662 compiled_data->tensors.gradients[i] = 0;
1663 compiled_data->tensors.accum_gradients[i] = 0;
1664 for (j = 1; j < parallel_count; j++)
1665 {
1666 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1667 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1668 }
1669 continue;
1670 }
1671 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1672 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1673 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1674 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1675 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1676 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1677 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1678 for (j = 1; j < parallel_count; j++)
1679 {
1680 if (j != device_id)
1681 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1682 else
1683 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1684 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1685 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1686 }
1687 }
1688}
1689
1690static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1691{
1692 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1693 return 1;
1694 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1695 return 0;
1696 int i;
1697 for (i = 0; i < input_size; i++)
1698 if (!(disable_outgrad & ((uint64_t)1 << i)))
1699 return 0;
1700 return 1;
1701}
1702
1703// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1704// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1705static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1706{
1707 int i, j;
1708 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1709 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1710 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1710, __extension__ __PRETTY_FUNCTION__
); }))
;
1711 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1712 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1713 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1713, __extension__ __PRETTY_FUNCTION__
); }))
;
1714 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1714, __extension__ __PRETTY_FUNCTION__
); }))
;
1715 // There shouldn't be a loss function if we evaluate with multistage jit.
1716 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1716, __extension__ __PRETTY_FUNCTION__
); }))
;
1717 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1718 {
1719 _ccv_cnnp_model_set_rewindables(model);
1720 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1721 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1722 _ccv_cnnp_model_rewind_graph(model);
1723 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1724 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1725 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1726 }
1727 const int tensors_init = !!compiled_data->tensors_init.v;
1728 if (!tensors_init)
1729 _ccv_cnnp_model_tensors_init(model, compiled_data);
1730 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1731 // Check if it is not fully allocated, if it is not, init_1.
1732 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1733 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1734 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1734, __extension__ __PRETTY_FUNCTION__); }))
;
1735 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1735, __extension__ __PRETTY_FUNCTION__); }))
;
1736 const int input_size_per_p = input_size / parallel_count;
1737 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1738 const int output_size_per_p = output_size / parallel_count;
1739 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1740 const int parameter_size = compiled_data->parameters->rnum;
1741 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1742 const int internal_size = compiled_data->internals->rnum;
1743 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1744 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1745 if (!compiled_data->tensors.gradients)
1746 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1747 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1748 if (compiled_data->backward.to_size > 0)
1749 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1750 else
1751 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1752 ccv_array_free(tensor_binds);
1753 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1754 if (tensors_init && parallel_count > 1)
1755 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1756 // If tensor is not init'ed, we need to init states first.
1757 if (_ccv_cnnp_any_to_init(compiled_data))
1758 {
1759 ccv_nnc_tensor_init_states_t tensor_init_states = {
1760 .parallel_count = parallel_count,
1761 .graph = model->graph,
1762 .compiled_data = compiled_data,
1763 .tensor_arena = compiled_data->tensor_arena
1764 };
1765 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1766 }
1767 compiled_data->is_test = is_test;
1768 ccv_nnc_graph_exec_update_t update = {
1769 .parallel_count = parallel_count,
1770 .graph = model->graph,
1771 .graph_exec_arena = compiled_data->graph_exec_arena,
1772 };
1773 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1774 const int evaluate_to_size = compiled_data->evaluate.to_size;
1775 compiled_data->evaluate.to_op_size = 0;
1776 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1777 for (i = 0; i < evaluate_to_size; i++)
1778 {
1779 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1780 if (to_op.graph)
1781 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1782 const int* tos;
1783 int to_size;
1784 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1785 for (j = 0; j < to_size; j++)
1786 {
1787 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1788 .d = tos[j],
1789 .graph = model->graph
1790 });
1791 if (to_op.graph)
1792 ccv_array_add_unique_int(backward_from, to_op.d);
1793 }
1794 }
1795 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1795, __extension__
__PRETTY_FUNCTION__); }))
;
1796 compiled_data->backward.from_op_size = backward_from->rnum;
1797 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1798 for (i = 0; i < backward_from->rnum; i++)
1799 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1800 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1801 .graph = compiled_data->graph,
1802 };
1803 ccv_array_free(backward_from);
1804 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1805 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1806}
1807
1808void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1809{
1810 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1811 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1811, __extension__ __PRETTY_FUNCTION__); }))
;
1812 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1813 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1813, __extension__ __PRETTY_FUNCTION__
); }))
;
1814 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1814, __extension__ __PRETTY_FUNCTION__
); }))
;
1815 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1815, __extension__ __PRETTY_FUNCTION__); }))
;
1816 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1817 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1818 if (!compiled_data->graph || mode_mismatch)
1819 {
1820 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1821 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad.
1822 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1823 if (params.requires_grad)
1824 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1825 else
1826 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1827 } else {
1828 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1829 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1829, __extension__ __PRETTY_FUNCTION__); }))
;
1830 const int input_size_per_p = input_size / parallel_count;
1831 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1832 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1832, __extension__ __PRETTY_FUNCTION__); }))
;
1833 const int output_size_per_p = output_size / parallel_count;
1834 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1835 }
1836 if (compiled_data->is_test != params.is_test)
1837 {
1838 compiled_data->is_test = params.is_test;
1839 ccv_nnc_graph_exec_update_t update = {
1840 .parallel_count = parallel_count,
1841 .graph = model->graph,
1842 .graph_exec_arena = compiled_data->graph_exec_arena,
1843 };
1844 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1845 }
1846}
1847
1848void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1849{
1850 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1851 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1851, __extension__ __PRETTY_FUNCTION__); }))
;
1852 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1853 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1854 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1855 else {
1856 if (!compiled_data->evaluate.schedule)
1857 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1858 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1859 }
1860}
1861
1862// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1863// Particularly, this method compiles the accumulator graph.
1864static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1865{
1866 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1867 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1867, __extension__ __PRETTY_FUNCTION__); }))
;
1868 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1868, __extension__ __PRETTY_FUNCTION__
); }))
;
1869 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1870 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1871 const int parameter_size = compiled_data->parameters->rnum;
1872 int i, j;
1873 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1874 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1875 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1876 for (i = 0; i < parameter_size; i++)
1877 for (j = 0; j < parallel_count; j++)
1878 if (compiled_data->tensors.gradients[i + j * parameter_size])
1879 {
1880 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1881 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
1882 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1883 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1884 ccv_nnc_tensor_symbol_t inputs[2];
1885 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1886 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1887 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1888 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1889 } else {
1890 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1891 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1892 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1893 }
1894 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1895 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1896 {
1897 ccv_nnc_symbolic_graph_free(accum);
1898 // Create empty graph.
1899 compiled_data->backward.accum = ccv_nnc_graph_new();
1900 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1901 return;
1902 }
1903 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1904 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1905 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1906 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1907 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1908 ccv_nnc_symbolic_graph_free(accum);
1909 ccv_array_free(tensor_binds);
1910 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1911}
1912
1913void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1914{
1915 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1916 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1916, __extension__ __PRETTY_FUNCTION__); }))
;
1917 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1917, __extension__ __PRETTY_FUNCTION__
); }))
;
1918 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1919 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1919, __extension__ __PRETTY_FUNCTION__
); }))
;
1920 if (outgrad_size > 0)
1921 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1921, __extension__ __PRETTY_FUNCTION__
); }))
; }
1922 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1922, __extension__ __PRETTY_FUNCTION__); }))
;
1923 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1923, __extension__ __PRETTY_FUNCTION__
); }))
;
1924 const int parameter_size = compiled_data->parameters->rnum;
1925 // If we need to accumulate the gradients now, do jit on accumulator.
1926 if (compiled_data->backward.count > 0)
1927 {
1928 if (!compiled_data->backward.accum)
1929 _ccv_cnnp_model_multistage_jit_1(model);
1930 else if (compiled_data->backward.count == 1) {
1931 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1932 int i;
1933 for (i = 0; i < parameter_size * parallel_count; i++)
1934 {
1935 ccv_nnc_tensor_t* tensor;
1936 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
1937 }
1938 if (compiled_data->backward.tensor_arena)
1939 {
1940 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1941 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1942 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1943 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1944 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1945 }
1946 }
1947 }
1948 const int ingrad_size_per_p = model->output_size;
1949 const int outgrad_size_per_p = compiled_data->outgrad_size;
1950 int i, j;
1951 for (i = 0; i < ingrad_size_per_p; i++)
1952 {
1953 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1954 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1955 {
1956 // Set it to 1 if it is not specified.
1957 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1958 if (ingrad_tensor)
1959 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1960 for (j = 1; j < parallel_count; j++)
1961 {
1962 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
1963 if (ingrad_tensor)
1964 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1965 }
1966 } else {
1967 // Make sure the length matches, in case it is an alias.
1968 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 1968, __extension__ __PRETTY_FUNCTION__
); }))
;
1969 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
1970 for (j = 1; j < parallel_count; j++)
1971 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
1972 }
1973 }
1974 if (outgrad_size > 0)
1975 {
1976 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 1976, __extension__ __PRETTY_FUNCTION__
); }))
;
1977 for (i = 0; i < outgrad_size_per_p; i++)
1978 if (outgrads[i])
1979 {
1980 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
1981 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
1982 for (j = 1; j < parallel_count; j++)
1983 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
1984 }
1985 } else {
1986 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1987, __extension__ __PRETTY_FUNCTION__
); }))
1987 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1987, __extension__ __PRETTY_FUNCTION__
); }))
;
1988 }
1989 // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients.
1990 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
1991 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
1992 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
1993 if (!compiled_data->backward.schedule)
1994 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
1995 // Run the backward pass.
1996 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
1997 // If we need to run accumulation round, do that now.
1998 if (compiled_data->backward.count > 0)
1999 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2000 // Update the count, this determines whether we need to accumulate or not.
2001 ++compiled_data->backward.count;
2002}
2003
2004// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2005// Particularly, this method compiles the parameter update graph.
2006static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2007{
2008 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2009 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2009, __extension__ __PRETTY_FUNCTION__
); }))
;
2010 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2011 const int parameter_size = compiled_data->parameters->rnum;
2012 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2013 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2014 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2015 // Bind accumulated gradients.
2016 if (compiled_data->backward.count > 1)
2017 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2018 else
2019 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2020 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2021 int i, j;
2022 for (i = 0; i < compiled_data->backward.to_size; i++)
2023 {
2024 const int* tos;
2025 int to_size;
2026 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2027 for (j = 0; j < to_size; j++)
2028 {
2029 // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply
2030 // gradients graph.
2031 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2032 .d = tos[j],
2033 .graph = model->graph,
2034 });
2035 if (!exec.graph)
2036 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2037 }
2038 }
2039 const int from_size = apply_gradients_from->rnum;
2040 if (from_size == 0)
2041 {
2042 ccv_array_free(apply_gradients_from);
2043 ccv_array_free(tensor_binds);
2044 return;
2045 }
2046 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2047 for (i = 0; i < from_size; i++)
2048 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2049 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2050 .graph = model->graph
2051 };
2052 ccv_array_free(apply_gradients_from);
2053 // It can only ends with updates on the parameters.
2054 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2055 for (i = 0; i < parameter_size; i++)
2056 {
2057 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2058 continue;
2059 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2060 for (j = 1; j < parallel_count; j++)
2061 {
2062 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2063 ccv_array_push(tos, &copy);
2064 }
2065 }
2066 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2067 ccv_array_free(tos);
2068 ccv_array_free(tensor_binds);
2069 ccfreefree(froms);
2070 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2071 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2072 {
2073 // Skip on no tensor.
2074 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2075 continue;
2076 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2077 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2078 for (j = 1; j < parallel_count; j++)
2079 {
2080 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2081 if (copy)
2082 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2083 }
2084 }
2085 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2086}
2087
2088void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2089{
2090 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2091 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2091, __extension__ __PRETTY_FUNCTION__); }))
;
2092 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2092, __extension__ __PRETTY_FUNCTION__
); }))
;
2093 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2094 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2094, __extension__ __PRETTY_FUNCTION__); }))
;
2095 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2095, __extension__ __PRETTY_FUNCTION__
); }))
;
2096 // Skip if there is no backward pass.
2097 if (compiled_data->backward.count <= 0)
2098 return;
2099 // Skip if there is no parameters.
2100 if (compiled_data->parameters->rnum == 0)
2101 {
2102 compiled_data->backward.count = 0;
2103 return;
2104 }
2105 if (!compiled_data->apply_gradients.graph)
2106 _ccv_cnnp_model_multistage_jit_2(model);
2107 else {
2108 const int parameter_size = compiled_data->parameters->rnum;
2109 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2110 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2111 if (compiled_data->backward.count > 1)
2112 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2113 else
2114 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2115 }
2116 if (compiled_data->apply_gradients.graph)
2117 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2118 // Reset backward count to 0.
2119 compiled_data->backward.count = 0;
2120}
2121
2122void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2123{
2124 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2125 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2126 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2126, __extension__ __PRETTY_FUNCTION__
); }))
;
2127 const int tensors_init = !!compiled_data->tensors_init.v;
2128 if (!tensors_init)
2129 _ccv_cnnp_model_tensors_init(model, compiled_data);
2130 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2131 // Check if it is not fully allocated, if it is not, init_1.
2132 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2133 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2134 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2135 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2136 if (param_ref < 0)
2137 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2137
, __extension__ __PRETTY_FUNCTION__); }))
; }
2138 else
2139 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2139, __extension__ __PRETTY_FUNCTION__
); }))
; }
2140 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2141 ccv_array_free(parameter_indices);
2142 const int parameter_size = compiled_data->parameters->rnum;
2143 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2143
, __extension__ __PRETTY_FUNCTION__); }))
;
2144 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2144, __extension__ __PRETTY_FUNCTION__
); }))
;
2145 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2146 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2147 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2147, __extension__
__PRETTY_FUNCTION__); }))
;
2148 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2149 int i;
2150 for (i = 1; i < parallel_count; i++)
2151 {
2152 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2153 if (copy_tensor)
2154 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2155 }
2156 // Mark this symbol as init'ed.
2157 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2158 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2159 init_v[s >> 5] |= (1u << (s & 0x1f));
2160}
2161
2162void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2163{
2164 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2165 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2166 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2166, __extension__ __PRETTY_FUNCTION__
); }))
;
2167 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2167, __extension__ __PRETTY_FUNCTION__
); }))
;
2168 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2169 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2170 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2171 if (param_ref < 0)
2172 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2172
, __extension__ __PRETTY_FUNCTION__); }))
; }
2173 else
2174 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2174, __extension__ __PRETTY_FUNCTION__
); }))
; }
2175 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2176 ccv_array_free(parameter_indices);
2177 const int parameter_size = compiled_data->parameters->rnum;
2178 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2178
, __extension__ __PRETTY_FUNCTION__); }))
;
2179 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2179, __extension__ __PRETTY_FUNCTION__
); }))
;
2180 // We don't need to consider parallel_count, every parameter on each device is identical.
2181 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2182 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2182, __extension__
__PRETTY_FUNCTION__); }))
;
2183 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2184}
2185
2186ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2187{
2188 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2189 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2190 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2190, __extension__ __PRETTY_FUNCTION__
); }))
;
2191 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2191, __extension__ __PRETTY_FUNCTION__
); }))
;
2192 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2193 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2194 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2195 if (param_ref < 0)
2196 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2196
, __extension__ __PRETTY_FUNCTION__); }))
; }
2197 else
2198 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2198, __extension__ __PRETTY_FUNCTION__
); }))
; }
2199 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2200 ccv_array_free(parameter_indices);
2201 const int parameter_size = compiled_data->parameters->rnum;
2202 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2202
, __extension__ __PRETTY_FUNCTION__); }))
;
2203 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2203, __extension__ __PRETTY_FUNCTION__
); }))
;
2204 // We don't need to consider parallel_count, every parameter on each device is identical.
2205 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2206 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2206, __extension__
__PRETTY_FUNCTION__); }))
;
2207 return tensor->info;
2208}
2209
2210const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2211{
2212 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2213 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2214 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2214, __extension__ __PRETTY_FUNCTION__
); }))
;
2215 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2216 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2217 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2218 if (param_ref < 0)
2219 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2219
, __extension__ __PRETTY_FUNCTION__); }))
; }
2220 else
2221 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2221, __extension__ __PRETTY_FUNCTION__
); }))
; }
2222 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2223 ccv_array_free(parameter_indices);
2224 const int parameter_size = compiled_data->parameters->rnum;
2225 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2225
, __extension__ __PRETTY_FUNCTION__); }))
;
2226 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2226, __extension__ __PRETTY_FUNCTION__
); }))
;
2227 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2228}
2229
2230int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2231{
2232 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2232, __extension__ __PRETTY_FUNCTION__
); }))
;
2233 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2234 return compiled_data->parameters->rnum;
2235}
2236
2237ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2238{
2239 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2240 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2240, __extension__ __PRETTY_FUNCTION__); }))
;
2241 const int parameter_size = compiled_data->parameters->rnum;
2242 int i;
2243 for (i = 0; i < parameter_size; i++)
2244 {
2245 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2246 if (first(model, name, context))
2247 return ccv_cnnp_model_parameters(model, -1, i);
2248 }
2249 return 0;
2250}
2251
2252ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2253{
2254 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2255 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2255, __extension__ __PRETTY_FUNCTION__); }))
;
2256 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2257 const int parameter_size = compiled_data->parameters->rnum;
2258 int i;
2259 for (i = 0; i < parameter_size; i++)
2260 {
2261 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2262 if (filter(model, name, context))
2263 {
2264 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2265 ccv_array_push(parameters, &parameter);
2266 }
2267 }
2268 return parameters;
2269
2270}
2271
2272CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2273{
2274 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2275 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2275, __extension__ __PRETTY_FUNCTION__); }))
;
2276 const int tensors_init = !!compiled_data->tensors_init.v;
2277 if (!tensors_init) // If nothing initialized, we return parameter 0.
2278 return ccv_cnnp_model_parameters(model, -1, 0);
2279 const int parameter_size = compiled_data->parameters->rnum;
2280 int i;
2281 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2282 for (i = 0; i < parameter_size; i++)
2283 {
2284 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2285 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2286 return ccv_cnnp_model_parameters(model, -1, i);
2287 }
2288 return 0;
2289}
2290
2291static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2292{
2293 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2294 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2294, __extension__
__PRETTY_FUNCTION__); }))
;
2295 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2296 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2297 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2298 return to_parameter_indices;
2299}
2300
2301static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2302{
2303 // If the model is not compiled yet. Compile them now.
2304 if (!model->graph)
2305 {
2306 model->graph = ccv_nnc_symbolic_graph_new();
2307 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2307, __extension__ __PRETTY_FUNCTION__
); }))
;
2308 const int input_size = from_model->input_size;
2309 ccv_nnc_tensor_param_t input_params[input_size];
2310 int i;
2311 for (i = 0; i < input_size; i++)
2312 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2313 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2314 model->parallel_count = from_model->parallel_count;
2315 model->memory_compression = from_model->memory_compression;
2316 model->memory_reduction = from_model->memory_reduction;
2317 model->gradient_checkpointing = from_model->gradient_checkpointing;
2318 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2319 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2320 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2321 }
2322 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2323 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2323, __extension__ __PRETTY_FUNCTION__
); }))
;
2324 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2325 if (!to_tensors_init)
2326 {
2327 if (only_init_0)
2328 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2329 else
2330 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2331 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2332 // Check if it is not fully allocated, if it is not, init_1.
2333 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2334 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2334, __extension__ __PRETTY_FUNCTION__
); }))
;
2335 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2336 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2337 if (*from_param_ref < 0 && *param_ref >= 0)
2338 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2338, __extension__ __PRETTY_FUNCTION__
); }))
; }
2339 else if (*from_param_ref >= 0)
2340 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2340, __extension__ __PRETTY_FUNCTION__
); }))
; }
2341 if (*param_ref < 0 && *from_param_ref >= 0)
2342 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2342, __extension__ __PRETTY_FUNCTION__); }))
; }
2343 else if (*param_ref >= 0)
2344 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2344, __extension__ __PRETTY_FUNCTION__
); }))
; }
2345}
2346
2347void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2348{
2349 ccv_array_t* to_parameter_indices;
2350 int to_param_ref;
2351 ccv_array_t* from_parameter_indices;
2352 int from_param_ref;
2353 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2354 // Should be exactly the same tensor.
2355 if (to_param_ref < 0 && from_param_ref < 0)
2356 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2356, __extension__ __PRETTY_FUNCTION__
); }))
; }
2357 // To models.
2358 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2359 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2359, __extension__ __PRETTY_FUNCTION__
); }))
;
2360 // From models.
2361 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2362 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2363 const int to_parameter_size = to_compiled_data->parameters->rnum;
2364 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2365 int i, j;
2366 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2367 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2368 for (i = 0; i < rnum; i++)
2369 {
2370 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2371 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2371, __extension__ __PRETTY_FUNCTION__); }))
;
2372 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2372, __extension__ __PRETTY_FUNCTION__
); }))
;
2373 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2374 // If the original is not init'ed. We cannot copy from.
2375 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2376 continue;
2377 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2378 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2378, __extension__ __PRETTY_FUNCTION__); }))
;
2379 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2379, __extension__ __PRETTY_FUNCTION__
); }))
;
2380 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2381 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2381, __extension__
__PRETTY_FUNCTION__); }))
;
2382 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2383 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2383, __extension__
__PRETTY_FUNCTION__); }))
;
2384 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2385 for (j = 1; j < parallel_count; j++)
2386 {
2387 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2388 if (copy_tensor)
2389 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2390 }
2391 // Mark this symbol as init'ed.
2392 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2393 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2394 }
2395 ccv_array_free(to_parameter_indices);
2396 ccv_array_free(from_parameter_indices);
2397}
2398
2399KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27
Taking true branch
28
Taking false branch
29
Calling 'kh_resize_ccv_cnnp_parameter_id'
30
Taking true branch
31
Assuming the condition is false
32
Taking false branch
33
'?' condition is true
34
Assuming 'new_flags' is non-null
35
Taking false branch
36
'?' condition is true
37
Taking true branch
38
Storing uninitialized value
39
Assuming 'new_keys' is non-null
40
Taking false branch
41
Taking true branch
42
Assuming 'new_vals' is non-null
43
Taking false branch
44
Taking true branch
45
Loop condition is false. Execution continues on line 2399
46
Taking false branch
47
Returning from 'kh_resize_ccv_cnnp_parameter_id'
48
Taking false branch
49
The value 0 is assigned to 'i'
50
Assuming the condition is false
51
Taking false branch
52
Assuming the condition is false
53
1st function call argument is an uninitialized value
2400
2401void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2402{
2403 ccv_array_t* to_parameter_indices;
2404 int to_param_ref;
2405 ccv_array_t* from_parameter_indices;
2406 int from_param_ref;
2407 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2408 // Should be exactly the same tensor.
2409 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2410 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2410, __extension__ __PRETTY_FUNCTION__
); }))
; }
2411 // To models.
2412 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2413 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2413, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2414 // From models.
2415 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2416 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2417 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2417, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count can share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2418 const int from_parameter_size = from_compiled_data->parameters->rnum;
2419 const int to_parameter_size = to_compiled_data->parameters->rnum;
2420 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2421 int i, j;
2422 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2423 char* updated_name = 0;
2424 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2425 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2426 for (i = 0; i < rnum; i++)
2427 {
2428 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2429 // Need to figure out how to use the renamer here.
2430 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2431 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2431, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2432 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2432, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2433 if (renamer
18.1
'renamer' is non-null
)
2434 {
2435 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2436 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2437 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2438 updated_name = (char*)ccmallocmalloc(1024);
2439 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2440 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2441 memcpy(updated_name, src_name, src_name_len);
2442 updated_name[src_name_len] = 0;
2443 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2444 continue; // Skip this.
2445 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2446 {
2447 // Nothing changed.
2448 } else {
2449 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2450 {
2451 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2452 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
2453 {
2454 int ret;
2455 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
2456 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2456
, __extension__ __PRETTY_FUNCTION__); }))
;
2457 kh_val(id_map, k)((id_map)->vals[k]) = j;
2458 }
2459 }
2460 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2461 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2462 continue;
2463 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2464 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2464, __extension__ __PRETTY_FUNCTION__); }))
;
2465 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2465, __extension__
__PRETTY_FUNCTION__); }))
;
2466 }
2467 }
2468 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2468, __extension__ __PRETTY_FUNCTION__); }))
;
2469 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2469, __extension__
__PRETTY_FUNCTION__); }))
;
2470 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2471 // If the original is not init'ed. We cannot share from.
2472 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2473 continue;
2474 for (j = 0; j < parallel_count; j++)
2475 {
2476 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2477 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2477, __extension__
__PRETTY_FUNCTION__); }))
;
2478 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2479 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2480 ccv_nnc_tensor_free(dest);
2481 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2482 }
2483 // Mark this symbol as init'ed.
2484 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2485 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2486 }
2487 ccv_array_free(to_parameter_indices);
2488 ccv_array_free(from_parameter_indices);
2489 if (id_map)
2490 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2491 if (updated_name)
2492 ccfreefree(updated_name);
2493 // Mark it as incomplete so we will call init_1.
2494 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2495 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2496 else // Remove the flag.
2497 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2498}
2499
2500ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2501{
2502 if (!compiled_data->stream_map)
2503 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2504 int ret = 0;
2505 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2506 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2506, __extension__ __PRETTY_FUNCTION__); }))
;
2507 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2508 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2509 if (ret != 0)
2510 {
2511 stream = ccv_nnc_stream_context_new(type);
2512 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2513 }
2514 return stream;
2515}
2516
2517void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2518{
2519 ccv_array_t* to_parameter_indices;
2520 int to_param_ref;
2521 ccv_array_t* from_parameter_indices;
2522 int from_param_ref;
2523 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2524 // Should be exactly the same tensor.
2525 if (to_param_ref < 0 && from_param_ref < 0)
2526 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2526, __extension__ __PRETTY_FUNCTION__
); }))
; }
2527 // To models.
2528 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2529 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2529, __extension__ __PRETTY_FUNCTION__
); }))
;
2530 // From models.
2531 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2532 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2533 const int to_parameter_size = to_compiled_data->parameters->rnum;
2534 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2535 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2535, __extension__ __PRETTY_FUNCTION__
); }))
;
2536 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2536, __extension__ __PRETTY_FUNCTION__
); }))
;
2537 int i, j;
2538 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2539 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2540 for (i = 0; i < aux_in_size; i++)
2541 inputs[i + 2] = aux_ins[i];
2542 for (i = 0; i < aux_out_size; i++)
2543 outputs[i + 1] = aux_outs[i];
2544 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2545 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2546 for (i = 0; i < rnum; i++)
2547 {
2548 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2549 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2549, __extension__ __PRETTY_FUNCTION__); }))
;
2550 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2550, __extension__ __PRETTY_FUNCTION__
); }))
;
2551 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2552 // If the original is not init'ed. We cannot copy from.
2553 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2554 continue;
2555 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2556 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2556, __extension__ __PRETTY_FUNCTION__); }))
;
2557 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2557, __extension__ __PRETTY_FUNCTION__
); }))
;
2558 if (parallel_count > 1)
2559 {
2560 ccv_nnc_stream_context_t* streams[parallel_count];
2561 ccv_nnc_stream_signal_t* signal;
2562 if (stream_context)
2563 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2564 for (j = 0; j < parallel_count; j++)
2565 {
2566 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2567 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2568 if (!dest || !src)
2569 {
2570 streams[j] = 0;
2571 continue;
2572 }
2573 // At the moment, can only handle them on the same device.
2574 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2574, __extension__ __PRETTY_FUNCTION__
); }))
;
2575 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2575, __extension__ __PRETTY_FUNCTION__
); }))
;
2576 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2577 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2578 int type = stream_type;
2579 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2580 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2581 // Wait signal to finish.
2582 if (stream_context)
2583 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2584 inputs[0] = outputs[0] = dest;
2585 inputs[1] = src;
2586 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2587 if (stream_context)
2588 {
2589 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2590 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2591 }
2592 streams[j] = stream_0;
2593 }
2594 // If this should be blocking, blocking it.
2595 if (!stream_context)
2596 for (j = 0; j < parallel_count; j++)
2597 if (streams[j])
2598 ccv_nnc_stream_context_wait(streams[j]);
2599 } else {
2600 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2601 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2601, __extension__
__PRETTY_FUNCTION__); }))
;
2602 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2603 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2603, __extension__
__PRETTY_FUNCTION__); }))
;
2604 inputs[0] = outputs[0] = dest;
2605 inputs[1] = src;
2606 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2607 }
2608 // Mark this symbol as init'ed.
2609 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2610 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2611 }
2612 ccv_array_free(to_parameter_indices);
2613 ccv_array_free(from_parameter_indices);
2614}
2615
2616void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2617{
2618 int to_param_ref;
2619 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2620 // To models.
2621 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2622 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2622, __extension__ __PRETTY_FUNCTION__
); }))
;
2623 // Tensor has to be inited already.
2624 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2624, __extension__ __PRETTY_FUNCTION__
); }))
;
2625 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2625, __extension__ __PRETTY_FUNCTION__
); }))
;
2626 // From models.
2627 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2628 const int to_parameter_size = to_compiled_data->parameters->rnum;
2629 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2630 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2630, __extension__ __PRETTY_FUNCTION__
); }))
;
2631 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2631, __extension__ __PRETTY_FUNCTION__
); }))
;
2632 int i, j;
2633 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2634 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2635 for (i = 0; i < aux_in_size; i++)
2636 inputs[i + 1] = aux_ins[i];
2637 for (i = 0; i < aux_out_size; i++)
2638 outputs[i + 1] = aux_outs[i];
2639 for (i = 0; i < rnum; i++)
2640 {
2641 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2642 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2642, __extension__ __PRETTY_FUNCTION__); }))
;
2643 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2643, __extension__ __PRETTY_FUNCTION__
); }))
;
2644 if (parallel_count > 1)
2645 {
2646 ccv_nnc_stream_context_t* streams[parallel_count];
2647 ccv_nnc_stream_signal_t* signal;
2648 if (stream_context)
2649 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2650 for (j = 0; j < parallel_count; j++)
2651 {
2652 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2653 if (!dest)
2654 {
2655 streams[j] = 0;
2656 continue;
2657 }
2658 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2659 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2660 int type = stream_type;
2661 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2662 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2663 // Wait signal to finish.
2664 if (stream_context)
2665 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2666 inputs[0] = outputs[0] = dest;
2667 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2668 if (stream_context)
2669 {
2670 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2671 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2672 }
2673 streams[j] = stream_0;
2674 }
2675 // If this should be blocking, blocking it.
2676 if (!stream_context)
2677 for (j = 0; j < parallel_count; j++)
2678 if (streams[j])
2679 ccv_nnc_stream_context_wait(streams[j]);
2680 } else {
2681 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2682 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2682, __extension__
__PRETTY_FUNCTION__); }))
;
2683 inputs[0] = outputs[0] = dest;
2684 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2685 }
2686 // No need to mark this symbol as init'ed, it is already.
2687 }
2688 ccv_array_free(to_parameter_indices);
2689}
2690
2691void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2692{
2693 int to_param_ref;
2694 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2695 // To models.
2696 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2697 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2697, __extension__ __PRETTY_FUNCTION__
); }))
;
2698 // Tensor has to be inited already.
2699 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2699, __extension__ __PRETTY_FUNCTION__
); }))
;
2700 ccv_nnc_tensor_t** tensor_gradients;
2701 if (to_compiled_data->backward.count > 1)
2702 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2703 else
2704 tensor_gradients = to_compiled_data->tensors.gradients;
2705 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 2705, __extension__ __PRETTY_FUNCTION__
); }))
;
2706 // From models.
2707 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2708 const int to_parameter_size = to_compiled_data->parameters->rnum;
2709 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2710 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2710, __extension__ __PRETTY_FUNCTION__
); }))
;
2711 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2711, __extension__ __PRETTY_FUNCTION__
); }))
;
2712 int i, j;
2713 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2714 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2715 for (i = 0; i < aux_in_size; i++)
2716 inputs[i + 1] = aux_ins[i];
2717 for (i = 0; i < aux_out_size; i++)
2718 outputs[i + 1] = aux_outs[i];
2719 for (i = 0; i < rnum; i++)
2720 {
2721 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2722 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2722, __extension__ __PRETTY_FUNCTION__); }))
;
2723 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2723, __extension__ __PRETTY_FUNCTION__
); }))
;
2724 if (parallel_count > 1)
2725 {
2726 ccv_nnc_stream_context_t* streams[parallel_count];
2727 ccv_nnc_stream_signal_t* signal;
2728 if (stream_context)
2729 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2730 for (j = 0; j < parallel_count; j++)
2731 {
2732 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2733 if (!dest)
2734 {
2735 streams[j] = 0;
2736 continue;
2737 }
2738 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2739 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2740 int type = stream_type;
2741 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2742 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2743 // Wait signal to finish.
2744 if (stream_context)
2745 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2746 inputs[0] = outputs[0] = dest;
2747 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2748 if (stream_context)
2749 {
2750 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2751 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2752 }
2753 streams[j] = stream_0;
2754 }
2755 // If this should be blocking, blocking it.
2756 if (!stream_context)
2757 for (j = 0; j < parallel_count; j++)
2758 if (streams[j])
2759 ccv_nnc_stream_context_wait(streams[j]);
2760 } else {
2761 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2762 if (!dest)
2763 continue;
2764 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2764, __extension__
__PRETTY_FUNCTION__); }))
;
2765 inputs[0] = outputs[0] = dest;
2766 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2767 }
2768 // No need to mark this symbol as init'ed, it is already.
2769 }
2770 ccv_array_free(to_parameter_indices);
2771}
2772
2773ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2774{
2775 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2776 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2776, __extension__ __PRETTY_FUNCTION__); }))
;
2777 return compiled_data->minimize.minimizer;
2778}
2779
2780void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2781{
2782 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2783 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2783, __extension__ __PRETTY_FUNCTION__); }))
;
2784 const int parameter_size = compiled_data->parameters->rnum;
2785 if (parameter_size == 0)
2786 return;
2787 if (reset)
2788 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 2788, __extension__ __PRETTY_FUNCTION__
); }))
; }
2789 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2790 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2791 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2792 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2793 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2794 // We update all parameters, at this point, we have one minimizer.
2795 if (set_parameters == 0 || set_parameter_size == 0)
2796 compiled_data->minimize.minimizer = minimizer;
2797 int i;
2798 if (set_parameters && set_parameter_size)
2799 {
2800 // I need to save what's the minimizer along with this.
2801 if (!compiled_data->minimize.parameters)
2802 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2803 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2804 set_minimizer_for_parameter->minimizer = minimizer;
2805 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2806 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2807 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2808 }
2809 // If reset is true, clear the parameters array.
2810 if (reset && compiled_data->minimize.parameters)
2811 {
2812 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2813 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2814 ccv_array_clear(compiled_data->minimize.parameters);
2815 }
2816 if (!compiled_data->update_nodes)
2817 return;
2818 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2819 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 2819, __extension__ __PRETTY_FUNCTION__); }))
;
2820 if (saved_aux_size > old_max_saved_aux_size)
2821 {
2822 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 2822, __extension__ __PRETTY_FUNCTION__
); }))
;
2823 // Reallocate first, move them around later.
2824 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2825 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2826 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2827 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
2828 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2829 }
2830 int flag = 0;
2831 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2832 if (set_parameters && set_parameter_size)
2833 {
2834 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2835 for (i = 0; i < set_parameter_size; i++)
2836 {
2837 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2838 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 2838, __extension__ __PRETTY_FUNCTION__
); }))
;
2839 const int old_rnum = parameter_indices->rnum;
2840 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2841 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2842 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 2842, __extension__ __PRETTY_FUNCTION__
); }))
;
2843 if (param_ref >= 0)
2844 {
2845 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2845, __extension__ __PRETTY_FUNCTION__
); }))
;
2846 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
2847 parameter_indices->rnum = old_rnum + 1;
2848 }
2849 }
2850 // We may have duplicated indices, but that is OK, we will set it twice.
2851 for (i = 0; i < parameter_indices->rnum; i++)
2852 {
2853 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
2854 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2855 flag = 1;
2856 }
2857 ccv_array_free(parameter_indices);
2858 } else {
2859 for (i = 0; i < parameter_size; i++)
2860 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2861 flag = 1;
2862 if (compiled_data->minimize.parameters)
2863 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2864 flag = 1;
2865 }
2866 if (flag)
2867 {
2868 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
2869 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2870 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2871 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2872 }
2873}
2874
2875void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2876{
2877 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2878 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2878, __extension__ __PRETTY_FUNCTION__); }))
;
2879 compiled_data->compile_params = compile_params;
2880}
2881
2882void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2883{
2884 if (model->graph && out_size > 0)
2885 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2886 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2887 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2888 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2889 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2890 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2891 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2892}
2893
2894void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2895{
2896 if (model->graph)
2897 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2898}
2899
2900static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2901{
2902 int i;
2903 const int parameter_size = compiled_data->parameters->rnum;
2904 ccv_array_free(compiled_data->parameters);
2905 if (compiled_data->parameter_flags)
2906 ccfreefree(compiled_data->parameter_flags);
2907 const int internal_size = compiled_data->internals->rnum;
2908 ccv_array_free(compiled_data->internals);
2909 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 2909, __extension__ __PRETTY_FUNCTION__
); }))
;
2910 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 2910, __extension__ __PRETTY_FUNCTION__
); }))
;
2911 for (i = 0; i < parameter_size; i++)
2912 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
2913 ccv_array_free(compiled_data->ids.parameters);
2914 for (i = 0; i < internal_size; i++)
2915 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
2916 ccv_array_free(compiled_data->ids.internals);
2917 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2918 if (compiled_data->tensors.parameters)
2919 {
2920 for (i = 0; i < parameter_size * parallel_count; i++)
2921 // If it is not marked as not belonging, we can free it.
2922 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2923 if (compiled_data->tensors.parameters[i])
2924 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2925 for (i = 0; i < internal_size * parallel_count; i++)
2926 if (compiled_data->tensors.internals[i])
2927 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2928 ccfreefree(compiled_data->tensors.parameters);
2929 }
2930 if (compiled_data->tensors.gradients)
2931 {
2932 for (i = 0; i < parameter_size * parallel_count; i++)
2933 {
2934 if (compiled_data->tensors.gradients[i])
2935 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2936 if (compiled_data->tensors.accum_gradients[i])
2937 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2938 }
2939 ccfreefree(compiled_data->tensors.gradients);
2940 }
2941 if (compiled_data->minimize.parameters)
2942 {
2943 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2944 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2945 ccv_array_free(compiled_data->minimize.parameters);
2946 }
2947 if (compiled_data->rewindables)
2948 ccv_array_free(compiled_data->rewindables);
2949 if (compiled_data->tensors_init.v)
2950 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
2951 if (compiled_data->evaluate.tos)
2952 ccfreefree(compiled_data->evaluate.tos);
2953 compiled_data->evaluate.tos = 0;
2954 if (compiled_data->stream_map)
2955 {
2956 khiter_t k;
2957 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
2958 {
2959 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
2960 continue;
2961 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2962 ccv_nnc_stream_context_free(stream);
2963 }
2964 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
2965 }
2966 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2967 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
2968 _ccv_cnnp_compiled_data_backward_free(compiled_data);
2969 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2970 if (compiled_data->gradient_checkpoints)
2971 {
2972 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
2973 {
2974 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
2975 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 2975, __extension__ __PRETTY_FUNCTION__
); }))
;
2976 ccfreefree(checkpoint->inputs);
2977 ccv_array_free(checkpoint->tensor_symbols);
2978 }
2979 ccv_array_free(compiled_data->gradient_checkpoints);
2980 }
2981 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
2982 ccfreefree(compiled_data);
2983}
2984
2985void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
2986{
2987 if (model->isa->deinit)
2988 model->isa->deinit(model);
2989 if (model->io)
2990 {
2991 int i;
2992 for (i = 0; i < model->io->rnum; i++)
2993 {
2994 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
2995 if (model_io->outgoings)
2996 ccv_array_free(model_io->outgoings);
2997 if (model_io->incomings)
2998 ccv_array_free(model_io->incomings);
2999 if (model_io->dependencies)
3000 ccv_array_free(model_io->dependencies);
3001 ccfreefree(model_io);
3002 }
3003 ccv_array_free(model->io);
3004 }
3005 if (model->parameter_indices)
3006 ccv_array_free(model->parameter_indices);
3007 if (model->inputs)
3008 ccfreefree(model->inputs);
3009 if (model->graph)
3010 ccv_nnc_symbolic_graph_free(model->graph);
3011 if (model->compiled_data)
3012 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3013 if (model->name)
3014 ccfreefree(model->name);
3015 ccfreefree(model);
3016}
3017
3018void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model)
3019{
3020 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
3021 if (!compiled_data)
3022 return;
3023 if (compiled_data->graph)
3024 ccv_nnc_graph_cancel(compiled_data->graph);
3025 if (compiled_data->apply_gradients.graph)
3026 ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph);
3027}