Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2398, column 1
Assigned value is garbage or undefined
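This warning comes from the analyzer's uninitialized-value checks (enabled via the core checkers in the invocation below). As an illustration only — a minimal sketch, not code taken from ccv_cnnp_model.c — the message "Assigned value is garbage or undefined" is reported when the right-hand side of an assignment reads storage that was never written on some path:

    /* Minimal illustrative sketch of the warning pattern (hypothetical function). */
    static int example_garbage_assign(const int n)
    {
        int first; /* never written when n <= 0 */
        if (n > 0)
            first = n;
        const int copy = first; /* analyzer: assigned value is garbage or undefined (when n <= 0) */
        return copy;
    }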

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/18 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -D HAVE_CUDA_SM80 -I /usr/local/include -internal-isystem /usr/local/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-09-15-184933-339151-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6
7// MARK - Level-5 API
8
9ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
10{
11 if (!model->io)
12 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
13 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
14 model_io->param_ref = 0;
15 model_io->param_sel = 0;
16 model_io->visit = 0;
17 model_io->model = model;
18 model_io->dependencies = 0;
19 model_io->dependents = 0;
20 model_io->outgoings = 0;
21 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
22 ccv_array_push(model->io, &model_io);
23 if (input_size > 0)
24 {
25 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
26 ccv_array_resize(model_io->incomings, input_size);
27 int i;
28 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
29 for (i = 0; i < input_size; i++)
30 {
31 if (!inputs[i]->outgoings)
32 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
33 ccv_array_push(inputs[i]->outgoings, &model_io);
34 }
35 } else {
36 model_io->incomings = 0;
37 }
38 return model_io;
39}
40
41void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
42{
43 assert(dependency_size > 0);
44 if (!model_io->dependencies)
45 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
46 int i, j;
47 for (i = 0; i < dependency_size; i++)
48 {
49 int flag = 0;
50 // Check whether it already exists or not.
51 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
52 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
53 flag = 1;
54 if (flag)
55 continue;
56 ccv_array_push(model_io->dependencies, dependencies + i);
57 ++dependencies[i]->dependents;
58 }
59}
60
61int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
62{
63 return model->output_size;
64}
65
66int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
67{
68 // If the model is compiled, it defaults to 1 unless is_trainable is explicitly set.
69 if (model->compiled_data)
70 return model->is_trainable >= 0 ? model->is_trainable : 1;
71 return model->is_trainable;
72}
73
74ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
75{
76 if (!model->io)
77 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
78 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
79 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
80 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
81 model_io->visit = 0;
82 model_io->model = model;
83 model_io->outputs = 0;
84 model_io->dependencies = 0;
85 model_io->dependents = 0;
86 model_io->incomings = 0;
87 model_io->outgoings = 0;
88 ccv_array_push(model->io, &model_io);
89 return model_io;
90}
91
92void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
93{
94 model->notify_hook.func = func;
95 model->notify_hook.context = context;
96}
97
98void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
99{
100 if (model->notify_hook.func)
101 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
102 if (model->isa->notify)
103 model->isa->notify(model, tag, payload);
104}
105
106static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
107{
108 int i, j;
109 for (i = 0; i < graph_exec_symbol_size; i++)
110 {
111 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
112 // Check whether this graph exec symbol has any duplicate.
113 for (j = i + 1; j < graph_exec_symbol_size;)
114 {
115 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
116 // If there is an identical symbol, remove it.
117 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
118 {
119 if (j + 1 < graph_exec_symbol_size)
120 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
121 --graph_exec_symbol_size;
122 continue;
123 }
124 ++j;
125 }
126 }
127 return graph_exec_symbol_size;
128}
129
130void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
131{
132 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
133 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
134 int i;
135 if (!model->parameter_indices)
136 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
137 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
138 {
139 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
140 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
141 {
142 // Only add to parameter_indices if it is trainable.
143 if (add_to_array_context->prefix == 't')
144 ccv_array_add_unique_int(model->parameter_indices, i);
145 // Found it, return, don't add it.
146 return;
147 }
148 }
149 // Only add to parameter_indices if it is trainable.
150 if (add_to_array_context->prefix == 't')
151 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
152 // This is a new one, no need to add_unique_int, it is unique.
153 ccv_array_push(add_to_array_context->symbols, &symbol);
154 if (add_to_array_context->trainables)
155 ccv_array_push(add_to_array_context->trainables, &is_trainable);
156 char id[2048];
157 id[0] = add_to_array_context->prefix;
158 id[1] = '-';
159 int total_len = 2;
160 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
161 {
162 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
163 int len;
164 if (name->name && name->name[0] != '\0')
165 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
166 else
167 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
168 total_len += len;
169 if (total_len >= 2047)
170 break;
171 }
172 if (total_len < 2047)
173 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
174 assert(total_len < 2048);
175 char *heap_id = (char*)ccmalloc(total_len + 1);
176 memcpy(heap_id, id, total_len + 1);
177 ccv_array_push(add_to_array_context->ids, &heap_id);
178 ++add_to_array_context->sequence->it;
179}
180
181static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
182{
183 compiled_data->f = compiled_data->fits + output_size;
184 compiled_data->xpu_alloc.mp_hdr = -1;
185 compiled_data->xpu_alloc.freed = kh_init(dy_str);
186 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
187 compiled_data->gradient_checkpoints = gradient_checkpoints;
188}
189
190static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
191{
192 assert(model->graph);
193 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
194 int i;
195 for (i = 0; i < input_size; i++)
196 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
197 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
198 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
199 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
200 ccv_cnnp_model_sequence_t model_sequence = {
201 .bank = kh_init(ccv_cnnp_model_name_bank)
202 };
203 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
204 .sequence = &model_sequence,
205 .prefix = 't',
206 .symbols = parameters,
207 .ids = parameter_ids,
208 .trainables = parameter_trainables,
209 };
210 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
211 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
212 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
213 .sequence = &model_sequence,
214 .prefix = 'r',
215 .symbols = internals,
216 .ids = internal_ids,
217 .trainables = 0,
218 };
219 ccv_cnnp_model_build_data_t build_data = {
220 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
221 .model_sequence = &model_sequence,
222 .add_to_array = ccv_cnnp_model_add_to_array,
223 .parameters = parameters,
224 .context = {
225 .add_to_parameter = &add_to_parameter_context,
226 .add_to_output = &add_to_output_context,
227 },
228 .gradient_checkpoints = 0,
229 };
230 model->data = &build_data;
231 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
232 for (i = 0; i < model->output_size; i++)
233 {
234 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
235 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
236 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
237 continue;
238 // If the output is an alias, insert a data transform regardless, for result correctness (we cannot bind an alias). You can check the ccv_nnc_tensor_bind_symbol method
239 // to see that we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind an alias tensor itself (this is expected, because we
240 // cannot handle the case where the alias covers only part of the original tensor but is bound differently).
241 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
242 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
243 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
244 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
245 }
246 model->data = 0;
247 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
248 if (model_sequence.sequences)
249 ccv_array_free(model_sequence.sequences);
250 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
251 int not_trainables = 0;
252 // Assert no parameter is alias.
253 for (i = 0; i < parameters->rnum; i++)
254 {
255 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
256 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
257 assert(alias_to.graph == 0); // Cannot find the one alias to.
258 if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
259 not_trainables = 1;
260 }
261 assert(parameters->rnum == parameter_trainables->rnum);
262 uint64_t* parameter_flags = 0;
263 if (not_trainables)
264 {
265 parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
266 for (i = 0; i < parameter_trainables->rnum; i++)
267 if (*(int*)ccv_array_get(parameter_trainables, i))
268 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
269 }
270 ccv_array_free(parameter_trainables);
271 // Assert no internal is alias.
272 for (i = 0; i < internals->rnum; i++)
273 {
274 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
275 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
276 assert(alias_to.graph == 0); // Cannot find the one alias to.
277 }
278 const int output_size = model->output_size;
279 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
280 const int parameters_rnum = parameters->rnum;
281 if (input_size > 0)
282 {
283 ccv_array_resize(parameters, parameters_rnum + input_size);
284 memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
285 }
286 ccv_nnc_symbolic_graph_simplify(model->graph,
287 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
288 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
289 CCV_NNC_SIMPLIFY_OPS_FUSION,
290 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
291 ccv_array_get(parameters, 0), parameters_rnum + input_size,
292 model->outputs, output_size,
293 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
294 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
295 // Size it down.
296 parameters->rnum = parameters_rnum;
297 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
298 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
299 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
300 assert(evaluate_to_size > 0);
301 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
302 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
303 compiled_data->loss = loss;
304 if (loss.cmd == CCV_NNC_NOOP)
305 {
306 // If no loss function provided, there is no fits.
307 for (i = 0; i < output_size; i++)
308 {
309 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
310 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
311 if (alias_to.d < 0)
312 compiled_data->f[i] = model->outputs[i];
313 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
314 int ofs[CCV_NNC_MAX_DIM_ALLOC];
315 int inc[CCV_NNC_MAX_DIM_ALLOC];
316 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
317 int j;
318 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
319 { assert(ofs[j] == 0); } // There is no ofs.
320 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
321 }
322 }
323 } else {
324 for (i = 0; i < output_size; i++)
325 {
326 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
327 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
328 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
329 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
330 }
331 }
332 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
333 ccv_nnc_symbolic_graph_simplify(model->graph,
334 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
335 0, 0, // No need to provide binds at this point.
336 compiled_data->f, model->output_size,
337 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
338 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
339 // If inputs are from GPU, stream type is GPU.
340 compiled_data->parameters = parameters;
341 compiled_data->parameter_flags = parameter_flags;
342 compiled_data->internals = internals;
343 compiled_data->ids.parameters = parameter_ids;
344 compiled_data->ids.internals = internal_ids;
345}
346
347static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
348{
349 ccv_array_t* const stack = (ccv_array_t*)context;
350 ccv_array_push(stack, &symbol.d);
351}
352
353static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
354{
355 const ccv_nnc_tensor_symbol_t src_symbol = {
356 .d = src_index,
357 .graph = src_graph
358 };
359 const ccv_nnc_tensor_symbol_t dest_symbol = {
360 .d = dest_index,
361 .graph = dest_graph
362 };
363 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
364 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
365 int ofs[CCV_NNC_MAX_DIM_ALLOC];
366 int inc[CCV_NNC_MAX_DIM_ALLOC];
367 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
368 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
369}
370
371static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
372{
373 const ccv_nnc_tensor_symbol_t src_symbol = {
374 .d = src_index,
375 .graph = src_graph
376 };
377 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
378 const ccv_nnc_tensor_symbol_t dest_symbol = {
379 .d = dest_index,
380 .graph = dest_graph
381 };
382 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
383 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
384}
385
386static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
387static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
388
389typedef struct {
390 int parallel_count;
391 ccv_nnc_symbolic_graph_t* graph;
392 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
393} ccv_nnc_graph_exec_update_t;
394
395static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
396{
397 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
398 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
399 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
400 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
401 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
402 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
403 const int parallel_count = graph_exec_update->parallel_count;
404 int i;
405 for (i = 1; i < parallel_count; i++)
406 {
407 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
408 if (!CCV_NO_GRAPH_EXEC(copy))
409 {
410 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
411 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
412 }
413 }
414}
415
416void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
417{
418 assert(model->graph);
419 assert(model->compiled_data);
420 assert(!init->graph);
421 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
422 init->graph = ccv_nnc_symbolic_graph_new();
423 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
424 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
425 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
426 init->parallel_count = model->parallel_count;
427 init->memory_compression = model->memory_compression;
428 init->memory_reduction = model->memory_reduction;
429 init->gradient_checkpointing = model->gradient_checkpointing;
430 init->compiled_data->stream_type = model->compiled_data->stream_type;
431 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
432 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
433 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
434 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
435 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
436 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
437 int i, j;
438 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
439 for (i = 0; i < compiled_data->parameters->rnum; i++)
440 {
441 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
442 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
443 }
444 for (i = 0; i < compiled_data->internals->rnum; i++)
445 {
446 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
447 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
448 }
449 // Update inputs.
450 assert(model->input_size == init->input_size);
451 for (i = 0; i < model->input_size; i++)
452 if (model->inputs[i].d >= 0)
453 {
454 assert(init->inputs[i].d >= 0);
455 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
456 }
457 // Update outputs.
458 assert(model->output_size == init->output_size);
459 for (i = 0; i < model->output_size; i++)
460 {
461 if (model->outputs[i].d >= 0)
462 {
463 assert(init->outputs[i].d >= 0);
464 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
465 }
466 if (model->outputs[i].d != model->compiled_data->f[i].d)
467 {
468 assert(init->outputs[i].d != init->compiled_data->f[i].d);
469 if (model->compiled_data->f[i].d >= 0)
470 {
471 assert(init->compiled_data->f[i].d >= 0);
472 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
473 }
474 }
475 }
476 // Go through the graph to set tensor on matching symbols
477 for (i = 0; i < stack->rnum; i++)
478 {
479 const int d = *(int*)ccv_array_get(stack, i);
480 // If exceed range, skip.
481 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
482 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
483 continue;
484 const ccv_nnc_graph_exec_symbol_t src_symbol = {
485 .d = d,
486 .graph = init->graph
487 };
488 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
489 .d = d,
490 .graph = model->graph
491 };
492 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
493 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
494 // If the name doesn't match, skip.
495 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
496 continue;
497 // Now get all the inputs and outputs, if matches, set them.
498 const int* src_inputs;
499 int src_input_size;
500 const int* src_outputs;
501 int src_output_size;
502 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
503 const int* dest_inputs;
504 int dest_input_size;
505 const int* dest_outputs;
506 int dest_output_size;
507 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
508 // We may have unmatched input / output size because this is the minimizer and it has
509 // different saved_aux (for example, when we shrunk with CMD_NOOP).
510 if (src_input_size != dest_input_size)
511 continue;
512 if (src_output_size != dest_output_size)
513 continue;
514 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
515 // There may be mismatches between the source tensor symbols and destination tensor symbols. The reason is that
516 // the minimizer may be passed in later; therefore, tensors for the minimizer may be allocated later in the original
517 // graph, whereas in the newly created graph it is streamlined (the minimizer exists from the beginning). That
518 // makes the order of tensor symbol creation different, and therefore which tensor is which ends up wrong as
519 // well. However, setting a new minimizer won't change the exec symbol ordering, because we never create new exec
520 // symbols after the gradient init step. Setting a new minimizer just updates that exec symbol's setting; it is not
521 // a new exec symbol.
522 for (j = 0; j < src_input_size; j++)
523 if (src_inputs[j] >= 0)
524 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
525 for (j = 0; j < src_output_size; j++)
526 if (src_outputs[j] >= 0)
527 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
528 }
529 ccv_array_free(stack);
530 // After this, we get all tensors in the model graph resolved through tensor_auto.
531 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
532 // Verify symbols we get matches.
533 const int parameter_size = compiled_data->parameters->rnum;
534 for (i = 0; i < parameter_size; i++)
535 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
536 const int internal_size = compiled_data->internals->rnum;
537 for (i = 0; i < internal_size; i++)
538 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
539 // Go through compiled data.
540 if (compiled_data->tensor_arena)
541 {
542 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
543 if (flag == 0 && compiled_data->graph_exec_arena)
544 {
545 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
546 // Since we will reinit, if we previously set is_test, we need to set it again.
547 if (compiled_data->is_test)
548 {
549 const int parallel_count = ccv_max(model->parallel_count, 1);
550 ccv_nnc_graph_exec_update_t update = {
551 .parallel_count = parallel_count,
552 .graph = model->graph,
553 .graph_exec_arena = compiled_data->graph_exec_arena,
554 };
555 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
556 }
557 } else
558 // Free-up tensor arena & graph exec arena.
559 _ccv_cnnp_compiled_data_graph_free(compiled_data);
560 }
561 // There are other compiled graphs, for accum and apply gradients.
562 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
563 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
564 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
565 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
566 // That is why we don't update these compiled graphs at all at this point.
567 // Free the model, we've already "absorbed" it.
568 ccv_cnnp_model_free(init);
569}
570
571void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
572{
573 assert(input_size == model->input_size || model->input_size == 0);
574 if (model->input_size == 0)
575 model->input_size = input_size;
576 if (!model->graph) // The graph is not compiled yet.
577 {
578 model->graph = ccv_nnc_symbolic_graph_new();
579 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
580 assert(model->compiled_data);
581 int i, flag = 0;
582 for (i = 0; !flag && i < input_size; i++)
583 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
584 // If inputs are from GPU, stream type is GPU.
585 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
586 model->compiled_data->minimize.minimizer = minimizer;
587 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
588 } else {
589 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
590 // And then absorb the "new model" to the old one.
591 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
592 ccv_cnnp_model_absorb(model, init, inputs, input_size);
593 // Reset minimizer.
594 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
595 }
596}
597
598ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
599{
600 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
601 new_model->is_trainable = is_trainable;
602 return new_model;
603}
604
605void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
606{
607 assert(model->graph);
608 assert(output_size == model->output_size);
609 ccv_nnc_symbolic_graph_t* const graph = model->graph;
610 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
611 int i;
612 for (i = 0; i < output_size; i++)
613 {
614 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
615 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
616 }
617}
618
619void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
620{
621 if (workspace_size == model->workspace_size)
622 return;
623 model->workspace_size = workspace_size;
624 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
625 if (compiled_data && compiled_data->graph)
626 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
627}
628
629size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
630{
631 return model->workspace_size;
632}
633
634void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
635{
636 if (parallel == 0)
637 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
638 else
639 model->parallel_count = parallel;
640 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
641 if (compiled_data)
642 { assert(!compiled_data->graph); }
643}
644
645void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
646{
647 model->max_stream_count = max_stream_count;
648 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
649 if (compiled_data)
650 { assert(!compiled_data->graph); }
651}
652
653void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
654{
655 model->memory_compression = memory_compression;
656 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
657 if (compiled_data)
658 { assert(!compiled_data->graph); }
659}
660
661void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
662{
663 model->memory_reduction = memory_reduction;
664 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
665 if (compiled_data)
666 { assert(!compiled_data->graph); }
667}
668
669void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
670{
671 model->gradient_checkpointing = gradient_checkpointing;
672}
673
674int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
675{
676 return model->gradient_checkpointing;
677}
678
679typedef struct {
680 int parallel_count;
681 ccv_nnc_symbolic_graph_t* graph;
682 ccv_cnnp_compiled_data_t* compiled_data;
683 ccv_nnc_tensor_arena_t* tensor_arena;
684} ccv_nnc_tensor_init_states_t;
685
686static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
687{
688 int i;
689 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
690 for (i = 0; i < compiled_data->parameters->rnum; i++)
691 {
692 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
693 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
694 return 1;
695 }
696 for (i = 0; i < compiled_data->internals->rnum; i++)
697 {
698 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
699 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
700 return 1;
701 }
702 return 0;
703}
704
705static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
706{
707 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
708 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
709 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
710 if (!output_tensor)
711 return;
712 const int d = output_symbol.d;
713 assert(d < tensor_init_states->compiled_data->tensors_init.size);
714 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
715 if (init_v[d >> 5] & (1u << (d & 0x1f)))
716 return;
717 init_v[d >> 5] |= (1u << (d & 0x1f));
718 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
719 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
720 const int parallel_count = tensor_init_states->parallel_count;
721 int i;
722 for (i = 1; i < parallel_count; i++)
723 {
724 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
725 if (copy)
726 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
727 }
728}
729
730// This method can only handle cases where we added new tensors and execs, never deleted any. This invariant holds because
731// we set up everything (including calling the simplify method) in the ccv_cnnp_model_compile method, before this rewind setup.
732static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
733{
734 assert(model->graph);
735 assert(model->compiled_data);
736 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
737 assert(compiled_data->rewindables);
738 int i;
739 for (i = 0; i < compiled_data->rewindables->rnum; i++)
740 {
741 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
742 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
743 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
744 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
745 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
746 }
747 ccv_array_clear(compiled_data->rewindables);
748 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
749}
750
751static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
752{
753 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
754 .type = CCV_CNNP_REWIND_TENSOR,
755 .tensor = symbol
756 };
757 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
758 ccv_array_push(rewind_symbols, &rewind_symbol);
759}
760
761 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
762{
763 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
764 .type = CCV_CNNP_REWIND_TENSOR,
765 .tensor = symbol
766 };
767 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
768 ccv_array_push(rewind_symbols, &rewind_symbol);
769}
770
771static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
772{
773 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
774 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
775 .graph_exec = symbol
776 };
777 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
778 ccv_array_push(rewind_symbols, &rewind_symbol);
779}
780
781static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
782{
783 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
784 if (!CCV_NO_GRAPH_EXEC(update_exec))
785 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
786 int i;
787 for (i = 1; i < parallel_count; i++)
788 {
789 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
790 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
791 if (!CCV_NO_GRAPH_EXEC(copy))
792 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
793 }
794}
795
796static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
797{
798 assert(compiled_data);
799 assert(symbolic_graph);
800 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
801 int i;
802 for (i = 1; i < parallel_count; i++)
803 {
804 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
805 if (copy_symbol.graph)
806 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
807 }
808 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
809 if (graph_exec_arena)
810 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
811 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
812 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
813 if (gradient_graph_exec_arena)
814 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
815}
816
817static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
818{
819 int this_parameter_flag = 0;
820 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
821 return this_parameter_flag;
822 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
823 int j, k;
824 // For no-op, we can preserve previous saved_aux_size.
825 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
826 {
827 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
828 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
829 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
830 // make sure some model parameters don't update if we don't want them to.
831 int old_saved_aux_size;
832 if (old_minimizer.cmd == CCV_NNC_NOOP)
833 {
834 int input_size;
835 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
836 if (input_size < 2) // This is not legit.
837 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
838 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
839 old_saved_aux_size = input_size - 2;
840 } else
841 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
842 if (old_saved_aux_size != saved_aux_size)
843 {
844 this_parameter_flag = 1;
845 if (saved_aux_size > old_saved_aux_size)
846 {
847 // Allocate new tensor symbols.
848 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
849 for (j = old_saved_aux_size; j < saved_aux_size; j++)
850 {
851 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
852 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
853 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
854 for (k = 1; k < parallel_count; k++)
855 {
856 ccv_nnc_tensor_param_t dev_info = info;
857 if (k != device_id)
858 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
859 else
860 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
861 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
862 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
863 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
864 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
865 }
866 }
867 } else {
868 for (j = saved_aux_size; j < old_saved_aux_size; j++)
869 {
870 for (k = 1; k < parallel_count; k++)
871 {
872 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
873 if (src_copy.d >= 0)
874 {
875 ccv_nnc_tensor_symbol_free(graph, src_copy);
876 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
877 }
878 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
879 if (dest_copy.d >= 0)
880 {
881 ccv_nnc_tensor_symbol_free(graph, dest_copy);
882 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
883 }
884 }
885 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
886 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
887 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
888 }
889 }
890 }
891 }
892 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
893 if (this_parameter_flag)
894 {
895 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
896 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
897 const int* inputs = 0;
898 int input_size = 0;
899 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
900 assert(input_size >= 1);
901 update_inputs[0].d = inputs[0];
902 update_inputs[0].graph = graph;
903 update_inputs[1].d = inputs[1];
904 update_inputs[1].graph = graph;
905 update_outputs[0] = updated_parameters[parameter_indice];
906 for (j = 0; j < saved_aux_size; j++)
907 {
908 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
909 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
910 }
911 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
912 for (k = 1; k < parallel_count; k++)
913 {
914 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
915 assert(copy.d >= 0);
916 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
917 assert(input_size >= 1);
918 update_inputs[0].d = inputs[0];
919 update_inputs[0].graph = graph;
920 update_inputs[1].d = inputs[1];
921 update_inputs[1].graph = graph;
922 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
923 for (j = 0; j < saved_aux_size; j++)
924 {
925 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
926 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
927 }
928 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
929 }
930 }
931 return this_parameter_flag;
932}
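// Illustrative note (not part of the original source): the IO rebuild above assumes the
// update node layout used throughout this file:
//   inputs  = { gradient, parameter, aux_0 .. aux_{saved_aux_size-1} }   -> saved_aux_size + 2
//   outputs = { updated_parameter, aux'_0 .. aux'_{saved_aux_size-1} }   -> saved_aux_size + 1
// For example, switching a parameter from a momentum-style minimizer (typically 1 saved aux)
// to an Adam-style minimizer (typically 2 saved aux) grows each update node from 3 inputs /
// 2 outputs to 4 inputs / 3 outputs, which is why the IO has to be re-set per parallel copy.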
933
934typedef struct {
935 int parameter_size;
936 ccv_nnc_cmd_t minimizer;
937 ccv_cnnp_model_io_t parameters[1];
938} ccv_cnnp_set_minimizer_for_parameter_t;
939
940static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
941{
942 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
943 assert(compiled_data);
944 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
945 // We update all parameters, at this point, we have one minimizer.
946 const int parameter_size = compiled_data->parameters->rnum;
947 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
948 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
949 assert(symbolic_graph);
950 const int parallel_count = ccv_max(model->parallel_count, 1);
951 ccv_array_t* const parameters = compiled_data->minimize.parameters;
952 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
953 int i, j, flag = 0;
954 for (i = 0; i < parameters->rnum; i++)
955 {
956 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
957 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
958 {
959 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
960 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
961 const int old_rnum = parameter_indices->rnum;
962 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
963 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
964 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
965 if (param_ref >= 0)
966 {
967 assert(param_ref + old_rnum < parameter_indices->rnum);
968 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
969 parameter_indices->rnum = old_rnum + 1;
970 }
971 }
972 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
973 // We may have duplicated indices, but that is OK, we will set it twice.
974 for (j = 0; j < parameter_indices->rnum; j++)
975 {
976 const int d = *(int*)ccv_array_get(parameter_indices, j);
977 assert(d <= parameter_size);
978 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
979 flag = 1;
980 }
981 ccv_array_clear(parameter_indices);
982 }
983 ccv_array_free(parameter_indices);
984 return flag;
985}
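// Illustrative note (inferred from the asserts above, not part of the original source):
// param_sel and param_ref use a shifted encoding so that 0 is never a legal value. A positive
// value v selects index v - 1, a negative value is a sentinel (e.g. "all parameters"), and
// 0 is rejected outright; that is why both are decoded with `x > 0 ? x - 1 : x`.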
986
987static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
988{
989 if (new_saved_aux_size == old_saved_aux_size)
990 return;
991 assert(new_saved_aux_size > old_saved_aux_size);
992 int i, j;
993 for (i = parameter_size - 1; i >= 0; i--)
994 {
995 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
996 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
997 for (j = old_saved_aux_size - 1; j >= 0; j--)
998 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
999 }
1000}
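// Worked example (not part of the original source) of the in-place scatter above, assuming
// parameter_size = 2, old_saved_aux_size = 1, new_saved_aux_size = 2. Entries move from a
// stride of old_saved_aux_size to a stride of new_saved_aux_size back-to-front, and the
// newly exposed slots are cleared to NO_TENSOR_SYMBOL:
//   before: { p0a0, p1a0 }                 // stride 1
//   after:  { p0a0, NONE, p1a0, NONE }     // stride 2
// Iterating back-to-front is what keeps the copy from overwriting entries it still needs.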
1001
1002static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1003{
1004 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1005 assert(compiled_data);
1006 if (!compiled_data->rewindables)
1007 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1008 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1009 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1010 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1011}
1012
1013static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1014{
1015 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1016 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1017 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1018 const int evaluate_to_size = compiled_data->evaluate.to_size;
1019 assert(evaluate_to_size > 0);
1020 const int parallel_count = ccv_max(model->parallel_count, 1);
1021 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1022 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1023 int i, j;
1024 const int output_size = model->output_size;
1025 assert(!fits || fit_size == output_size * parallel_count);
1026 if (fits)
1027 for (i = 0; i < output_size; i++)
1028 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1029 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1030 const int parameter_size = compiled_data->parameters->rnum;
1031 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1032 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1033 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1034 int parameter_size_maybe_more = parameter_size;
1035 compiled_data->disable_outgrad = disable_outgrad;
1036 int outgrad_size;
1037 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1038 outgrad_size = 0;
1039 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1040 outgrad_size = model->input_size;
1041 else {
1042 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1043 outgrad_size = 0;
1044 for (i = 0; i < model->input_size; i++)
1045 if (!(disable_outgrad & ((uint64_t)1 << i)))
1046 ++outgrad_size;
1047 }
1048 compiled_data->outgrad_size = outgrad_size;
1049 parameter_size_maybe_more += outgrad_size;
1050 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1051 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1052 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1053 compiled_data->backward.to_size = parameter_size_maybe_more;
1054 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1055 if (compiled_data->parameter_flags)
1056 {
1057 parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1058 for (i = 0; i < parameter_size; i++)
1059 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1060 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1061 else
1062 parameters[i] = NO_TENSOR_SYMBOL;
1063 }
1064 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1065 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1066 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1067 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1068 else { // Compute minimize with gradients including selected inputs.
1069 assert(model->input_size > 0);
1070 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1071 assert(outgrad_size > 0);
1072 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1073 j = 0;
1074 for (i = 0; i < model->input_size; i++)
1075 if (!(disable_outgrad & ((uint64_t)1 << i)))
1076 outgrads[j++] = model->inputs[i];
1077 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1078 }
1079 if (compiled_data->parameter_flags)
1080 ccfree(parameters);
1081 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1082 if (compiled_data->minimize.parameters)
1083 _ccv_cnnp_apply_parameters_with_minimizer(model);
1084 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1085 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1086 for (i = 0; i < output_size; i++)
1087 {
1088 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1089 // Init this to 1 so we can backprop.
1090 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1091 }
1092 compiled_data->backward.to_size = 0;
1093 for (i = 0; i < parameter_size_maybe_more; i++)
1094 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1095 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1096 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1097 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1098 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1099 {
1100 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1101 continue;
1102 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1103 const int* tos;
1104 int to_size;
1105 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1106 if (to_size == 0) // If this is the end (no minimizers afterwards), we need to attach this as a destination; otherwise it is covered by update_nodes.
1107 {
1108 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1109 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1110 int flag = 0;
1111 const int outgrad_destination_start = ccv_max(0, destination_count - i);
1112 for (j = i - 1; !flag && j >= 0; j--)
1113 if (j + outgrad_destination_start < destination_count)
1114 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1115 if (!flag) // Only if we cannot find it, we add it.
1116 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1117 }
1118 }
1119 if (parallel_count > 1)
1120 {
1121 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1122 0, 0,
1123 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1124 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1125 0, 0, 0,
1126 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1127 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1128 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1129 for (i = 0; i < evaluate_to_size; i++)
1130 for (j = 1; j < parallel_count; j++)
1131 {
1132 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1133 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1134 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1135 }
1136 const int backward_to_size = compiled_data->backward.to_size;
1137 for (i = 0; i < backward_to_size; i++)
1138 for (j = 1; j < parallel_count; j++)
1139 {
1140 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1141 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1142 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1143 }
1144 }
1145 // Only use memory compression if we are in gradient parameter mode.
1146 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1147 {
1148 if (model->memory_compression)
1149 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1150 if (model->memory_reduction)
1151 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1152 }
1153 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1154 compiled_data->gradient_mode = gradient_mode;
1155}
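// Illustrative note (not part of the original source): disable_outgrad is a per-input
// bitmask; bit i set means "do not compute a gradient w.r.t. input i". For example, with
// 3 inputs and disable_outgrad == (1 << 1), the loop above yields outgrad_size == 2 and
// outgrads == { inputs[0], inputs[2] }, matching the counting loop earlier in this function.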
1156
1157void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1158{
1159 assert(!compiled_data->tensors.parameters);
1160 const int parameter_size = compiled_data->parameters->rnum;
1161 const int parallel_count = ccv_max(model->parallel_count, 1);
1162 const int internal_size = compiled_data->internals->rnum;
1163 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1164 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1165 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1166 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1167}
1168
1169int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1170{
1171 int i, j;
1172 const int parameter_size = compiled_data->parameters->rnum;
1173 const int parallel_count = ccv_max(model->parallel_count, 1);
1174 const int internal_size = compiled_data->internals->rnum;
1175 for (i = 0; i < parameter_size; i++)
1176 {
1177 // parameters have to be allocated all together.
1178 if (compiled_data->tensors.parameters[i])
1179 {
1180 for (j = 1; j < parallel_count; j++)
1181 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1182 continue;
1183 }
1184 return 1;
1185 }
1186 for (i = 0; i < internal_size; i++)
1187 {
1188 if (!compiled_data->tensors.internals[i])
1189 return 1;
1190 for (j = 1; j < parallel_count; j++)
1191 if (!compiled_data->tensors.internals[i + j * internal_size])
1192 return 1;
1193 }
1194 return 0;
1195}
1196
1197void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1198{
1199 int i, j;
1200 const int parameter_size = compiled_data->parameters->rnum;
1201 const int parallel_count = ccv_max(model->parallel_count, 1);
1202 const int internal_size = compiled_data->internals->rnum;
1203 for (i = 0; i < parameter_size; i++)
1204 {
1205 // parameters have to be allocated all together.
1206 if (compiled_data->tensors.parameters[i])
1207 {
1208 for (j = 1; j < parallel_count; j++)
1209 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1210 continue;
1211 }
1212 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1213 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1214 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1215 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1216 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1217 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1218 for (j = 1; j < parallel_count; j++)
1219 {
1220 if (j != device_id)
1221 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1222 else
1223 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1224 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1225 }
1226 }
1227 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1228 for (i = 0; i < internal_size; i++)
1229 {
1230 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1231 const int d = retained.d;
1232 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1233 continue;
1234 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1235 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1236 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1237 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1238 if (!compiled_data->tensors.internals[i])
1239 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1240 for (j = 1; j < parallel_count; j++)
1241 {
1242 if (j != device_id)
1243 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1244 else
1245 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1246 if (!compiled_data->tensors.internals[i + j * internal_size])
1247 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1248 }
1249 }
1250 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1251}
1252
1253static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1254{
1255 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1256 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1257}
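// Sketch (not part of the original source, assuming <stdint.h> and the same bit layout as the
// checks above): tensors_init.v is a packed bit vector indexed by tensor symbol id, and the
// low pointer bit doubles as a "not fully allocated yet" flag, which is why CCV_NNC_INIT_V
// masks off (uintptr_t)1 before the vector is dereferenced. Hypothetical helpers for the bit test:
//   static inline int editor_is_init(const uint32_t* const v, const int d)
//   {
//       return (v[d >> 5] & (1u << (d & 0x1f))) != 0; // same test as init_v[d >> 5] & ... above
//   }
//   static inline void editor_mark_init(uint32_t* const v, const int d)
//   {
//       v[d >> 5] |= (1u << (d & 0x1f));
//   }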
1258
1259static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1260{
1261 assert(parallel_count > 0);
1262 int i, j;
1263 for (i = 0; i < tensor_size; i++)
1264 {
1265 if (!tensors[i])
1266 continue;
1267 const int d = tensor_symbols[i].d;
1268 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1269 continue;
1270 for (j = 1; j < parallel_count; j++)
1271 if (tensors[i + j * tensor_size])
1272 {
1273 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1274 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1275 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1276 }
1277 }
1278}
1279
1280static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1281{
1282 assert(parallel_count > 0);
1283 int i, j;
1284 for (i = 0; i < tensor_size; i++)
1285 {
1286 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1287 for (j = 1; j < parallel_count; j++)
1288 {
1289 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1290 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1291 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1292 { // We shouldn't allocate this, free it up.
1293 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1294 tensors[i + j * tensor_size] = 0;
1295 }
1296 }
1297 }
1298}
1299
1300static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1301{
1302 assert(parallel_count > 0);
1303 int i, j;
1304 for (i = 0; i < tensor_size; i++)
1305 {
1306 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1307 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1308 continue;
1309 if (graph)
1310 {
1311 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1312 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1313 tensor_symbol = alias_to;
1314 }
1315 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1316 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1317 {
1318 const ccv_nnc_tensor_bind_t retained_bind = {
1319 .symbol = tensor_symbol,
1320 .tensor = tensor
1321 };
1322 ccv_array_push(tensor_binds, &retained_bind);
1323 }
1324 for (j = 1; j < parallel_count; j++)
1325 {
1326 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1327 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1328 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1329 {
1330 const ccv_nnc_tensor_bind_t bind = {
1331 .symbol = copy,
1332 .tensor = tensors[i + j * tensor_size]
1333 };
1334 ccv_array_push(tensor_binds, &bind);
1335 }
1336 }
1337 }
1338}
1339
1340static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1341{
1342 if (compiled_data->graph)
1343 ccv_nnc_graph_free(compiled_data->graph);
1344 compiled_data->graph = 0;
1345 compiled_data->is_test = 0;
1346 if (compiled_data->tensor_arena)
1347 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1348 compiled_data->tensor_arena = 0;
1349 if (compiled_data->graph_exec_arena)
1350 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1351 compiled_data->graph_exec_arena = 0;
1352 if (compiled_data->backward.from_ops)
1353 ccfree(compiled_data->backward.from_ops);
1354 compiled_data->backward.from_ops = 0;
1355 if (compiled_data->evaluate.schedule)
1356 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1357 compiled_data->evaluate.schedule = 0;
1358 if (compiled_data->backward.schedule)
1359 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1360 compiled_data->backward.schedule = 0;
1361}
1362
1363static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1364{
1365 if (compiled_data->gradients)
1366 ccfree(compiled_data->gradients);
1367 compiled_data->gradients = 0;
1368 if (compiled_data->updated_parameters)
1369 ccfree(compiled_data->updated_parameters);
1370 compiled_data->updated_parameters = 0;
1371 compiled_data->update_nodes = 0;
1372 compiled_data->saved_aux = 0;
1373}
1374
1375static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1376{
1377 if (compiled_data->backward.gradients)
1378 ccfree(compiled_data->backward.gradients);
1379 compiled_data->backward.gradients = 0;
1380 if (compiled_data->backward.accum)
1381 ccv_nnc_graph_free(compiled_data->backward.accum);
1382 compiled_data->backward.accum = 0;
1383 if (compiled_data->backward.tensor_arena)
1384 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1385 compiled_data->backward.tensor_arena = 0;
1386 if (compiled_data->backward.graph_exec_arena)
1387 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1388 compiled_data->backward.graph_exec_arena = 0;
1389}
1390
1391static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1392{
1393 if (compiled_data->apply_gradients.graph)
1394 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1395 compiled_data->apply_gradients.graph = 0;
1396 if (compiled_data->apply_gradients.tensor_arena)
1397 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1398 compiled_data->apply_gradients.tensor_arena = 0;
1399 if (compiled_data->apply_gradients.graph_exec_arena)
1400 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1401 compiled_data->apply_gradients.graph_exec_arena = 0;
1402}
1403
1404// Compile the graph to run ccv_cnnp_model_fit
1405static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1406{
1407 int i, j;
1408 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1409 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1410 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1411 const int parallel_count = ccv_max(model->parallel_count, 1);
1412 assert(output_size == model->output_size * parallel_count);
1413 assert(!fits || output_size == fit_size);
1414 assert(output_size > 0);
1415 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1416 {
1417 _ccv_cnnp_model_set_rewindables(model);
1418 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1419 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1420 _ccv_cnnp_model_rewind_graph(model);
1421 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1422 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1423 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1424 }
1425 const int tensors_init = !!compiled_data->tensors_init.v;
1426 if (!tensors_init)
1427 _ccv_cnnp_model_tensors_init(model, compiled_data);
1428 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1429 // Check if it is not fully allocated; if it is not, run init_1.
1430 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1431 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1432 assert((input_size % parallel_count) == 0);
1433 assert((output_size % parallel_count) == 0);
1434 assert((fit_size % parallel_count) == 0);
1435 const int input_size_per_p = input_size / parallel_count;
1436 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1437 const int output_size_per_p = output_size / parallel_count;
1438 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1439 const int fit_size_per_p = fit_size / parallel_count;
1440 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1441 const int parameter_size = compiled_data->parameters->rnum;
1442 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1443 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1444 const int internal_size = compiled_data->internals->rnum;
1445 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1446 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1447 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1448 ccv_array_free(tensor_binds);
1449 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1450 if (tensors_init && parallel_count > 1)
1451 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1452 // If tensor is not init'ed, we need to init states first.
1453 if (_ccv_cnnp_any_to_init(compiled_data))
1454 {
1455 ccv_nnc_tensor_init_states_t tensor_init_states = {
1456 .parallel_count = parallel_count,
1457 .graph = model->graph,
1458 .compiled_data = compiled_data,
1459 .tensor_arena = compiled_data->tensor_arena
1460 };
1461 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1462 }
1463 compiled_data->is_test = 0;
1464 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1465 // No need to set because it defaults to training mode.
1466 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1467 for (i = 0; i < saved_aux_size * parameter_size; i++)
1468 {
1469 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1470 continue;
1471 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1472 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1473 for (j = 1; j < parallel_count; j++)
1474 {
1475 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1476 if (copy)
1477 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1478 }
1479 }
1480 const int evaluate_to_size = compiled_data->evaluate.to_size;
1481 compiled_data->evaluate.to_op_size = 0;
1482 for (i = 0; i < evaluate_to_size; i++)
1483 {
1484 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1485 if (to.graph)
1486 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1487 }
1488 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1489 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1490}
1491
1492ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1493{
1494 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1495 if (!compiled_data || !compiled_data->graph)
1496 return 0;
1497 return ccv_nnc_graph_default_stream(compiled_data->graph);
1498}
1499
1500uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1501{
1502 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1503 if (!compiled_data || !compiled_data->tensor_arena)
1504 return 0;
1505 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1506}
1507
1508static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1509{
1510 int i, j;
1511 for (i = 0; i < tensor_size; i++)
1512 {
1513 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1514 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1515 continue;
1516 if (graph)
1517 {
1518 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1519 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1520 tensor_symbol = alias_to;
1521 }
1522 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1523 for (j = 1; j < parallel_count; j++)
1524 {
1525 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1526 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1527 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1528 }
1529 }
1530}
1531
1532void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1533{
1534 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1535 assert(compiled_data);
1536 const int parallel_count = ccv_max(model->parallel_count, 1);
1537 assert(output_size == model->output_size * parallel_count);
1538 assert(input_size == model->input_size * parallel_count);
1539 assert(!fits || fit_size == output_size);
1540 assert(model->graph);
1541 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1542 {
1543 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1544 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1545 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1546 // Compile the symbolic graph down only when needed.
1547 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1548 } else {
1549 assert((input_size % parallel_count) == 0);
1550 assert((output_size % parallel_count) == 0);
1551 assert((fit_size % parallel_count) == 0);
1552 const int input_size_per_p = input_size / parallel_count;
1553 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1554 const int output_size_per_p = output_size / parallel_count;
1555 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1556 const int fit_size_per_p = fit_size / parallel_count;
1557 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1558 }
1559 if (compiled_data->is_test)
1560 {
1561 compiled_data->is_test = 0;
1562 ccv_nnc_graph_exec_update_t update = {
1563 .parallel_count = parallel_count,
1564 .graph = model->graph,
1565 .graph_exec_arena = compiled_data->graph_exec_arena,
1566 };
1567 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1568 }
1569 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1570}
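// Usage sketch (not part of the original source; the tensor shapes and the CPU_TENSOR_NHWC /
// ccv_nnc_tensor_new calls are assumptions for illustration): ccv_cnnp_model_fit re-JITs the
// graph only when no graph exists or the existing one was built for a different mode;
// subsequent calls just rebind the per-call tensors into the existing arena and run.
//   ccv_nnc_tensor_t* x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 16, 10), 0);
//   ccv_nnc_tensor_t* y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 16, 1), 0);
//   ccv_nnc_tensor_t* out = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 16, 1), 0);
//   ccv_nnc_tensor_t* inputs[] = { x };
//   ccv_nnc_tensor_t* fits[] = { y };
//   ccv_nnc_tensor_t* outputs[] = { out };
//   ccv_cnnp_model_fit(model, inputs, 1, fits, 1, outputs, 1, 0, 0); // tensor_tape = 0, stream = 0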
1571
1572// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1573static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1574{
1575 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1576 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1577 const int parallel_count = ccv_max(model->parallel_count, 1);
1578 assert(output_size == model->output_size * parallel_count);
1579 assert(output_size > 0);
1580 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here; rather,
1581 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1582 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1583 {
1584 const int evaluate_to_size = compiled_data->evaluate.to_size;
1585 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1586 _ccv_cnnp_model_set_rewindables(model);
1587 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1588 0, 0,
1589 0, 0, 0,
1590 0, 0, 0,
1591 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1592 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1593 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1594 int i, j;
1595 for (i = 0; i < evaluate_to_size; i++)
1596 for (j = 1; j < parallel_count; j++)
1597 {
1598 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1599 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1600 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1601 }
1602 }
1603 const int tensors_init = !!compiled_data->tensors_init.v;
1604 if (!tensors_init)
1605 _ccv_cnnp_model_tensors_init(model, compiled_data);
1606 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1607 // Check if it is not fully allocated; if not, run init_1.
1608 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1609 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1610 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1610, __extension__ __PRETTY_FUNCTION__); }))
;
1611 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1611, __extension__ __PRETTY_FUNCTION__); }))
;
1612 const int input_size_per_p = input_size / parallel_count;
1613 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1614 const int output_size_per_p = output_size / parallel_count;
1615 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1616 const int parameter_size = compiled_data->parameters->rnum;
1617 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1618 const int internal_size = compiled_data->internals->rnum;
1619 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1620 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1621 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1622 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1623 ccv_array_free(tensor_binds);
1624 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1625 // If tensors are not init'ed, we need to init their states first.
1626 if (tensors_init && parallel_count > 1)
1627 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1628 if (_ccv_cnnp_any_to_init(compiled_data))
1629 {
1630 ccv_nnc_tensor_init_states_t tensor_init_states = {
1631 .parallel_count = parallel_count,
1632 .graph = model->graph,
1633 .compiled_data = compiled_data,
1634 .tensor_arena = compiled_data->tensor_arena
1635 };
1636 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1637 }
1638 compiled_data->is_test = 1;
1639 ccv_nnc_graph_exec_update_t update = {
1640 .parallel_count = parallel_count,
1641 .graph = model->graph,
1642 .graph_exec_arena = compiled_data->graph_exec_arena,
1643 };
1644 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1645 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1646 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1647}
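As a rough usage sketch (not part of this file): once the model is compiled, an inference-only pass goes through ccv_cnnp_model_evaluate with requires_grad unset, which triggers the no-grad JIT above on first use and merely rebinds tensors on later calls. Here model, x and y are assumed to be prepared by the caller.

// Hedged sketch: `model`, `x` and `y` are assumed to exist with matching shapes.
ccv_nnc_tensor_t* inputs[] = { x };
ccv_nnc_tensor_t* outputs[] = { y };
ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
	.requires_grad = 0, // no gradients: takes the MULTISTAGE_MODE_NO_GRAD path above
	.is_test = 1, // run with inference-time behavior
}, inputs, 1, outputs, 1, 0, 0);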
1648
1649static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1650{
1651 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1651, __extension__ __PRETTY_FUNCTION__
); }))
;
1652 const int parameter_size = compiled_data->parameters->rnum;
1653 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1654 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1655 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1656 int i, j;
1657 for (i = 0; i < parameter_size; i++)
1658 {
1659 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1660 {
1661 compiled_data->tensors.gradients[i] = 0;
1662 compiled_data->tensors.accum_gradients[i] = 0;
1663 for (j = 1; j < parallel_count; j++)
1664 {
1665 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1666 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1667 }
1668 continue;
1669 }
1670 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1671 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1672 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1673 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1674 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1675 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1676 compiled_data->tensors.accum_gradients[i] = 0; // Delay the accumulated gradient allocation until we need it.
1677 for (j = 1; j < parallel_count; j++)
1678 {
1679 if (j != device_id)
1680 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1681 else
1682 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1683 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1684 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1685 }
1686 }
1687}
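For reference, the gradient and accumulated-gradient tensors allocated above live in one flat array indexed by parameter first and data-parallel device second. A small, purely illustrative helper (hypothetical, not part of this file) spells out that indexing:

// Illustrative only: the gradient of parameter `i` as seen by parallel device `j`,
// mirroring the `i + j * parameter_size` indexing used above. The entry may be 0 if
// the parameter's gradient is disabled via parameter_flags.
static ccv_nnc_tensor_t* _example_gradient_of(const ccv_cnnp_compiled_data_t* const compiled_data, const int i, const int j)
{
	const int parameter_size = compiled_data->parameters->rnum;
	return compiled_data->tensors.gradients[i + j * parameter_size];
}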
1688
1689static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1690{
1691 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1692 return 1;
1693 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1694 return 0;
1695 int i;
1696 for (i = 0; i < input_size; i++)
1697 if (!(disable_outgrad & ((uint64_t)1 << i)))
1698 return 0;
1699 return 1;
1700}
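Per the check above, disable_outgrad is a per-input bitmask (with CCV_CNNP_DISABLE_OUTGRAD_NONE and CCV_CNNP_DISABLE_OUTGRAD_ALL as the two extremes): bit i set means no outgoing gradient is computed for input i. A hedged sketch of building such a mask for a two-input model:

// Sketch only: disable the input gradient for input 0, keep it for input 1.
uint64_t disable_outgrad = 0;
disable_outgrad |= ((uint64_t)1 << 0); // bit 0 set: no outgrad for input 0
// Bit 1 stays clear, so _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, 2) returns 0.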
1701
1702// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1703// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1704static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1705{
1706 int i, j;
1707 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1708 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1709 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1709, __extension__ __PRETTY_FUNCTION__
); }))
;
1710 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1711 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1712 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1712, __extension__ __PRETTY_FUNCTION__
); }))
;
1713 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1713, __extension__ __PRETTY_FUNCTION__
); }))
;
1714 // There shouldn't be a loss function if we evaluate with multistage jit.
1715 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1715, __extension__ __PRETTY_FUNCTION__
); }))
;
1716 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1717 {
1718 _ccv_cnnp_model_set_rewindables(model);
1719 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1720 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1721 _ccv_cnnp_model_rewind_graph(model);
1722 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1723 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1724 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1725 }
1726 const int tensors_init = !!compiled_data->tensors_init.v;
1727 if (!tensors_init)
1728 _ccv_cnnp_model_tensors_init(model, compiled_data);
1729 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1730 // Check if it is not fully allocated; if not, run init_1.
1731 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1732 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1733 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1733, __extension__ __PRETTY_FUNCTION__); }))
;
1734 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1734, __extension__ __PRETTY_FUNCTION__); }))
;
1735 const int input_size_per_p = input_size / parallel_count;
1736 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1737 const int output_size_per_p = output_size / parallel_count;
1738 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1739 const int parameter_size = compiled_data->parameters->rnum;
1740 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1741 const int internal_size = compiled_data->internals->rnum;
1742 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1743 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1744 if (!compiled_data->tensors.gradients)
1745 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1746 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1747 if (compiled_data->backward.to_size > 0)
1748 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1749 else
1750 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1751 ccv_array_free(tensor_binds);
1752 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1753 if (tensors_init && parallel_count > 1)
1754 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1755 // If tensors are not init'ed, we need to init their states first.
1756 if (_ccv_cnnp_any_to_init(compiled_data))
1757 {
1758 ccv_nnc_tensor_init_states_t tensor_init_states = {
1759 .parallel_count = parallel_count,
1760 .graph = model->graph,
1761 .compiled_data = compiled_data,
1762 .tensor_arena = compiled_data->tensor_arena
1763 };
1764 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1765 }
1766 compiled_data->is_test = is_test;
1767 ccv_nnc_graph_exec_update_t update = {
1768 .parallel_count = parallel_count,
1769 .graph = model->graph,
1770 .graph_exec_arena = compiled_data->graph_exec_arena,
1771 };
1772 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1773 const int evaluate_to_size = compiled_data->evaluate.to_size;
1774 compiled_data->evaluate.to_op_size = 0;
1775 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1776 for (i = 0; i < evaluate_to_size; i++)
1777 {
1778 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1779 if (to_op.graph)
1780 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1781 const int* tos;
1782 int to_size;
1783 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1784 for (j = 0; j < to_size; j++)
1785 {
1786 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1787 .d = tos[j],
1788 .graph = model->graph
1789 });
1790 if (to_op.graph)
1791 ccv_array_add_unique_int(backward_from, to_op.d);
1792 }
1793 }
1794 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1794, __extension__
__PRETTY_FUNCTION__); }))
;
1795 compiled_data->backward.from_op_size = backward_from->rnum;
1796 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1797 for (i = 0; i < backward_from->rnum; i++)
1798 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1799 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1800 .graph = compiled_data->graph,
1801 };
1802 ccv_array_free(backward_from);
1803 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1804 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1805}
1806
1807void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1808{
1809 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1810 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1810, __extension__ __PRETTY_FUNCTION__); }))
;
1811 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1812 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__
); }))
;
1813 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1813, __extension__ __PRETTY_FUNCTION__
); }))
;
1814 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1814, __extension__ __PRETTY_FUNCTION__); }))
;
1815 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1816 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1817 if (!compiled_data->graph || mode_mismatch)
1818 {
1819 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1820 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients; it doesn't require target_gradient_mode or disable_outgrad).
1821 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1822 if (params.requires_grad)
1823 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1824 else
1825 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1826 } else {
1827 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1828 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1828, __extension__ __PRETTY_FUNCTION__); }))
;
1829 const int input_size_per_p = input_size / parallel_count;
1830 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1831 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1831, __extension__ __PRETTY_FUNCTION__); }))
;
1832 const int output_size_per_p = output_size / parallel_count;
1833 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1834 }
1835 if (compiled_data->is_test != params.is_test)
1836 {
1837 compiled_data->is_test = params.is_test;
1838 ccv_nnc_graph_exec_update_t update = {
1839 .parallel_count = parallel_count,
1840 .graph = model->graph,
1841 .graph_exec_arena = compiled_data->graph_exec_arena,
1842 };
1843 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1844 }
1845}
1846
1847void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1848{
1849 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1850 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1850, __extension__ __PRETTY_FUNCTION__); }))
;
1851 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1852 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1853 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1854 else {
1855 if (!compiled_data->evaluate.schedule)
1856 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1857 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1858 }
1859}
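A hedged sketch of how the staging above is typically driven from the caller's side (not part of this file; model, input x, output y and the output gradient dy are assumed to exist on a single device with matching shapes):

ccv_nnc_tensor_t* inputs[] = { x };
ccv_nnc_tensor_t* outputs[] = { y };
ccv_nnc_tensor_t* ingrads[] = { dy };
// Forward pass with gradients enabled; compiles the MULTISTAGE_MODE graph on first use.
ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
	.requires_grad = 1,
	.is_test = 0,
	.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL, // no gradients w.r.t. the inputs needed here
}, inputs, 1, outputs, 1, 0, 0);
// Backward pass; dy is the gradient w.r.t. the output (pass 0, 0 to default it to 1).
ccv_cnnp_model_backward(model, ingrads, 1, 0, 0, 0, 0);
// Fold the gradients into the parameters with the configured minimizer.
ccv_cnnp_model_apply_gradients(model, 0);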
1860
1861// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1862// Particularly, this method compiles the accumulator graph.
1863static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1864{
1865 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1866 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1866, __extension__ __PRETTY_FUNCTION__); }))
;
1867 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1867, __extension__ __PRETTY_FUNCTION__
); }))
;
1868 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1869 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1870 const int parameter_size = compiled_data->parameters->rnum;
1871 int i, j;
1872 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1873 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1874 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1875 for (i = 0; i < parameter_size; i++)
1876 for (j = 0; j < parallel_count; j++)
1877 if (compiled_data->tensors.gradients[i + j * parameter_size])
1878 {
1879 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1880 // Now the old gradient becomes the accumulated gradient; set up a new gradient tensor so we can collect into it.
1881 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1882 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1883 ccv_nnc_tensor_symbol_t inputs[2];
1884 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1885 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1886 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1887 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1888 } else {
1889 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1890 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1891 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1892 }
1893 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1894 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1895 {
1896 ccv_nnc_symbolic_graph_free(accum);
1897 // Create empty graph.
1898 compiled_data->backward.accum = ccv_nnc_graph_new();
1899 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1900 return;
1901 }
1902 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1903 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1904 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1905 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1906 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1907 ccv_nnc_symbolic_graph_free(accum);
1908 ccv_array_free(tensor_binds);
1909 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1910}
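Conceptually, each EWSUM node in the accumulator graph compiled above just adds the freshly computed gradient into the running accumulated gradient. A minimal sketch of that effect, written as plain C over a flat float view (illustrative only, not how the graph actually executes):

// Illustrative only: updated_accum_gradients = accum_gradients + gradients, element-wise.
static void _example_accum_gradient(float* const accum, const float* const grad, const int count)
{
	int i;
	for (i = 0; i < count; i++)
		accum[i] += grad[i];
}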
1911
1912void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1913{
1914 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1915 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1915, __extension__ __PRETTY_FUNCTION__); }))
;
1916 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1916, __extension__ __PRETTY_FUNCTION__
); }))
;
1917 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1918 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1918, __extension__ __PRETTY_FUNCTION__
); }))
;
1919 if (outgrad_size > 0)
1920 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1920, __extension__ __PRETTY_FUNCTION__
); }))
; }
1921 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1921, __extension__ __PRETTY_FUNCTION__); }))
;
1922 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1922, __extension__ __PRETTY_FUNCTION__
); }))
;
1923 const int parameter_size = compiled_data->parameters->rnum;
1924 // If we need to accumulate the gradients now, do jit on accumulator.
1925 if (compiled_data->backward.count > 0)
1926 {
1927 if (!compiled_data->backward.accum)
1928 _ccv_cnnp_model_multistage_jit_1(model);
1929 else if (compiled_data->backward.count == 1) {
1930 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1931 int i;
1932 for (i = 0; i < parameter_size * parallel_count; i++)
1933 {
1934 ccv_nnc_tensor_t* tensor;
1935 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
1936 }
1937 if (compiled_data->backward.tensor_arena)
1938 {
1939 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1940 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1941 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1942 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1943 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1944 }
1945 }
1946 }
1947 const int ingrad_size_per_p = model->output_size;
1948 const int outgrad_size_per_p = compiled_data->outgrad_size;
1949 int i, j;
1950 for (i = 0; i < ingrad_size_per_p; i++)
1951 {
1952 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1953 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1954 {
1955 // Set it to 1 if it is not specified.
1956 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1957 if (ingrad_tensor)
1958 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1959 for (j = 1; j < parallel_count; j++)
1960 {
1961 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
1962 if (ingrad_tensor)
1963 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1964 }
1965 } else {
1966 // Make sure the length matches, in case it is an alias.
1967 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 1967, __extension__ __PRETTY_FUNCTION__
); }))
;
1968 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
1969 for (j = 1; j < parallel_count; j++)
1970 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
1971 }
1972 }
1973 if (outgrad_size > 0)
1974 {
1975 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 1975, __extension__ __PRETTY_FUNCTION__
); }))
;
1976 for (i = 0; i < outgrad_size_per_p; i++)
1977 if (outgrads[i])
1978 {
1979 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
1980 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
1981 for (j = 1; j < parallel_count; j++)
1982 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
1983 }
1984 } else {
1985 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__
); }))
1986 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__
); }))
;
1987 }
1988 // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, which resets all bindings for the gradients.
1989 // For parameters and internals this is fine because clearing bindings restores the original bindings, which are these
1990 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
1991 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
1992 if (!compiled_data->backward.schedule)
1993 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
1994 // Run the backward pass.
1995 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
1996 // If we need to run accumulation round, do that now.
1997 if (compiled_data->backward.count > 0)
1998 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
1999 // Update the count; this determines whether we need to accumulate or not.
2000 ++compiled_data->backward.count;
2001}
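Because backward.count is only reset by ccv_cnnp_model_apply_gradients, repeated backward calls accumulate gradients. A hedged micro-batching sketch (model, micro_batch_count and the per-micro-batch inputs[k] / outputs[k] tensor arrays are assumed to be prepared elsewhere):

// Accumulate gradients over several micro-batches before a single parameter update.
int k;
for (k = 0; k < micro_batch_count; k++)
{
	ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, inputs[k], 1, outputs[k], 1, 0, 0);
	// Passing 0 ingrads defaults the output gradient to 1; from the second call on,
	// the accumulator graph compiled by _ccv_cnnp_model_multistage_jit_1 also runs.
	ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0);
}
ccv_cnnp_model_apply_gradients(model, 0); // applies the accumulated gradients and resets the count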
2002
2003// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2004// Particularly, this method compiles the parameter update graph.
2005static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2006{
2007 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2008 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2008, __extension__ __PRETTY_FUNCTION__
); }))
;
2009 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2010 const int parameter_size = compiled_data->parameters->rnum;
2011 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2012 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2013 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2014 // Bind accumulated gradients.
2015 if (compiled_data->backward.count > 1)
2016 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2017 else
2018 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2019 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2020 int i, j;
2021 for (i = 0; i < compiled_data->backward.to_size; i++)
2022 {
2023 const int* tos;
2024 int to_size;
2025 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2026 for (j = 0; j < to_size; j++)
2027 {
2028 // Check if this already shows up in the backward graph; if that is the case, it won't be in the apply
2029 // gradients graph.
2030 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2031 .d = tos[j],
2032 .graph = model->graph,
2033 });
2034 if (!exec.graph)
2035 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2036 }
2037 }
2038 const int from_size = apply_gradients_from->rnum;
2039 if (from_size == 0)
2040 {
2041 ccv_array_free(apply_gradients_from);
2042 ccv_array_free(tensor_binds);
2043 return;
2044 }
2045 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2046 for (i = 0; i < from_size; i++)
2047 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2048 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2049 .graph = model->graph
2050 };
2051 ccv_array_free(apply_gradients_from);
2052 // It can only end with updates on the parameters.
2053 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2054 for (i = 0; i < parameter_size; i++)
2055 {
2056 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2057 continue;
2058 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2059 for (j = 1; j < parallel_count; j++)
2060 {
2061 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2062 ccv_array_push(tos, &copy);
2063 }
2064 }
2065 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2066 ccv_array_free(tos);
2067 ccv_array_free(tensor_binds);
2068 ccfreefree(froms);
2069 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2070 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2071 {
2072 // Skip if there is no tensor.
2073 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2074 continue;
2075 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2076 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2077 for (j = 1; j < parallel_count; j++)
2078 {
2079 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2080 if (copy)
2081 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2082 }
2083 }
2084 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2085}
2086
2087void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2088{
2089 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2090 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2090, __extension__ __PRETTY_FUNCTION__); }))
;
2091 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2091, __extension__ __PRETTY_FUNCTION__
); }))
;
2092 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2093 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2093, __extension__ __PRETTY_FUNCTION__); }))
;
2094 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2094, __extension__ __PRETTY_FUNCTION__
); }))
;
2095 // Skip if there is no backward pass.
2096 if (compiled_data->backward.count <= 0)
2097 return;
2098 // Skip if there are no parameters.
2099 if (compiled_data->parameters->rnum == 0)
2100 {
2101 compiled_data->backward.count = 0;
2102 return;
2103 }
2104 if (!compiled_data->apply_gradients.graph)
2105 _ccv_cnnp_model_multistage_jit_2(model);
2106 else {
2107 const int parameter_size = compiled_data->parameters->rnum;
2108 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2109 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2110 if (compiled_data->backward.count > 1)
2111 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2112 else
2113 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2114 }
2115 if (compiled_data->apply_gradients.graph)
2116 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2117 // Reset backward count to 0.
2118 compiled_data->backward.count = 0;
2119}
2120
2121void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2122{
2123 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2124 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2125 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2125, __extension__ __PRETTY_FUNCTION__
); }))
;
2126 const int tensors_init = !!compiled_data->tensors_init.v;
2127 if (!tensors_init)
2128 _ccv_cnnp_model_tensors_init(model, compiled_data);
2129 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2130 // Check if it is not fully allocated; if not, run init_1.
2131 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2132 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2133 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2134 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2135 if (param_ref < 0)
2136 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2136
, __extension__ __PRETTY_FUNCTION__); }))
; }
2137 else
2138 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2138, __extension__ __PRETTY_FUNCTION__
); }))
; }
2139 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2140 ccv_array_free(parameter_indices);
2141 const int parameter_size = compiled_data->parameters->rnum;
2142 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2142
, __extension__ __PRETTY_FUNCTION__); }))
;
2143 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2143, __extension__ __PRETTY_FUNCTION__
); }))
;
2144 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2145 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2146 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2146, __extension__
__PRETTY_FUNCTION__); }))
;
2147 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2148 int i;
2149 for (i = 1; i < parallel_count; i++)
2150 {
2151 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2152 if (copy_tensor)
2153 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2154 }
2155 // Mark this symbol as init'ed.
2156 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2157 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2158 init_v[s >> 5] |= (1u << (s & 0x1f));
2159}
2160
2161void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2162{
2163 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2164 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2165 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2165, __extension__ __PRETTY_FUNCTION__
); }))
;
2166 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2166, __extension__ __PRETTY_FUNCTION__
); }))
;
2167 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2168 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2169 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2170 if (param_ref < 0)
2171 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2171
, __extension__ __PRETTY_FUNCTION__); }))
; }
2172 else
2173 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2173, __extension__ __PRETTY_FUNCTION__
); }))
; }
2174 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2175 ccv_array_free(parameter_indices);
2176 const int parameter_size = compiled_data->parameters->rnum;
2177 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2177
, __extension__ __PRETTY_FUNCTION__); }))
;
2178 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2178, __extension__ __PRETTY_FUNCTION__
); }))
;
2179 // We don't need to consider parallel_count, every parameter on each device is identical.
2180 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2181 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2181, __extension__
__PRETTY_FUNCTION__); }))
;
2182 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2183}
2184
2185ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2186{
2187 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2188 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2189 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2189, __extension__ __PRETTY_FUNCTION__
); }))
;
2190 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2190, __extension__ __PRETTY_FUNCTION__
); }))
;
2191 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2192 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2193 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2194 if (param_ref < 0)
2195 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2195
, __extension__ __PRETTY_FUNCTION__); }))
; }
2196 else
2197 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2197, __extension__ __PRETTY_FUNCTION__
); }))
; }
2198 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2199 ccv_array_free(parameter_indices);
2200 const int parameter_size = compiled_data->parameters->rnum;
2201 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2201
, __extension__ __PRETTY_FUNCTION__); }))
;
2202 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2202, __extension__ __PRETTY_FUNCTION__
); }))
;
2203 // We don't need to consider parallel_count, every parameter on each device is identical.
2204 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2205 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2205, __extension__
__PRETTY_FUNCTION__); }))
;
2206 return tensor->info;
2207}
2208
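Taken together, ccv_cnnp_model_parameter_tensor_params and ccv_cnnp_model_parameter_copy give a way to pull a single parameter out of a compiled model. A minimal sketch, assuming the model is compiled and its parameter tensors are initialized; the host allocation through ccv_nnc_tensor_new and the CCV_TENSOR_CPU_MEMORY override are assumptions about the wider ccv_nnc API rather than anything shown in this file:

// Hypothetical helper: copy parameter `index` of `model` into a freshly
// allocated CPU tensor. The caller owns and frees the returned tensor.
static ccv_nnc_tensor_t* copy_parameter_to_host(ccv_cnnp_model_t* const model, const int index)
{
	const ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, index);
	ccv_nnc_tensor_param_t params = ccv_cnnp_model_parameter_tensor_params(model, parameter);
	params.type = CCV_TENSOR_CPU_MEMORY; // Assumed constant; request a host-side tensor.
	ccv_nnc_tensor_t* const host = ccv_nnc_tensor_new(0, params, 0); // Assumed allocator.
	ccv_cnnp_model_parameter_copy(model, parameter, host); // DATA_TRANSFER into `host`.
	return host;
}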
2209const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2210{
2211 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2212 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2213 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2213, __extension__ __PRETTY_FUNCTION__
); }))
;
2214 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2215 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2216 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2217 if (param_ref < 0)
2218 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2218
, __extension__ __PRETTY_FUNCTION__); }))
; }
2219 else
2220 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2220, __extension__ __PRETTY_FUNCTION__
); }))
; }
2221 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2222 ccv_array_free(parameter_indices);
2223 const int parameter_size = compiled_data->parameters->rnum;
2224 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2224
, __extension__ __PRETTY_FUNCTION__); }))
;
2225 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2225, __extension__ __PRETTY_FUNCTION__
); }))
;
2226 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2227}
2228
2229int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2230{
2231 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2231, __extension__ __PRETTY_FUNCTION__
); }))
;
2232 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2233 return compiled_data->parameters->rnum;
2234}
2235
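ccv_cnnp_model_parameter_count and ccv_cnnp_model_parameter_name combine naturally for inspection. A small sketch, assuming <stdio.h> is available and that ccv_cnnp_model_parameters(model, -1, i) addresses the i-th parameter exactly as in the loops below:

// Hypothetical: print every parameter id of a compiled model.
static void print_parameter_names(ccv_cnnp_model_t* const model)
{
	const int count = ccv_cnnp_model_parameter_count(model);
	int i;
	for (i = 0; i < count; i++)
		printf("%s\n", ccv_cnnp_model_parameter_name(model, ccv_cnnp_model_parameters(model, -1, i)));
}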
2236ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2237{
2238 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2239 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2239, __extension__ __PRETTY_FUNCTION__); }))
;
2240 const int parameter_size = compiled_data->parameters->rnum;
2241 int i;
2242 for (i = 0; i < parameter_size; i++)
2243 {
2244 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2245 if (first(model, name, context))
2246 return ccv_cnnp_model_parameters(model, -1, i);
2247 }
2248 return 0;
2249}
2250
2251ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2252{
2253 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2254 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2254, __extension__ __PRETTY_FUNCTION__); }))
;
2255 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2256 const int parameter_size = compiled_data->parameters->rnum;
2257 int i;
2258 for (i = 0; i < parameter_size; i++)
2259 {
2260 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2261 if (filter(model, name, context))
2262 {
2263 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2264 ccv_array_push(parameters, &parameter);
2265 }
2266 }
2267 return parameters;
2268
2269}
2270
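The two accessors above walk compiled_data->ids.parameters and hand each id to the callback, so the callback only needs to look at the name. A minimal sketch; the exact ccv_cnnp_model_parameters_filter_f typedef is assumed to match the filter(model, name, context) call shape used above, and <string.h> is assumed for strstr:

// Hypothetical filter: keep every parameter whose id contains the substring
// passed through `context` (e.g. "bias").
static int match_substring(const ccv_cnnp_model_t* const model, const char* const name, void* const context)
{
	return strstr(name, (const char*)context) != 0;
}

// Usage sketch:
//   ccv_array_t* const matches = ccv_cnnp_model_parameters_filter(model, match_substring, "bias");
//   // ... iterate with ccv_array_get(matches, i), then ccv_array_free(matches);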
2271CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model)
2272{
2273 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2274 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2274, __extension__ __PRETTY_FUNCTION__); }))
;
2275 const int tensors_init = !!compiled_data->tensors_init.v;
2276 if (!tensors_init) // If nothing initialized, we return parameter 0.
2277 return ccv_cnnp_model_parameters(model, -1, 0);
2278 const int parameter_size = compiled_data->parameters->rnum;
2279 int i;
2280 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2281 for (i = 0; i < parameter_size; i++)
2282 {
2283 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
2284 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
2285 return ccv_cnnp_model_parameters(model, -1, i);
2286 }
2287 return 0;
2288}
2289
2290static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2291{
2292 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2293 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2293, __extension__
__PRETTY_FUNCTION__); }))
;
2294 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2295 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2296 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2297 return to_parameter_indices;
2298}
2299
2300static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2301{
2302 // If the model is not compiled yet, compile it now.
2303 if (!model->graph)
2304 {
2305 model->graph = ccv_nnc_symbolic_graph_new();
2306 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2306, __extension__ __PRETTY_FUNCTION__
); }))
;
2307 const int input_size = from_model->input_size;
2308 ccv_nnc_tensor_param_t input_params[input_size];
2309 int i;
2310 for (i = 0; i < input_size; i++)
2311 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2312 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2313 model->parallel_count = from_model->parallel_count;
2314 model->memory_compression = from_model->memory_compression;
2315 model->memory_reduction = from_model->memory_reduction;
2316 model->gradient_checkpointing = from_model->gradient_checkpointing;
2317 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2318 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2319 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2320 }
2321 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2322 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2322, __extension__ __PRETTY_FUNCTION__
); }))
;
2323 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2324 if (!to_tensors_init)
2325 {
2326 if (only_init_0)
2327 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2328 else
2329 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2330 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2331 // Check whether it is fully allocated; if it is not, run init_1.
2332 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2333 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2333, __extension__ __PRETTY_FUNCTION__
); }))
;
2334 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2335 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2336 if (*from_param_ref < 0 && *param_ref >= 0)
2337 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2337, __extension__ __PRETTY_FUNCTION__
); }))
; }
2338 else if (*from_param_ref >= 0)
2339 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2339, __extension__ __PRETTY_FUNCTION__
); }))
; }
2340 if (*param_ref < 0 && *from_param_ref >= 0)
2341 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2341, __extension__ __PRETTY_FUNCTION__); }))
; }
2342 else if (*param_ref >= 0)
2343 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2343, __extension__ __PRETTY_FUNCTION__
); }))
; }
2344}
2345
2346void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2347{
2348 ccv_array_t* to_parameter_indices;
2349 int to_param_ref;
2350 ccv_array_t* from_parameter_indices;
2351 int from_param_ref;
2352 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2353 // Should be exactly the same tensor.
2354 if (to_param_ref < 0 && from_param_ref < 0)
2355 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2355, __extension__ __PRETTY_FUNCTION__
); }))
; }
2356 // To models.
2357 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2358 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2358, __extension__ __PRETTY_FUNCTION__
); }))
;
2359 // From models.
2360 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2361 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2362 const int to_parameter_size = to_compiled_data->parameters->rnum;
2363 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2364 int i, j;
2365 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2366 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2367 for (i = 0; i < rnum; i++)
2368 {
2369 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2370 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2370, __extension__ __PRETTY_FUNCTION__); }))
;
2371 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2371, __extension__ __PRETTY_FUNCTION__
); }))
;
2372 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2373 // If the original is not init'ed, we cannot copy from it.
2374 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2375 continue;
2376 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2377 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2377, __extension__ __PRETTY_FUNCTION__); }))
;
2378 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2378, __extension__ __PRETTY_FUNCTION__
); }))
;
2379 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2380 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2380, __extension__
__PRETTY_FUNCTION__); }))
;
2381 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2382 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2382, __extension__
__PRETTY_FUNCTION__); }))
;
2383 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2384 for (j = 1; j < parallel_count; j++)
2385 {
2386 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2387 if (copy_tensor)
2388 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2389 }
2390 // Mark this symbol as init'ed.
2391 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2392 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2393 }
2394 ccv_array_free(to_parameter_indices);
2395 ccv_array_free(from_parameter_indices);
2396}
2397
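ccv_cnnp_model_set_parameters above performs a plain DATA_TRANSFER per matched parameter (and per device when parallel_count > 1), skipping any source parameter whose init bit is not set. A minimal usage sketch, assuming a negative selector and index pair addresses all parameters of a model, as the param_sel/param_ref handling above implies:

// Hypothetical: overwrite every parameter of `dst_model` with the
// already-initialized parameters of `src_model`.
ccv_cnnp_model_set_parameters(dst_model,
	ccv_cnnp_model_parameters(dst_model, -1, -1),
	src_model,
	ccv_cnnp_model_parameters(src_model, -1, -1));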
2398KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27. Taking true branch
28. Taking false branch
29. Calling 'kh_resize_ccv_cnnp_parameter_id'
30. Taking true branch
31. Assuming the condition is false
32. Taking false branch
33. '?' condition is true
34. Assuming 'new_flags' is non-null
35. Taking false branch
36. '?' condition is true
37. Taking true branch
38. Storing uninitialized value
39. Assuming 'new_keys' is non-null
40. Taking false branch
41. Taking true branch
42. Assuming 'new_vals' is non-null
43. Taking false branch
44. Taking true branch
45. Loop condition is false. Execution continues on line 2398
46. Taking false branch
47. Returning from 'kh_resize_ccv_cnnp_parameter_id'
48. Taking false branch
49. Assuming the condition is true
50. Taking true branch
51. Taking true branch
57. Assuming field 'n_occupied' is >= field 'upper_bound'
58. Taking true branch
59. Taking true branch
60. Calling 'kh_resize_ccv_cnnp_parameter_id'
61. Taking false branch
62. Assuming the condition is false
63. Taking false branch
64. '?' condition is true
65. Assuming 'new_flags' is non-null
66. Taking false branch
67. '?' condition is true
68. Taking false branch
69. Taking true branch
70. Loop condition is true. Entering loop body
71. Assuming the condition is false
72. Taking false branch
73. The value 1 is assigned to 'j'
74. Loop condition is true. Entering loop body
75. Assuming the condition is true
76. Taking true branch
77. Assigned value is garbage or undefined
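This path is the entire basis of the warning: step 38 marks the realloc-grown storage inside kh_resize_ccv_cnnp_parameter_id as holding uninitialized values, and steps 70 through 77 have the second resize's relocation loop read one of those slots, which is the assignment at line 2398, column 1 that gets reported as garbage. A standalone sketch of that diagnostic class (hypothetical code, not the khash implementation); whether the report is a true positive hinges on khash's occupancy-flag bookkeeping, which the analyzer does not fully track across the two kh_put calls:

#include <stdlib.h>

// Grow `*vals` to `new_n` entries and return entry `idx`. If `idx` lands in
// the grown region and nothing has stored to it, the read below is exactly
// the "Assigned value is garbage or undefined" class of report.
static int read_after_grow(int** vals, int new_n, int idx)
{
	int* const grown = (int*)realloc(*vals, sizeof(int) * new_n);
	if (!grown)
		return -1;
	*vals = grown; // Slots past the old size are uninitialized here.
	const int v = (*vals)[idx]; // Garbage if `idx` was never written.
	return v;
}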
2399
2400void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2401{
2402 ccv_array_t* to_parameter_indices;
2403 int to_param_ref;
2404 ccv_array_t* from_parameter_indices;
2405 int from_param_ref;
2406 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2407 // Should be exactly the same tensor.
2408 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2409 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2409, __extension__ __PRETTY_FUNCTION__
); }))
; }
2410 // To models.
2411 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2412 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2412, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2413 // From models.
2414 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2415 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2416 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2416, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count to share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2417 const int from_parameter_size = from_compiled_data->parameters->rnum;
2418 const int to_parameter_size = to_compiled_data->parameters->rnum;
2419 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2420 int i, j;
2421 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2422 char* updated_name = 0;
2423 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2424 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2425 for (i = 0; i < rnum; i++)
2426 {
2427 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2428 // Need to figure out how to use the renamer here.
2429 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2430 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2430, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2431 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2431, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2432 if (renamer
18.1
'renamer' is non-null
)
2433 {
2434 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2435 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2436 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2437 updated_name = (char*)ccmallocmalloc(1024);
2438 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2439 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2440 memcpy(updated_name, src_name, src_name_len);
2441 updated_name[src_name_len] = 0;
2442 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2443 continue; // Skip this.
2444 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2445 {
2446 // Nothing changed.
2447 } else {
2448 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2449 {
2450 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2451 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
54
Assuming 'j' is < 'from_parameter_size'
55
Loop condition is true. Entering loop body
2452 {
2453 int ret;
2454 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
52
Returning from 'kh_put_ccv_cnnp_parameter_id'
56
Calling 'kh_put_ccv_cnnp_parameter_id'
2455 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2455
, __extension__ __PRETTY_FUNCTION__); }))
;
53
Taking true branch
2456 kh_val(id_map, k)((id_map)->vals[k]) = j;
2457 }
2458 }
2459 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2460 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
2461 continue;
2462 src_d = kh_val(id_map, k)((id_map)->vals[k]);
2463 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2463, __extension__ __PRETTY_FUNCTION__); }))
;
2464 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2464, __extension__
__PRETTY_FUNCTION__); }))
;
2465 }
2466 }
2467 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2467, __extension__ __PRETTY_FUNCTION__); }))
;
2468 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2468, __extension__
__PRETTY_FUNCTION__); }))
;
2469 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2470 // If the original is not init'ed, we cannot share from it.
2471 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2472 continue;
2473 for (j = 0; j < parallel_count; j++)
2474 {
2475 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2476 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2476, __extension__
__PRETTY_FUNCTION__); }))
;
2477 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2478 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2479 ccv_nnc_tensor_free(dest);
2480 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2481 }
2482 // Mark this symbol as init'ed.
2483 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2484 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2485 }
2486 ccv_array_free(to_parameter_indices);
2487 ccv_array_free(from_parameter_indices);
2488 if (id_map)
2489 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2490 if (updated_name)
2491 ccfreefree(updated_name);
2492 // Mark it as incomplete so we will call init_1.
2493 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2494 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2495 else // Remove the flag.
2496 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2497}
2498
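Unlike ccv_cnnp_model_set_parameters, ccv_cnnp_model_share_parameters above does not copy: it frees any previously owned destination tensor and stores the source pointer with its low bit set, so both models reference the same storage. The optional renamer maps each destination parameter id to the source id it should be looked up under in id_map; returning non-zero skips the parameter. A minimal sketch of such a renamer; the exact ccv_cnnp_model_parameters_renamer_f typedef is assumed to match the renamer(context, dest_name, updated_name, 1024) call above, and <string.h>/<stdio.h> are assumed for strncmp/snprintf:

// Hypothetical renamer: share "ema-"-prefixed parameters from a source model
// whose ids carry no prefix. Returning non-zero skips the parameter.
static int strip_ema_prefix(void* const context, const char* const dest_name, char* const updated_name, const size_t provided_size)
{
	static const char prefix[] = "ema-";
	const size_t prefix_len = sizeof(prefix) - 1;
	if (strncmp(dest_name, prefix, prefix_len) != 0)
		return 1; // Not an EMA parameter; skip it.
	snprintf(updated_name, provided_size, "%s", dest_name + prefix_len);
	return 0;
}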
2499ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2500{
2501 if (!compiled_data->stream_map)
2502 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2503 int ret = 0;
2504 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2505 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2505, __extension__ __PRETTY_FUNCTION__); }))
;
2506 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2507 // If ret == 0, the key already exists and we can return directly; otherwise, create and return.
2508 if (ret != 0)
2509 {
2510 stream = ccv_nnc_stream_context_new(type);
2511 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2512 }
2513 return stream;
2514}
2515
2516void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2517{
2518 ccv_array_t* to_parameter_indices;
2519 int to_param_ref;
2520 ccv_array_t* from_parameter_indices;
2521 int from_param_ref;
2522 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2523 // Should be exactly the same tensor.
2524 if (to_param_ref < 0 && from_param_ref < 0)
2525 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2525, __extension__ __PRETTY_FUNCTION__
); }))
; }
2526 // To models.
2527 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2528 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2528, __extension__ __PRETTY_FUNCTION__
); }))
;
2529 // From models.
2530 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2531 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2532 const int to_parameter_size = to_compiled_data->parameters->rnum;
2533 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2534 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2534, __extension__ __PRETTY_FUNCTION__
); }))
;
2535 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2535, __extension__ __PRETTY_FUNCTION__
); }))
;
2536 int i, j;
2537 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2538 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2539 for (i = 0; i < aux_in_size; i++)
2540 inputs[i + 2] = aux_ins[i];
2541 for (i = 0; i < aux_out_size; i++)
2542 outputs[i + 1] = aux_outs[i];
2543 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2544 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2545 for (i = 0; i < rnum; i++)
2546 {
2547 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2548 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2548, __extension__ __PRETTY_FUNCTION__); }))
;
2549 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2549, __extension__ __PRETTY_FUNCTION__
); }))
;
2550 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2551 // If the original is not init'ed, we cannot copy from it.
2552 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2553 continue;
2554 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2555 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2555, __extension__ __PRETTY_FUNCTION__); }))
;
2556 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2556, __extension__ __PRETTY_FUNCTION__
); }))
;
2557 if (parallel_count > 1)
2558 {
2559 ccv_nnc_stream_context_t* streams[parallel_count];
2560 ccv_nnc_stream_signal_t* signal;
2561 if (stream_context)
2562 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2563 for (j = 0; j < parallel_count; j++)
2564 {
2565 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2566 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2567 if (!dest || !src)
2568 {
2569 streams[j] = 0;
2570 continue;
2571 }
2572 // At the moment, we can only handle them on the same device.
2573 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2573, __extension__ __PRETTY_FUNCTION__
); }))
;
2574 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2574, __extension__ __PRETTY_FUNCTION__
); }))
;
2575 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2576 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2577 int type = stream_type;
2578 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2579 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2580 // Wait for the signal to finish.
2581 if (stream_context)
2582 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2583 inputs[0] = outputs[0] = dest;
2584 inputs[1] = src;
2585 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2586 if (stream_context)
2587 {
2588 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2589 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2590 }
2591 streams[j] = stream_0;
2592 }
2593 // If this should be blocking, block on it.
2594 if (!stream_context)
2595 for (j = 0; j < parallel_count; j++)
2596 if (streams[j])
2597 ccv_nnc_stream_context_wait(streams[j]);
2598 } else {
2599 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2600 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2600, __extension__
__PRETTY_FUNCTION__); }))
;
2601 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2602 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2602, __extension__
__PRETTY_FUNCTION__); }))
;
2603 inputs[0] = outputs[0] = dest;
2604 inputs[1] = src;
2605 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2606 }
2607 // Mark this symbol as init'ed.
2608 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2609 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2610 }
2611 ccv_array_free(to_parameter_indices);
2612 ccv_array_free(from_parameter_indices);
2613}
2614
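Because ccv_cnnp_model_parameters_zip_map above invokes cmd with inputs { dest, src, aux_ins... } and outputs { dest, aux_outs... }, any binary command that writes into its first output combines two models' parameters in place. A sketch of an exponential-moving-average style update, assuming CMD_ADD_FORWARD(p, q) computes p * dest + q * src as in the library's easy command macros, and that both models are compiled with initialized parameters:

// Hypothetical: ema_model <- 0.999 * ema_model + 0.001 * live_model for every
// parameter pair, blocking because the stream context is null.
ccv_cnnp_model_parameters_zip_map(ema_model,
	ccv_cnnp_model_parameters(ema_model, -1, -1),
	CMD_ADD_FORWARD(0.999, 0.001), ccv_nnc_no_hint, 0,
	0, 0, // no aux inputs
	0, 0, // no aux outputs
	0, // null stream context: wait for completion
	live_model,
	ccv_cnnp_model_parameters(live_model, -1, -1));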
2615void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2616{
2617 int to_param_ref;
2618 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2619 // To models.
2620 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2621 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2621, __extension__ __PRETTY_FUNCTION__
); }))
;
2622 // Tensor has to be inited already.
2623 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2623, __extension__ __PRETTY_FUNCTION__
); }))
;
2624 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2624, __extension__ __PRETTY_FUNCTION__
); }))
;
2625 // From models.
2626 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2627 const int to_parameter_size = to_compiled_data->parameters->rnum;
2628 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2629 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2629, __extension__ __PRETTY_FUNCTION__
); }))
;
2630 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2630, __extension__ __PRETTY_FUNCTION__
); }))
;
2631 int i, j;
2632 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2633 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2634 for (i = 0; i < aux_in_size; i++)
2635 inputs[i + 1] = aux_ins[i];
2636 for (i = 0; i < aux_out_size; i++)
2637 outputs[i + 1] = aux_outs[i];
2638 for (i = 0; i < rnum; i++)
2639 {
2640 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2641 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2641, __extension__ __PRETTY_FUNCTION__); }))
;
2642 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2642, __extension__ __PRETTY_FUNCTION__
); }))
;
2643 if (parallel_count > 1)
2644 {
2645 ccv_nnc_stream_context_t* streams[parallel_count];
2646 ccv_nnc_stream_signal_t* signal;
2647 if (stream_context)
2648 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2649 for (j = 0; j < parallel_count; j++)
2650 {
2651 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2652 if (!dest)
2653 {
2654 streams[j] = 0;
2655 continue;
2656 }
2657 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2658 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2659 int type = stream_type;
2660 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2661 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2662 // Wait for the signal to finish.
2663 if (stream_context)
2664 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2665 inputs[0] = outputs[0] = dest;
2666 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2667 if (stream_context)
2668 {
2669 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2670 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2671 }
2672 streams[j] = stream_0;
2673 }
2674 // If this should be blocking, block on it.
2675 if (!stream_context)
2676 for (j = 0; j < parallel_count; j++)
2677 if (streams[j])
2678 ccv_nnc_stream_context_wait(streams[j]);
2679 } else {
2680 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d]);
2681 assert(dest);
2682 inputs[0] = outputs[0] = dest;
2683 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2684 }
2685 // No need to mark this symbol as init'ed, it is already.
2686 }
2687 ccv_array_free(to_parameter_indices);
2688}
2689
2690void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2691{
2692 int to_param_ref;
2693 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2694 // To models.
2695 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2696 assert(to_compiled_data);
2697 // Tensor has to be inited already.
2698 assert(!!to_compiled_data->tensors_init.v);
2699 ccv_nnc_tensor_t** tensor_gradients;
2700 if (to_compiled_data->backward.count > 1)
2701 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2702 else
2703 tensor_gradients = to_compiled_data->tensors.gradients;
2704 assert(tensor_gradients);
2705 // From models.
2706 const int parallel_count = ccv_max(model->parallel_count, 1);
2707 const int to_parameter_size = to_compiled_data->parameters->rnum;
2708 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2709 assert(aux_in_size >= 0);
2710 assert(aux_out_size >= 0);
2711 int i, j;
2712 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2713 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2714 for (i = 0; i < aux_in_size; i++)
2715 inputs[i + 1] = aux_ins[i];
2716 for (i = 0; i < aux_out_size; i++)
2717 outputs[i + 1] = aux_outs[i];
2718 for (i = 0; i < rnum; i++)
2719 {
2720 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2721 assert(dest_d >= 0);
2722 assert(dest_d < to_compiled_data->parameters->rnum);
2723 if (parallel_count > 1)
2724 {
2725 ccv_nnc_stream_context_t* streams[parallel_count];
2726 ccv_nnc_stream_signal_t* signal;
2727 if (stream_context)
2728 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2729 for (j = 0; j < parallel_count; j++)
2730 {
2731 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2732 if (!dest)
2733 {
2734 streams[j] = 0;
2735 continue;
2736 }
2737 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2738 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type);
2739 int type = stream_type;
2740 CCV_STREAM_SET_DEVICE_ID(type, device_id);
2741 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2742 // Wait for the signal before executing on this stream.
2743 if (stream_context)
2744 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2745 inputs[0] = outputs[0] = dest;
2746 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2747 if (stream_context)
2748 {
2749 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2750 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2751 }
2752 streams[j] = stream_0;
2753 }
2754 // If this call should be blocking, wait on all the streams.
2755 if (!stream_context)
2756 for (j = 0; j < parallel_count; j++)
2757 if (streams[j])
2758 ccv_nnc_stream_context_wait(streams[j]);
2759 } else {
2760 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2761 if (!dest)
2762 continue;
2763 assert(dest);
2764 inputs[0] = outputs[0] = dest;
2765 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2766 }
2767 // No need to mark this symbol as init'ed, it is already.
2768 }
2769 ccv_array_free(to_parameter_indices);
2770}
2771
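For orientation, here is a minimal sketch of how ccv_cnnp_model_parameter_gradients_map might be called from user code, for example to rescale every accumulated gradient in place before applying them. The ALL_PARAMETERS selector, ccv_cnnp_model_parameters and CMD_SCALAR_MUL_FORWARD are assumptions about the surrounding level-5 / nnc API, not definitions taken from this file:

/* Sketch: scale every parameter gradient by 1/accum_count on the default (blocking) path. */
const ccv_cnnp_model_io_t all_parameters = ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS);
ccv_cnnp_model_parameter_gradients_map(model, all_parameters,
	CMD_SCALAR_MUL_FORWARD(1.0 / accum_count), ccv_nnc_no_hint, 0,
	0, 0, /* no auxiliary inputs */
	0, 0, /* no auxiliary outputs */
	0 /* no stream context, so the call blocks */);

Because the function sets both inputs[0] and outputs[0] to the gradient tensor, any single-input, single-output elementwise command used this way modifies the gradients in place.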
2772ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2773{
2774 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2775 assert(compiled_data);
2776 return compiled_data->minimize.minimizer;
2777}
2778
2779void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2780{
2781 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2782 assert(compiled_data);
2783 const int parameter_size = compiled_data->parameters->rnum;
2784 if (parameter_size == 0)
2785 return;
2786 if (reset)
2787 { assert(set_parameters == 0 && set_parameter_size == 0); }
2788 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2789 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2790 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2791 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2792 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2793 // We update all parameters, at this point, we have one minimizer.
2794 if (set_parameters == 0 || set_parameter_size == 0)
2795 compiled_data->minimize.minimizer = minimizer;
2796 int i;
2797 if (set_parameters && set_parameter_size)
2798 {
2799 // We need to record which minimizer goes with these parameters.
2800 if (!compiled_data->minimize.parameters)
2801 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2802 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2803 set_minimizer_for_parameter->minimizer = minimizer;
2804 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2805 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2806 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2807 }
2808 // If reset is true, clear the parameters array.
2809 if (reset && compiled_data->minimize.parameters)
2810 {
2811 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2812 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
2813 ccv_array_clear(compiled_data->minimize.parameters);
2814 }
2815 if (!compiled_data->update_nodes)
2816 return;
2817 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2818 assert(symbolic_graph);
2819 if (saved_aux_size > old_max_saved_aux_size)
2820 {
2821 assert(compiled_data->updated_parameters);
2822 // Reallocate first, move them around later.
2823 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2824 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2825 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2826 // We need to do this from back to front: because saved_aux_size > old_saved_aux_size, the regions could overlap.
2827 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2828 }
2829 int flag = 0;
2830 const int parallel_count = ccv_max(model->parallel_count, 1);
2831 if (set_parameters && set_parameter_size)
2832 {
2833 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2834 for (i = 0; i < set_parameter_size; i++)
2835 {
2836 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2837 assert(set_parameters[i]->param_sel != 0);
2838 const int old_rnum = parameter_indices->rnum;
2839 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2840 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2841 assert(set_parameters[i]->param_ref != 0);
2842 if (param_ref >= 0)
2843 {
2844 assert(param_ref + old_rnum < parameter_indices->rnum);
2845 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
2846 parameter_indices->rnum = old_rnum + 1;
2847 }
2848 }
2849 // We may have duplicated indices, but that is OK, we will set it twice.
2850 for (i = 0; i < parameter_indices->rnum; i++)
2851 {
2852 const int d = *(int*)ccv_array_get(parameter_indices, i);
2853 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2854 flag = 1;
2855 }
2856 ccv_array_free(parameter_indices);
2857 } else {
2858 for (i = 0; i < parameter_size; i++)
2859 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2860 flag = 1;
2861 if (compiled_data->minimize.parameters)
2862 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2863 flag = 1;
2864 }
2865 if (flag)
2866 {
2867 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
2868 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2869 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2870 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2871 }
2872}
2873
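A hedged sketch of how ccv_cnnp_model_minimizer and ccv_cnnp_model_set_minimizer might be used together: read back the current minimizer, lower its learning rate for every parameter, then override the minimizer for one sub-model only. The .info.sgd.rate field, the CMD_SGD_FORWARD argument order, ALL_PARAMETERS and the `head` sub-model are assumptions for illustration, not taken from this file:

/* Sketch: decay the global learning rate, then give one sub-model its own minimizer. */
ccv_nnc_cmd_t minimizer = ccv_cnnp_model_minimizer(model);
minimizer.info.sgd.rate = 0.0001; /* assumed SGD parameter layout */
ccv_cnnp_model_set_minimizer(model, minimizer, 0 /* no reset */, 0, 0); /* applies to all parameters */
const ccv_cnnp_model_io_t head_parameters = ccv_cnnp_model_parameters(head, ALL_PARAMETERS, ALL_PARAMETERS);
ccv_cnnp_model_set_minimizer(model, CMD_SGD_FORWARD(0, 0.01, 1, 0.9, 0.9, 0.9), 0, &head_parameters, 1);

Note the asserted contract above: when reset is non-zero, set_parameters and set_parameter_size must both be zero, so a reset call clears all per-parameter minimizers rather than adding one.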
2874void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2875{
2876 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2877 assert(compiled_data);
2878 compiled_data->compile_params = compile_params;
2879}
2880
2881void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2882{
2883 if (model->graph && out_size > 0)
2884 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2885 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2886 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2887 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2888 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2889 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2890 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2891}
2892
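A small usage sketch for ccv_cnnp_model_dot: it writes up to four Graphviz outputs, in the order the function checks them above (symbolic graph, compiled graph, gradient-accumulation graph, apply-gradients graph). CCV_NNC_LONG_DOT_GRAPH is assumed to be the verbose rendering flag used by the other nnc dot helpers:

/* Sketch: dump whatever graphs exist to .dot files; error checking omitted for brevity. */
FILE* outs[4];
outs[0] = fopen("symbolic.dot", "w");
outs[1] = fopen("graph.dot", "w");
outs[2] = fopen("backward_accum.dot", "w");
outs[3] = fopen("apply_gradients.dot", "w");
ccv_cnnp_model_dot(model, CCV_NNC_LONG_DOT_GRAPH, outs, 4);
int i;
for (i = 0; i < 4; i++)
	fclose(outs[i]);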
2893void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2894{
2895 if (model->graph)
2896 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2897}
2898
2899static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2900{
2901 int i;
2902 const int parameter_size = compiled_data->parameters->rnum;
2903 ccv_array_free(compiled_data->parameters);
2904 if (compiled_data->parameter_flags)
2905 ccfree(compiled_data->parameter_flags);
2906 const int internal_size = compiled_data->internals->rnum;
2907 ccv_array_free(compiled_data->internals);
2908 assert(compiled_data->ids.parameters->rnum == parameter_size);
2909 assert(compiled_data->ids.internals->rnum == internal_size);
2910 for (i = 0; i < parameter_size; i++)
2911 ccfree(*(char**)ccv_array_get(compiled_data->ids.parameters, i));
2912 ccv_array_free(compiled_data->ids.parameters);
2913 for (i = 0; i < internal_size; i++)
2914 ccfree(*(char**)ccv_array_get(compiled_data->ids.internals, i));
2915 ccv_array_free(compiled_data->ids.internals);
2916 const int parallel_count = ccv_max(model->parallel_count, 1);
2917 if (compiled_data->tensors.parameters)
2918 {
2919 for (i = 0; i < parameter_size * parallel_count; i++)
2920 // If it is not marked as not belonging, we can free it.
2921 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2922 if (compiled_data->tensors.parameters[i])
2923 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2924 for (i = 0; i < internal_size * parallel_count; i++)
2925 if (compiled_data->tensors.internals[i])
2926 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2927 ccfree(compiled_data->tensors.parameters);
2928 }
2929 if (compiled_data->tensors.gradients)
2930 {
2931 for (i = 0; i < parameter_size * parallel_count; i++)
2932 {
2933 if (compiled_data->tensors.gradients[i])
2934 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2935 if (compiled_data->tensors.accum_gradients[i])
2936 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2937 }
2938 ccfree(compiled_data->tensors.gradients);
2939 }
2940 if (compiled_data->minimize.parameters)
2941 {
2942 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2943 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
2944 ccv_array_free(compiled_data->minimize.parameters);
2945 }
2946 if (compiled_data->rewindables)
2947 ccv_array_free(compiled_data->rewindables);
2948 if (compiled_data->tensors_init.v)
2949 ccfree(CCV_NNC_INIT_V(compiled_data->tensors_init.v));
2950 if (compiled_data->evaluate.tos)
2951 ccfree(compiled_data->evaluate.tos);
2952 compiled_data->evaluate.tos = 0;
2953 if (compiled_data->stream_map)
2954 {
2955 khiter_t k;
2956 for (k = kh_begin(compiled_data->stream_map); k != kh_end(compiled_data->stream_map); ++k)
2957 {
2958 if (!kh_exist(compiled_data->stream_map, k))
2959 continue;
2960 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k);
2961 ccv_nnc_stream_context_free(stream);
2962 }
2963 kh_destroy(stream_map, compiled_data->stream_map);
2964 }
2965 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2966 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
2967 _ccv_cnnp_compiled_data_backward_free(compiled_data);
2968 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2969 if (compiled_data->gradient_checkpoints)
2970 {
2971 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
2972 {
2973 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i);
2974 assert(checkpoint->inputs);
2975 ccfree(checkpoint->inputs);
2976 ccv_array_free(checkpoint->tensor_symbols);
2977 }
2978 ccv_array_free(compiled_data->gradient_checkpoints);
2979 }
2980 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
2981 ccfree(compiled_data);
2982}
2983
2984void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
2985{
2986 if (model->isa->deinit)
2987 model->isa->deinit(model);
2988 if (model->io)
2989 {
2990 int i;
2991 for (i = 0; i < model->io->rnum; i++)
2992 {
2993 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
2994 if (model_io->outgoings)
2995 ccv_array_free(model_io->outgoings);
2996 if (model_io->incomings)
2997 ccv_array_free(model_io->incomings);
2998 if (model_io->dependencies)
2999 ccv_array_free(model_io->dependencies);
3000 ccfree(model_io);
3001 }
3002 ccv_array_free(model->io);
3003 }
3004 if (model->parameter_indices)
3005 ccv_array_free(model->parameter_indices);
3006 if (model->inputs)
3007 ccfree(model->inputs);
3008 if (model->graph)
3009 ccv_nnc_symbolic_graph_free(model->graph);
3010 if (model->compiled_data)
3011 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
3012 if (model->name)
3013 ccfree(model->name);
3014 ccfree(model);
3015}
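
Finally, a hedged teardown sketch for ccv_cnnp_model_free: once a model has been built and (optionally) compiled elsewhere, a single free call releases the io handles, the symbolic graph, the compiled data and the name, after which the handle must not be reused. The `model` variable here is assumed to have come from one of the level-5 model builders:

/* Sketch: typical end of a training or inference run. */
ccv_cnnp_model_free(model);
model = 0; /* defensive: the pointer is dangling after the free */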