Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2443, column 13
Array access (via field 'vals') results in a null pointer dereference

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/18 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -D HAVE_CUDA_SM80 -I /usr/local/include -internal-isystem /usr/local/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-06-10-002533-204407-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6
7// MARK - Level-5 API
8
9ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
10{
11 if (!model->io)
12 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
13 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
14 model_io->param_ref = 0;
15 model_io->param_sel = 0;
16 model_io->visit = 0;
17 model_io->model = model;
18 model_io->dependencies = 0;
19 model_io->dependents = 0;
20 model_io->outgoings = 0;
21 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
22 ccv_array_push(model->io, &model_io);
23 if (input_size > 0)
24 {
25 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
26 ccv_array_resize(model_io->incomings, input_size);
27 int i;
28 memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t
)(model_io->incomings)->rsize * (size_t)(0)))
, inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
29 for (i = 0; i < input_size; i++)
30 {
31 if (!inputs[i]->outgoings)
32 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
33 ccv_array_push(inputs[i]->outgoings, &model_io);
34 }
35 } else {
36 model_io->incomings = 0;
37 }
38 return model_io;
39}
40
41void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
42{
43 assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__
({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0"
, "ccv_cnnp_model.c", 43, __extension__ __PRETTY_FUNCTION__);
}))
;
44 if (!model_io->dependencies)
45 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
46 int i, j;
47 for (i = 0; i < dependency_size; i++)
48 {
49 int flag = 0;
50 // Check if it is already exist or not.
51 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
52 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t
)(model_io->dependencies)->rsize * (size_t)(j)))
== dependencies[i])
53 flag = 1;
54 if (flag)
55 continue;
56 ccv_array_push(model_io->dependencies, dependencies + i);
57 ++dependencies[i]->dependents;
58 }
59}
60
61int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
62{
63 return model->output_size;
64}
65
66int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
67{
68 // If the model is compiled, it is default to 1 unless it is not.
69 if (model->compiled_data)
70 return model->is_trainable >= 0 ? model->is_trainable : 1;
71 return model->is_trainable;
72}
73
74ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
75{
76 if (!model->io)
77 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
78 ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s));
79 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1;
80 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1;
81 model_io->visit = 0;
82 model_io->model = model;
83 model_io->outputs = 0;
84 model_io->dependencies = 0;
85 model_io->dependents = 0;
86 model_io->incomings = 0;
87 model_io->outgoings = 0;
88 ccv_array_push(model->io, &model_io);
89 return model_io;
90}
91
92void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
93{
94 model->notify_hook.func = func;
95 model->notify_hook.context = context;
96}
97
98void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
99{
100 if (model->notify_hook.func)
101 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
102 if (model->isa->notify)
103 model->isa->notify(model, tag, payload);
104}
105
106static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
107{
108 int i, j;
109 for (i = 0; i < graph_exec_symbol_size; i++)
110 {
111 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
112 // Check whether this tensor symbol has any duplicate.
113 for (j = i + 1; j < graph_exec_symbol_size;)
114 {
115 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
116 // If there is a same tensor symbol, remove it.
117 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
118 {
119 if (j + 1 < graph_exec_symbol_size)
120 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
121 --graph_exec_symbol_size;
122 continue;
123 }
124 ++j;
125 }
126 }
127 return graph_exec_symbol_size;
128}
129
130void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
131{
132 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
133 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
134 int i;
135 if (!model->parameter_indices)
136 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
137 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
138 {
139 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data
)) + (size_t)(add_to_array_context->symbols)->rsize * (
size_t)(i)))
;
140 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
141 {
142 // Only add to parameter_indices if it is trainable.
143 if (add_to_array_context->prefix == 't')
144 ccv_array_add_unique_int(model->parameter_indices, i);
145 // Found it, return, don't add it.
146 return;
147 }
148 }
149 // Only add to parameter_indices if it is trainable.
150 if (add_to_array_context->prefix == 't')
151 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
152 // This is a new one, no need to add_unique_int, it is unique.
153 ccv_array_push(add_to_array_context->symbols, &symbol);
154 if (add_to_array_context->trainables)
155 ccv_array_push(add_to_array_context->trainables, &is_trainable);
156 char id[2048];
157 id[0] = add_to_array_context->prefix;
158 id[1] = '-';
159 int total_len = 2;
160 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
161 {
162 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences
)->data)) + (size_t)(add_to_array_context->sequence->
sequences)->rsize * (size_t)(i)))
;
163 int len;
164 if (name->name && name->name[0] != '\0')
165 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
166 else
167 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
168 total_len += len;
169 if (total_len >= 2047)
170 break;
171 }
172 if (total_len < 2047)
173 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
174 assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__
({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048"
, "ccv_cnnp_model.c", 174, __extension__ __PRETTY_FUNCTION__)
; }))
;
175 char *heap_id = (char*)ccmallocmalloc(total_len + 1);
176 memcpy(heap_id, id, total_len + 1);
177 ccv_array_push(add_to_array_context->ids, &heap_id);
178 ++add_to_array_context->sequence->it;
179}
180
181static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
182{
183 compiled_data->f = compiled_data->fits + output_size;
184 compiled_data->xpu_alloc.mp_hdr = -1;
185 compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str();
186 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc();
187 compiled_data->gradient_checkpoints = gradient_checkpoints;
188}
189
190static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
191{
192 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 192, __extension__ __PRETTY_FUNCTION__); }))
;
193 model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
194 int i;
195 for (i = 0; i < input_size; i++)
196 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
197 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
198 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
199 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
200 ccv_cnnp_model_sequence_t model_sequence = {
201 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
202 };
203 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
204 .sequence = &model_sequence,
205 .prefix = 't',
206 .symbols = parameters,
207 .ids = parameter_ids,
208 .trainables = parameter_trainables,
209 };
210 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
211 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
212 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
213 .sequence = &model_sequence,
214 .prefix = 'r',
215 .symbols = internals,
216 .ids = internal_ids,
217 .trainables = 0,
218 };
219 ccv_cnnp_model_build_data_t build_data = {
220 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
221 .model_sequence = &model_sequence,
222 .add_to_array = ccv_cnnp_model_add_to_array,
223 .parameters = parameters,
224 .context = {
225 .add_to_parameter = &add_to_parameter_context,
226 .add_to_output = &add_to_output_context,
227 },
228 .gradient_checkpoints = 0,
229 };
230 model->data = &build_data;
231 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
232 for (i = 0; i < model->output_size; i++)
233 {
234 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
235 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
236 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
237 continue;
238 // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method
239 // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be
240 // honest, because we cannot handle cases of alias is part of the original tensor but bind differently).
241 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
242 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
243 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, &output, 1, model->outputs + i, 1, "contiguous");
244 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
245 }
246 model->data = 0;
247 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
248 if (model_sequence.sequences)
249 ccv_array_free(model_sequence.sequences);
250 // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that.
251 int not_trainables = 0;
252 // Assert no parameter is alias.
253 for (i = 0; i < parameters->rnum; i++)
254 {
255 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
256 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
257 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 257, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
258 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
== 0)
259 not_trainables = 1;
260 }
261 assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables->
rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables
->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum"
, "ccv_cnnp_model.c", 261, __extension__ __PRETTY_FUNCTION__)
; }))
;
262 uint64_t* parameter_flags = 0;
263 if (not_trainables)
264 {
265 parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
266 for (i = 0; i < parameter_trainables->rnum; i++)
267 if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t
)(parameter_trainables)->rsize * (size_t)(i)))
)
268 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
269 }
270 ccv_array_free(parameter_trainables);
271 // Assert no internal is alias.
272 for (i = 0; i < internals->rnum; i++)
273 {
274 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals
)->rsize * (size_t)(i)))
;
275 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
276 assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__
({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0"
, "ccv_cnnp_model.c", 276, __extension__ __PRETTY_FUNCTION__)
; }))
; // Cannot find the one alias to.
277 }
278 const int output_size = model->output_size;
279 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
280 const int parameters_rnum = parameters->rnum;
281 if (input_size > 0)
282 {
283 ccv_array_resize(parameters, parameters_rnum + input_size);
284 memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(parameters_rnum)))
, model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
285 }
286 ccv_nnc_symbolic_graph_simplify(model->graph,
287 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
288 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
289 CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
290 CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
, CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
,
291 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters_rnum + input_size,
292 model->outputs, output_size,
293 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
294 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
295 // Size it down.
296 parameters->rnum = parameters_rnum;
297 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
298 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
299 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
300 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 300, __extension__ __PRETTY_FUNCTION__)
; }))
;
301 compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
302 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
303 compiled_data->loss = loss;
304 if (loss.cmd == CCV_NNC_NOOP)
305 {
306 // If no loss function provided, there is no fits.
307 for (i = 0; i < output_size; i++)
308 {
309 compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
310 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
311 if (alias_to.d < 0)
312 compiled_data->f[i] = model->outputs[i];
313 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
314 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
315 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
316 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
317 int j;
318 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
319 { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if (
ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c"
, 319, __extension__ __PRETTY_FUNCTION__); }))
; } // There is no ofs.
320 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
321 }
322 }
323 } else {
324 for (i = 0; i < output_size; i++)
325 {
326 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
327 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
328 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
329 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit}
, (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 -1)
, TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, (
1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 -1)
, 0);
330 }
331 }
332 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
333 ccv_nnc_symbolic_graph_simplify(model->graph,
334 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, // Only do Ops fusion, in this way, we can fuse the loss function.
335 0, 0, // No need to provide binds at this point.
336 compiled_data->f, model->output_size,
337 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
338 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
339 // If inputs are from GPU, stream type is GPU.
340 compiled_data->parameters = parameters;
341 compiled_data->parameter_flags = parameter_flags;
342 compiled_data->internals = internals;
343 compiled_data->ids.parameters = parameter_ids;
344 compiled_data->ids.internals = internal_ids;
345}
346
347static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
348{
349 ccv_array_t* const stack = (ccv_array_t*)context;
350 ccv_array_push(stack, &symbol.d);
351}
352
353static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
354{
355 const ccv_nnc_tensor_symbol_t src_symbol = {
356 .d = src_index,
357 .graph = src_graph
358 };
359 const ccv_nnc_tensor_symbol_t dest_symbol = {
360 .d = dest_index,
361 .graph = dest_graph
362 };
363 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
364 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
365 int ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
366 int inc[CCV_NNC_MAX_DIM_ALLOC(12)];
367 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
368 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
369}
370
371static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
372{
373 const ccv_nnc_tensor_symbol_t src_symbol = {
374 .d = src_index,
375 .graph = src_graph
376 };
377 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
378 const ccv_nnc_tensor_symbol_t dest_symbol = {
379 .d = dest_index,
380 .graph = dest_graph
381 };
382 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
383 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
384}
385
386static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
387static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
388
389typedef struct {
390 int parallel_count;
391 ccv_nnc_symbolic_graph_t* graph;
392 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
393} ccv_nnc_graph_exec_update_t;
394
395static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
396{
397 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
398 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
399 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
400 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
401 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
402 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
403 const int parallel_count = graph_exec_update->parallel_count;
404 int i;
405 for (i = 1; i < parallel_count; i++)
406 {
407 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
408 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
409 {
410 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
411 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
412 }
413 }
414}
415
416void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
417{
418 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 418, __extension__ __PRETTY_FUNCTION__); }))
;
419 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 419, __extension__ __PRETTY_FUNCTION__)
; }))
;
420 assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if
(!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c"
, 420, __extension__ __PRETTY_FUNCTION__); }))
;
421 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
422 init->graph = ccv_nnc_symbolic_graph_new();
423 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
424 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
425 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
426 init->parallel_count = model->parallel_count;
427 init->memory_compression = model->memory_compression;
428 init->memory_reduction = model->memory_reduction;
429 init->gradient_checkpointing = model->gradient_checkpointing;
430 init->compiled_data->stream_type = model->compiled_data->stream_type;
431 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
432 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
433 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
434 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
435 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
436 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0);
437 int i, j;
438 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
439 for (i = 0; i < compiled_data->parameters->rnum; i++)
440 {
441 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
442 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 442, __extension__ __PRETTY_FUNCTION__)
; }))
;
443 }
444 for (i = 0; i < compiled_data->internals->rnum; i++)
445 {
446 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
447 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph
, init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim
(model->graph, init->graph, d, d)) ; else __assert_fail
("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)"
, "ccv_cnnp_model.c", 447, __extension__ __PRETTY_FUNCTION__)
; }))
;
448 }
449 // Update inputs.
450 assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size)
? 1 : 0), __extension__ ({ if (model->input_size == init->
input_size) ; else __assert_fail ("model->input_size == init->input_size"
, "ccv_cnnp_model.c", 450, __extension__ __PRETTY_FUNCTION__)
; }))
;
451 for (i = 0; i < model->input_size; i++)
452 if (model->inputs[i].d >= 0)
453 {
454 assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0"
, "ccv_cnnp_model.c", 454, __extension__ __PRETTY_FUNCTION__)
; }))
;
455 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
456 }
457 // Update outputs.
458 assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size
) ? 1 : 0), __extension__ ({ if (model->output_size == init
->output_size) ; else __assert_fail ("model->output_size == init->output_size"
, "ccv_cnnp_model.c", 458, __extension__ __PRETTY_FUNCTION__)
; }))
;
459 for (i = 0; i < model->output_size; i++)
460 {
461 if (model->outputs[i].d >= 0)
462 {
463 assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__
({ if (init->outputs[i].d >= 0) ; else __assert_fail (
"init->outputs[i].d >= 0", "ccv_cnnp_model.c", 463, __extension__
__PRETTY_FUNCTION__); }))
;
464 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
465 }
466 if (model->outputs[i].d != model->compiled_data->f[i].d)
467 {
468 assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data
->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[
i].d != init->compiled_data->f[i].d) ; else __assert_fail
("init->outputs[i].d != init->compiled_data->f[i].d"
, "ccv_cnnp_model.c", 468, __extension__ __PRETTY_FUNCTION__)
; }))
;
469 if (model->compiled_data->f[i].d >= 0)
470 {
471 assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ?
1 : 0), __extension__ ({ if (init->compiled_data->f[i]
.d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0"
, "ccv_cnnp_model.c", 471, __extension__ __PRETTY_FUNCTION__)
; }))
;
472 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
473 }
474 }
475 }
476 // Go through the graph to set tensor on matching symbols
477 for (i = 0; i < stack->rnum; i++)
478 {
479 const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize
* (size_t)(i)))
;
480 // If exceed range, skip.
481 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
482 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
483 continue;
484 const ccv_nnc_graph_exec_symbol_t src_symbol = {
485 .d = d,
486 .graph = init->graph
487 };
488 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
489 .d = d,
490 .graph = model->graph
491 };
492 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
493 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
494 // If the name doesn't match, skip.
495 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
496 continue;
497 // Now get all the inputs and outputs, if matches, set them.
498 const int* src_inputs;
499 int src_input_size;
500 const int* src_outputs;
501 int src_output_size;
502 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
503 const int* dest_inputs;
504 int dest_input_size;
505 const int* dest_outputs;
506 int dest_output_size;
507 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
508 // We may have unmatched input / output size because this is the minimizer and it has
509 // different saved_aux (for example, when we shrunk with CMD_NOOP).
510 if (src_input_size != dest_input_size)
511 continue;
512 if (src_output_size != dest_output_size)
513 continue;
514 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
515 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
516 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
517 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
518 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
519 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
520 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
521 // a new exec symbol.
522 for (j = 0; j < src_input_size; j++)
523 if (src_inputs[j] >= 0)
524 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
525 for (j = 0; j < src_output_size; j++)
526 if (src_outputs[j] >= 0)
527 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
528 }
529 ccv_array_free(stack);
530 // After this, we get all tensors in the model graph resolved through tensor_auto.
531 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0);
532 // Verify symbols we get matches.
533 const int parameter_size = compiled_data->parameters->rnum;
534 for (i = 0; i < parameter_size; i++)
535 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->parameters)->data)) + (size_t)(compiled_data
->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (
((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data->
parameters)->data)) + (size_t)(compiled_data->parameters
)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->parameters)->
data)) + (size_t)(init->compiled_data->parameters)->
rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d"
, "ccv_cnnp_model.c", 535, __extension__ __PRETTY_FUNCTION__)
; }))
; }
536 const int internal_size = compiled_data->internals->rnum;
537 for (i = 0; i < internal_size; i++)
538 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*)
((compiled_data->internals)->data)) + (size_t)(compiled_data
->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((init->compiled_data->internals)->
data)) + (size_t)(init->compiled_data->internals)->rsize
* (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t
*)((void*)(((char*)((compiled_data->internals)->data)) +
(size_t)(compiled_data->internals)->rsize * (size_t)(i
))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init
->compiled_data->internals)->data)) + (size_t)(init->
compiled_data->internals)->rsize * (size_t)(i))))->d
) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d"
, "ccv_cnnp_model.c", 538, __extension__ __PRETTY_FUNCTION__)
; }))
; }
539 // Go through compiled data.
540 if (compiled_data->tensor_arena)
541 {
542 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
543 if (flag == 0 && compiled_data->graph_exec_arena)
544 {
545 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
546 // Since we will reinit, if we previously set is_test, we need to set it again.
547 if (compiled_data->is_test)
548 {
549 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
550 ccv_nnc_graph_exec_update_t update = {
551 .parallel_count = parallel_count,
552 .graph = model->graph,
553 .graph_exec_arena = compiled_data->graph_exec_arena,
554 };
555 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
556 }
557 } else
558 // Free-up tensor arena & graph exec arena.
559 _ccv_cnnp_compiled_data_graph_free(compiled_data);
560 }
561 // There are other compiled graphs, for accum and apply gradients.
562 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
563 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
564 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
565 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
566 // That is why we don't update these compiled graphs at all this point.
567 // Free the model, we've already "absorbed" it.
568 ccv_cnnp_model_free(init);
569}
570
571void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
572{
573 assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model->
input_size == 0) ? 1 : 0), __extension__ ({ if (input_size ==
model->input_size || model->input_size == 0) ; else __assert_fail
("input_size == model->input_size || model->input_size == 0"
, "ccv_cnnp_model.c", 573, __extension__ __PRETTY_FUNCTION__)
; }))
;
574 if (model->input_size == 0)
575 model->input_size = input_size;
576 if (!model->graph) // The graph is not compiled yet.
577 {
578 model->graph = ccv_nnc_symbolic_graph_new();
579 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
580 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 580, __extension__ __PRETTY_FUNCTION__)
; }))
;
581 int i, flag = 0;
582 for (i = 0; !flag && i < input_size; i++)
583 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY);
584 // If inputs are from GPU, stream type is GPU.
585 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
586 model->compiled_data->minimize.minimizer = minimizer;
587 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
588 } else {
589 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
590 // And then absorb the "new model" to the old one.
591 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
592 ccv_cnnp_model_absorb(model, init, inputs, input_size);
593 // Reset minimizer.
594 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
595 }
596}
597
598ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
599{
600 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
601 new_model->is_trainable = is_trainable;
602 return new_model;
603}
604
605void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
606{
607 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 607, __extension__ __PRETTY_FUNCTION__); }))
;
608 assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0
), __extension__ ({ if (output_size == model->output_size)
; else __assert_fail ("output_size == model->output_size"
, "ccv_cnnp_model.c", 608, __extension__ __PRETTY_FUNCTION__)
; }))
;
609 ccv_nnc_symbolic_graph_t* const graph = model->graph;
610 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0);
611 int i;
612 for (i = 0; i < output_size; i++)
613 {
614 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL
) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL"
, "ccv_cnnp_model.c", 614, __extension__ __PRETTY_FUNCTION__)
; }))
;
615 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
616 }
617}
618
619void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
620{
621 if (workspace_size == model->workspace_size)
622 return;
623 model->workspace_size = workspace_size;
624 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
625 if (compiled_data && compiled_data->graph)
626 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0);
627}
628
629size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
630{
631 return model->workspace_size;
632}
633
634void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
635{
636 if (parallel == 0)
637 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
638 else
639 model->parallel_count = parallel;
640 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
641 if (compiled_data)
642 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 642, __extension__ __PRETTY_FUNCTION__)
; }))
; }
643}
644
645void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
646{
647 model->max_stream_count = max_stream_count;
648 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
649 if (compiled_data)
650 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 650, __extension__ __PRETTY_FUNCTION__)
; }))
; }
651}
652
653void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
654{
655 model->memory_compression = memory_compression;
656 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
657 if (compiled_data)
658 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 658, __extension__ __PRETTY_FUNCTION__)
; }))
; }
659}
660
661void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
662{
663 model->memory_reduction = memory_reduction;
664 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
665 if (compiled_data)
666 { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__
({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph"
, "ccv_cnnp_model.c", 666, __extension__ __PRETTY_FUNCTION__)
; }))
; }
667}
668
669void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
670{
671 model->gradient_checkpointing = gradient_checkpointing;
672}
673
674int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
675{
676 return model->gradient_checkpointing;
677}
678
// Context handed (as void*) to _ccv_cnnp_init_states_for_tensors.
679typedef struct {
// Number of parallel copies; copies 1 .. parallel_count - 1 of an initialized tensor receive its data.
680 int parallel_count;
// Symbolic graph used to look up the per-device copies of a tensor symbol.
681 ccv_nnc_symbolic_graph_t* graph;
// Owner of the tensors_init bitmap that records which symbols are already initialized.
682 ccv_cnnp_compiled_data_t* compiled_data;
// Arena used to resolve tensor symbols into concrete tensors.
683 ccv_nnc_tensor_arena_t* tensor_arena;
684} ccv_nnc_tensor_init_states_t;
685
686static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
687{
688 int i;
689 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
690 for (i = 0; i < compiled_data->parameters->rnum; i++)
691 {
692 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
)->d;
693 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
694 return 1;
695 }
696 for (i = 0; i < compiled_data->internals->rnum; i++)
697 {
698 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
)->d;
699 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
700 return 1;
701 }
702 return 0;
703}
704
705static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
706{
707 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
708 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
709 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
710 if (!output_tensor)
711 return;
712 const int d = output_symbol.d;
713 assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data->
tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states
->compiled_data->tensors_init.size) ; else __assert_fail
("d < tensor_init_states->compiled_data->tensors_init.size"
, "ccv_cnnp_model.c", 713, __extension__ __PRETTY_FUNCTION__)
; }))
;
714 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data
->tensors_init.v) & ~(uintptr_t)1))
;
715 if (init_v[d >> 5] & (1u << (d & 0x1f)))
716 return;
717 init_v[d >> 5] |= (1u << (d & 0x1f));
718 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
719 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
720 const int parallel_count = tensor_init_states->parallel_count;
721 int i;
722 for (i = 1; i < parallel_count; i++)
723 {
724 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
725 if (copy)
726 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
727 }
728}
729
730// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
731// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
732static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
733{
734 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 734, __extension__ __PRETTY_FUNCTION__); }))
;
735 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 735, __extension__ __PRETTY_FUNCTION__)
; }))
;
736 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
737 assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__
({ if (compiled_data->rewindables) ; else __assert_fail (
"compiled_data->rewindables", "ccv_cnnp_model.c", 737, __extension__
__PRETTY_FUNCTION__); }))
;
738 int i;
739 for (i = 0; i < compiled_data->rewindables->rnum; i++)
740 {
741 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) +
(size_t)(compiled_data->rewindables)->rsize * (size_t)
(i)))
;
742 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
743 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
744 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
745 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
746 }
747 ccv_array_clear(compiled_data->rewindables);
748 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
749}
750
751static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
752{
753 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
754 .type = CCV_CNNP_REWIND_TENSOR,
755 .tensor = symbol
756 };
757 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
758 ccv_array_push(rewind_symbols, &rewind_symbol);
759}
760
761static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
762{
763 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
764 .type = CCV_CNNP_REWIND_TENSOR,
765 .tensor = symbol
766 };
767 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
768 ccv_array_push(rewind_symbols, &rewind_symbol);
769}
770
771static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
772{
773 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
774 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
775 .graph_exec = symbol
776 };
777 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
778 ccv_array_push(rewind_symbols, &rewind_symbol);
779}
780
781static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
782{
783 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
784 if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0))
785 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
786 int i;
787 for (i = 1; i < parallel_count; i++)
788 {
789 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
790 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
791 if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0))
792 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
793 }
794}
795
796static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
797{
798 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 798, __extension__ __PRETTY_FUNCTION__); }))
;
799 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 799, __extension__ __PRETTY_FUNCTION__); }))
;
800 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
801 int i;
802 for (i = 1; i < parallel_count; i++)
803 {
804 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
805 if (copy_symbol.graph)
806 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
807 }
808 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
809 if (graph_exec_arena)
810 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
811 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
812 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
813 if (gradient_graph_exec_arena)
814 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
815}
816
817static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
818{
819 int this_parameter_flag = 0;
820 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
821 return this_parameter_flag;
822 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
823 int j, k;
824 // For no-op, we can preserve previous saved_aux_size.
825 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
826 {
827 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
828 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
829 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
830 // make sure some model parameters don't update if we don't want them to.
831 int old_saved_aux_size;
832 if (old_minimizer.cmd == CCV_NNC_NOOP)
833 {
834 int input_size;
835 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
836 if (input_size < 2) // This is not legit.
837 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
838 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
839 old_saved_aux_size = input_size - 2;
840 } else
841 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
842 if (old_saved_aux_size != saved_aux_size)
843 {
844 this_parameter_flag = 1;
845 if (saved_aux_size > old_saved_aux_size)
846 {
847 // Allocate new tensor symbols.
848 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
849 for (j = old_saved_aux_size; j < saved_aux_size; j++)
850 {
851 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
852 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
853 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
854 for (k = 1; k < parallel_count; k++)
855 {
856 ccv_nnc_tensor_param_t dev_info = info;
857 if (k != device_id)
858 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) &
0xfff) << 8))
;
859 else
860 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) &
0xfff) << 8))
;
861 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
862 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
863 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
864 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
865 }
866 }
867 } else {
868 for (j = saved_aux_size; j < old_saved_aux_size; j++)
869 {
870 for (k = 1; k < parallel_count; k++)
871 {
872 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
873 if (src_copy.d >= 0)
874 {
875 ccv_nnc_tensor_symbol_free(graph, src_copy);
876 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
877 }
878 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
879 if (dest_copy.d >= 0)
880 {
881 ccv_nnc_tensor_symbol_free(graph, dest_copy);
882 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
883 }
884 }
885 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
886 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
887 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
888 }
889 }
890 }
891 }
892 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
893 if (this_parameter_flag)
894 {
895 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
896 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
897 const int* inputs = 0;
898 int input_size = 0;
899 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
900 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 900, __extension__ __PRETTY_FUNCTION__)
; }))
;
901 update_inputs[0].d = inputs[0];
902 update_inputs[0].graph = graph;
903 update_inputs[1].d = inputs[1];
904 update_inputs[1].graph = graph;
905 update_outputs[0] = updated_parameters[parameter_indice];
906 for (j = 0; j < saved_aux_size; j++)
907 {
908 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
909 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
910 }
911 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
912 for (k = 1; k < parallel_count; k++)
913 {
914 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
915 assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if
(copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c"
, 915, __extension__ __PRETTY_FUNCTION__); }))
;
916 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
917 assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ (
{ if (input_size >= 1) ; else __assert_fail ("input_size >= 1"
, "ccv_cnnp_model.c", 917, __extension__ __PRETTY_FUNCTION__)
; }))
;
918 update_inputs[0].d = inputs[0];
919 update_inputs[0].graph = graph;
920 update_inputs[1].d = inputs[1];
921 update_inputs[1].graph = graph;
922 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
923 for (j = 0; j < saved_aux_size; j++)
924 {
925 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
926 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
927 }
928 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
929 }
930 }
931 return this_parameter_flag;
932}
933
// One entry of compiled_data->minimize.parameters: a minimizer together with the
// model parameters it is attached to.
934typedef struct {
// Number of valid entries in parameters[].
935 int parameter_size;
// Minimizer command associated with the listed parameters.
// NOTE(review): how it is applied is not visible in this part of the file.
936 ccv_nnc_cmd_t minimizer;
// Indexed from 0 to parameter_size - 1 by the code that walks this struct, so
// it acts as a trailing variable-length array; presumably the struct is
// over-allocated to hold parameter_size entries (confirm at the allocation site).
937 ccv_cnnp_model_io_t parameters[1];
938} ccv_cnnp_set_minimizer_for_parameter_t;
939
940static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
941{
942 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
943 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 943, __extension__ __PRETTY_FUNCTION__); }))
;
944 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
945 // We update all parameters, at this point, we have one minimizer.
946 const int parameter_size = compiled_data->parameters->rnum;
947 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
948 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
949 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 949, __extension__ __PRETTY_FUNCTION__); }))
;
950 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
951 ccv_array_t* const parameters = compiled_data->minimize.parameters;
952 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
953 int i, j, flag = 0;
954 for (i = 0; i < parameters->rnum; i++)
955 {
956 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
957 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
958 {
959 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
960 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 960, __extension__ __PRETTY_FUNCTION__)
; }))
;
961 const int old_rnum = parameter_indices->rnum;
962 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
963 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
964 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 964, __extension__ __PRETTY_FUNCTION__)
; }))
;
965 if (param_ref >= 0)
966 {
967 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 967, __extension__ __PRETTY_FUNCTION__)
; }))
;
968 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
969 parameter_indices->rnum = old_rnum + 1;
970 }
971 }
972 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
973 // We may have duplicated indices, but that is OK, we will set it twice.
974 for (j = 0; j < parameter_indices->rnum; j++)
975 {
976 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
977 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 977, __extension__ __PRETTY_FUNCTION__)
; }))
;
978 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
979 flag = 1;
980 }
981 ccv_array_clear(parameter_indices);
982 }
983 ccv_array_free(parameter_indices);
984 return flag;
985}
986
987static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
988{
989 if (new_saved_aux_size == old_saved_aux_size)
990 return;
991 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 991, __extension__ __PRETTY_FUNCTION__)
; }))
;
992 int i, j;
993 for (i = parameter_size - 1; i >= 0; i--)
994 {
995 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
996 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
997 for (j = old_saved_aux_size - 1; j >= 0; j--)
998 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
999 }
1000}
1001
1002static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1003{
1004 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1005 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1005, __extension__ __PRETTY_FUNCTION__); }))
;
1006 if (!compiled_data->rewindables)
1007 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1008 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1009 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1010 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1011}
1012
// Extend the model's symbolic graph with the backward / minimize part and record the
// resulting bookkeeping (gradients, updated parameters, update nodes, saved aux,
// backward "to" exec symbols) on compiled_data.
//
// model           - model whose compiled_data->gradient_mode must still be GRADIENT_NONE.
// gradient_mode   - mode to switch to; must not be GRADIENT_NONE. Stored into
//                   compiled_data->gradient_mode as the last step.
// disable_outgrad - bitmask over model inputs; input i gets an output gradient only when
//                   bit i is clear (or when the value is CCV_CNNP_DISABLE_OUTGRAD_NONE).
// fits, fit_size  - optional; when fits is non-NULL, fit_size must equal
//                   output_size * parallel_count and the first output_size tensors' info
//                   is copied onto the compiled_data->fits symbols.
1013static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1014{
1015 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1016 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1016, __extension__ __PRETTY_FUNCTION__
); }))
;
1017 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 1017, __extension__ __PRETTY_FUNCTION__
); }))
;
1018 const int evaluate_to_size = compiled_data->evaluate.to_size;
1019 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 1019, __extension__ __PRETTY_FUNCTION__
); }))
;
1020 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
// Grow evaluate.tos so one allocation holds room for the exec symbols of every parallel
// copy, followed by the matching evaluate.to_ops array.
1021 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1022 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1023 int i, j;
1024 const int output_size = model->output_size;
1025 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 1025, __extension__ __PRETTY_FUNCTION__
); }))
;
1026 if (fits)
1027 for (i = 0; i < output_size; i++)
1028 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1029 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1030 const int parameter_size = compiled_data->parameters->rnum;
// Single allocation laid out as: updated_parameters[parameter_size],
// update_nodes[parameter_size], saved_aux[max_saved_aux_size * parameter_size].
1031 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1032 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1033 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1034 int parameter_size_maybe_more = parameter_size;
1035 compiled_data->disable_outgrad = disable_outgrad;
// Count how many model inputs need an output gradient.
1036 int outgrad_size;
1037 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1038 outgrad_size = 0;
1039 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1040 outgrad_size = model->input_size;
1041 else {
1042 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1042, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1043 outgrad_size = 0;
1044 for (i = 0; i < model->input_size; i++)
1045 if (!(disable_outgrad & ((uint64_t)1 << i)))
1046 ++outgrad_size;
1047 }
1048 compiled_data->outgrad_size = outgrad_size;
1049 parameter_size_maybe_more += outgrad_size;
// Single allocation laid out as: gradients[parameter_size_maybe_more] (parameter gradients
// first, then outgrads), followed by backward.tos with room for every parallel copy.
1050 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1051 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1052 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1053 compiled_data->backward.to_size = parameter_size_maybe_more;
1054 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
;
// When parameter_flags is present, minimize over a temporary copy of the parameter list in
// which every parameter whose flag bit is clear is replaced by NO_TENSOR_SYMBOL.
1055 if (compiled_data->parameter_flags)
1056 {
1057 parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1058 for (i = 0; i < parameter_size; i++)
1059 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1060 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1061 else
1062 parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1063 }
// Three variants of the same minimize call, differing only in which inputs (none, all, or
// the selected subset) are passed as additional tensors to differentiate against.
1064 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1065 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1066 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1067 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1068 else { // Compute minimize with gradients including selected inputs.
1069 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 1069, __extension__ __PRETTY_FUNCTION__
); }))
;
1070 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 1070, __extension__ __PRETTY_FUNCTION__
); }))
; // If it is disable all, gradient mode won't be this.
1071 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 1071, __extension__ __PRETTY_FUNCTION__
); }))
;
1072 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1073 j = 0;
1074 for (i = 0; i < model->input_size; i++)
1075 if (!(disable_outgrad & ((uint64_t)1 << i)))
1076 outgrads[j++] = model->inputs[i];
1077 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1078 }
// Release the temporary filtered parameter list allocated above (only exists with parameter_flags).
1079 if (compiled_data->parameter_flags)
1080 ccfreefree(parameters);
// Re-stride saved_aux from the default minimizer's aux size to max_saved_aux_size.
1081 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1082 if (compiled_data->minimize.parameters)
1083 _ccv_cnnp_apply_parameters_with_minimizer(model);
1084 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1085 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1086 for (i = 0; i < output_size; i++)
1087 {
1088 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1089 // Init this to 1 so we can backprop.
1090 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1091 }
// Rebuild backward.tos from scratch, keeping only gradients that actually exist.
1092 compiled_data->backward.to_size = 0;
1093 for (i = 0; i < parameter_size_maybe_more; i++)
1094 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1095 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1096 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1097 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1098 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1099 {
1100 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1101 continue;
1102 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1103 const int* tos;
1104 int to_size;
1105 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1106 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
1107 {
1108 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1109 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1110 int flag = 0;
// Only the last (up to) i destinations are scanned for a duplicate of this outgrad node.
1111 const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = (
destination_count - i); (_a > _b) ? _a : _b; })
;
1112 for (j = i - 1; !flag && j >= 0; j--)
1113 if (j + outgrad_destination_start < destination_count)
1114 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1115 if (!flag) // Only if we cannot find it, we add it.
1116 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1117 }
1118 }
1119 if (parallel_count > 1)
1120 {
1121 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1122 0, 0,
1123 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1124 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1125 0, 0, 0,
1126 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1127 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1128 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
// Append the per-device copies of the original evaluate.tos entries; the loop bound is the
// size captured at function entry, so newly appended entries are not revisited.
1129 for (i = 0; i < evaluate_to_size; i++)
1130 for (j = 1; j < parallel_count; j++)
1131 {
1132 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1133 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1134 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1135 }
// Same for backward.tos, using a snapshot of its size taken before appending.
1136 const int backward_to_size = compiled_data->backward.to_size;
1137 for (i = 0; i < backward_to_size; i++)
1138 for (j = 1; j < parallel_count; j++)
1139 {
1140 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1141 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1142 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1143 }
1144 }
1145 // Only use memory compression if we are in gradient parameter mode.
1146 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1147 {
1148 if (model->memory_compression)
1149 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1150 if (model->memory_reduction)
1151 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1152 }
1153 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1154 compiled_data->gradient_mode = gradient_mode;
1155}
1156
1157void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1158{
1159 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1159, __extension__ __PRETTY_FUNCTION__
); }))
;
1160 const int parameter_size = compiled_data->parameters->rnum;
1161 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1162 const int internal_size = compiled_data->internals->rnum;
1163 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1164 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1165 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1166 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1167}
1168
1169int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1170{
1171 int i, j;
1172 const int parameter_size = compiled_data->parameters->rnum;
1173 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1174 const int internal_size = compiled_data->internals->rnum;
1175 for (i = 0; i < parameter_size; i++)
1176 {
1177 // parameters has to be allocated all together.
1178 if (compiled_data->tensors.parameters[i])
1179 {
1180 for (j = 1; j < parallel_count; j++)
1181 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1181, __extension__ __PRETTY_FUNCTION__
); }))
; }
1182 continue;
1183 }
1184 return 1;
1185 }
1186 for (i = 0; i < internal_size; i++)
1187 {
1188 if (!compiled_data->tensors.internals[i])
1189 return 1;
1190 for (j = 1; j < parallel_count; j++)
1191 if (!compiled_data->tensors.internals[i + j * internal_size])
1192 return 1;
1193 }
1194 return 0;
1195}
1196
1197void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1198{
1199 int i, j;
1200 const int parameter_size = compiled_data->parameters->rnum;
1201 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1202 const int internal_size = compiled_data->internals->rnum;
1203 for (i = 0; i < parameter_size; i++)
1204 {
1205 // parameters has to be allocated all together.
1206 if (compiled_data->tensors.parameters[i])
1207 {
1208 for (j = 1; j < parallel_count; j++)
1209 { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j *
parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data
->tensors.parameters[i + j * parameter_size]) ; else __assert_fail
("compiled_data->tensors.parameters[i + j * parameter_size]"
, "ccv_cnnp_model.c", 1209, __extension__ __PRETTY_FUNCTION__
); }))
; }
1210 continue;
1211 }
1212 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1213 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1214 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1215 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1216 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1217 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1218 for (j = 1; j < parallel_count; j++)
1219 {
1220 if (j != device_id)
1221 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1222 else
1223 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1224 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1225 }
1226 }
1227 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1228 for (i = 0; i < internal_size; i++)
1229 {
1230 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1231 const int d = retained.d;
1232 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1233 continue;
1234 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1235 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1236 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1237 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1238 if (!compiled_data->tensors.internals[i])
1239 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1240 for (j = 1; j < parallel_count; j++)
1241 {
1242 if (j != device_id)
1243 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1244 else
1245 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1246 if (!compiled_data->tensors.internals[i + j * internal_size])
1247 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1248 }
1249 }
1250 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
; // Remove 1 if any.
1251}
1252
1253static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1254{
1255 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1256 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1257}
1258
1259static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1260{
1261 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1261, __extension__ __PRETTY_FUNCTION__
); }))
;
1262 int i, j;
1263 for (i = 0; i < tensor_size; i++)
1264 {
1265 if (!tensors[i])
1266 continue;
1267 const int d = tensor_symbols[i].d;
1268 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1269 continue;
1270 for (j = 1; j < parallel_count; j++)
1271 if (tensors[i + j * tensor_size])
1272 {
1273 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1274 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size]
) & ~(uintptr_t)1))
;
1275 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1276 }
1277 }
1278}
1279
1280static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1281{
1282 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1282, __extension__ __PRETTY_FUNCTION__
); }))
;
1283 int i, j;
1284 for (i = 0; i < tensor_size; i++)
1285 {
1286 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1287 for (j = 1; j < parallel_count; j++)
1288 {
1289 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1290 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1291 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1292 { // We shouldn't allocate this, free it up.
1293 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1294 tensors[i + j * tensor_size] = 0;
1295 }
1296 }
1297 }
1298}
1299
1300static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1301{
1302 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1302, __extension__ __PRETTY_FUNCTION__
); }))
;
1303 int i, j;
1304 for (i = 0; i < tensor_size; i++)
1305 {
1306 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1307 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1308 continue;
1309 if (graph)
1310 {
1311 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1312 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1313 tensor_symbol = alias_to;
1314 }
1315 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t
)1))
;
1316 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1317 {
1318 const ccv_nnc_tensor_bind_t retained_bind = {
1319 .symbol = tensor_symbol,
1320 .tensor = tensor
1321 };
1322 ccv_array_push(tensor_binds, &retained_bind);
1323 }
1324 for (j = 1; j < parallel_count; j++)
1325 {
1326 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1327 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1328 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1329 {
1330 const ccv_nnc_tensor_bind_t bind = {
1331 .symbol = copy,
1332 .tensor = tensors[i + j * tensor_size]
1333 };
1334 ccv_array_push(tensor_binds, &bind);
1335 }
1336 }
1337 }
1338}
1339
1340static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1341{
1342 if (compiled_data->graph)
1343 ccv_nnc_graph_free(compiled_data->graph);
1344 compiled_data->graph = 0;
1345 compiled_data->is_test = 0;
1346 if (compiled_data->tensor_arena)
1347 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1348 compiled_data->tensor_arena = 0;
1349 if (compiled_data->graph_exec_arena)
1350 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1351 compiled_data->graph_exec_arena = 0;
1352 if (compiled_data->backward.from_ops)
1353 ccfreefree(compiled_data->backward.from_ops);
1354 compiled_data->backward.from_ops = 0;
1355 if (compiled_data->evaluate.schedule)
1356 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1357 compiled_data->evaluate.schedule = 0;
1358 if (compiled_data->backward.schedule)
1359 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1360 compiled_data->backward.schedule = 0;
1361}
1362
1363static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1364{
1365 if (compiled_data->gradients)
1366 ccfreefree(compiled_data->gradients);
1367 compiled_data->gradients = 0;
1368 if (compiled_data->updated_parameters)
1369 ccfreefree(compiled_data->updated_parameters);
1370 compiled_data->updated_parameters = 0;
1371 compiled_data->update_nodes = 0;
1372 compiled_data->saved_aux = 0;
1373}
1374
1375static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1376{
1377 if (compiled_data->backward.gradients)
1378 ccfreefree(compiled_data->backward.gradients);
1379 compiled_data->backward.gradients = 0;
1380 if (compiled_data->backward.accum)
1381 ccv_nnc_graph_free(compiled_data->backward.accum);
1382 compiled_data->backward.accum = 0;
1383 if (compiled_data->backward.tensor_arena)
1384 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1385 compiled_data->backward.tensor_arena = 0;
1386 if (compiled_data->backward.graph_exec_arena)
1387 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1388 compiled_data->backward.graph_exec_arena = 0;
1389}
1390
1391static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1392{
1393 if (compiled_data->apply_gradients.graph)
1394 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1395 compiled_data->apply_gradients.graph = 0;
1396 if (compiled_data->apply_gradients.tensor_arena)
1397 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1398 compiled_data->apply_gradients.tensor_arena = 0;
1399 if (compiled_data->apply_gradients.graph_exec_arena)
1400 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1401 compiled_data->apply_gradients.graph_exec_arena = 0;
1402}
1403
1404// Compile the graph to run ccv_cnnp_model_fit
1405static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1406{
1407 int i, j;
1408 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1409 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1409, __extension__ __PRETTY_FUNCTION__
); }))
;
1410 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1411 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1412 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1412, __extension__ __PRETTY_FUNCTION__
); }))
;
1413 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1413
, __extension__ __PRETTY_FUNCTION__); }))
;
1414 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1414, __extension__ __PRETTY_FUNCTION__
); }))
;
1415 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1416 {
1417 _ccv_cnnp_model_set_rewindables(model);
1418 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1419 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1420 _ccv_cnnp_model_rewind_graph(model);
1421 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1422 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1423 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1424 }
1425 const int tensors_init = !!compiled_data->tensors_init.v;
1426 if (!tensors_init)
1427 _ccv_cnnp_model_tensors_init(model, compiled_data);
1428 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1429 // Check if it is not fully allocated, if it is not, init_1.
1430 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1431 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1432 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1432, __extension__ __PRETTY_FUNCTION__); }))
;
1433 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1433, __extension__ __PRETTY_FUNCTION__); }))
;
1434 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1434
, __extension__ __PRETTY_FUNCTION__); }))
;
1435 const int input_size_per_p = input_size / parallel_count;
1436 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1437 const int output_size_per_p = output_size / parallel_count;
1438 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1439 const int fit_size_per_p = fit_size / parallel_count;
1440 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1441 const int parameter_size = compiled_data->parameters->rnum;
1442 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1443 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1444 const int internal_size = compiled_data->internals->rnum;
1445 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1446 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1447 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1448 ccv_array_free(tensor_binds);
1449 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1450 if (tensors_init && parallel_count > 1)
1451 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1452 // If tensor is not init'ed, we need to init states first.
1453 if (_ccv_cnnp_any_to_init(compiled_data))
1454 {
1455 ccv_nnc_tensor_init_states_t tensor_init_states = {
1456 .parallel_count = parallel_count,
1457 .graph = model->graph,
1458 .compiled_data = compiled_data,
1459 .tensor_arena = compiled_data->tensor_arena
1460 };
1461 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1462 }
1463 compiled_data->is_test = 0;
1464 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1465 // No need to set because it is default to training mode.
1466 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1467 for (i = 0; i < saved_aux_size * parameter_size; i++)
1468 {
1469 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1470 continue;
1471 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1472 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1473 for (j = 1; j < parallel_count; j++)
1474 {
1475 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1476 if (copy)
1477 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1478 }
1479 }
1480 const int evaluate_to_size = compiled_data->evaluate.to_size;
1481 compiled_data->evaluate.to_op_size = 0;
1482 for (i = 0; i < evaluate_to_size; i++)
1483 {
1484 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1485 if (to.graph)
1486 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1487 }
1488 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1489 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1490}
1491
1492ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1493{
1494 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1495 if (!compiled_data || !compiled_data->graph)
1496 return 0;
1497 return ccv_nnc_graph_default_stream(compiled_data->graph);
1498}
1499
1500uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1501{
1502 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1503 if (!compiled_data || !compiled_data->tensor_arena)
1504 return 0;
1505 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1506}
1507
1508static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1509{
1510 int i, j;
1511 for (i = 0; i < tensor_size; i++)
1512 {
1513 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1514 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1515 continue;
1516 if (graph)
1517 {
1518 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1519 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1520 tensor_symbol = alias_to;
1521 }
1522 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1523 for (j = 1; j < parallel_count; j++)
1524 {
1525 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1526 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1527 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1528 }
1529 }
1530}
1531
1532void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1533{
1534 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1535 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1535, __extension__ __PRETTY_FUNCTION__); }))
;
1536 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1537 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1537, __extension__ __PRETTY_FUNCTION__
); }))
;
1538 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1538, __extension__ __PRETTY_FUNCTION__
); }))
;
1539 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1539
, __extension__ __PRETTY_FUNCTION__); }))
;
1540 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1540, __extension__ __PRETTY_FUNCTION__); }))
;
1541 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1542 {
1543 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1544 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1545 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1546 // Compile the symbolic graph down only when needed.
1547 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1548 } else {
1549 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1549, __extension__ __PRETTY_FUNCTION__); }))
;
1550 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1550, __extension__ __PRETTY_FUNCTION__); }))
;
1551 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1551
, __extension__ __PRETTY_FUNCTION__); }))
;
1552 const int input_size_per_p = input_size / parallel_count;
1553 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1554 const int output_size_per_p = output_size / parallel_count;
1555 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1556 const int fit_size_per_p = fit_size / parallel_count;
1557 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1558 }
1559 if (compiled_data->is_test)
1560 {
1561 compiled_data->is_test = 0;
1562 ccv_nnc_graph_exec_update_t update = {
1563 .parallel_count = parallel_count,
1564 .graph = model->graph,
1565 .graph_exec_arena = compiled_data->graph_exec_arena,
1566 };
1567 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1568 }
1569 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1570}
1571
1572// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1573static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1574{
1575 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1576 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1577 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1578 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1578, __extension__ __PRETTY_FUNCTION__
); }))
;
1579 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1579, __extension__ __PRETTY_FUNCTION__
); }))
;
1580 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1581 // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel.
1582 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1583 {
1584 const int evaluate_to_size = compiled_data->evaluate.to_size;
1585 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1586 _ccv_cnnp_model_set_rewindables(model);
1587 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1588 0, 0,
1589 0, 0, 0,
1590 0, 0, 0,
1591 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1592 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1593 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1594 int i, j;
1595 for (i = 0; i < evaluate_to_size; i++)
1596 for (j = 1; j < parallel_count; j++)
1597 {
1598 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1599 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1600 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1601 }
1602 }
1603 const int tensors_init = !!compiled_data->tensors_init.v;
1604 if (!tensors_init)
1605 _ccv_cnnp_model_tensors_init(model, compiled_data);
1606 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1607 // Check if it is not fully allocated, if it is not, init_1.
1608 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1609 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1610 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1610, __extension__ __PRETTY_FUNCTION__); }))
;
1611 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1611, __extension__ __PRETTY_FUNCTION__); }))
;
1612 const int input_size_per_p = input_size / parallel_count;
1613 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1614 const int output_size_per_p = output_size / parallel_count;
1615 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1616 const int parameter_size = compiled_data->parameters->rnum;
1617 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1618 const int internal_size = compiled_data->internals->rnum;
1619 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1620 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1621 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1622 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1623 ccv_array_free(tensor_binds);
1624 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1625 // If tensor is not init'ed, we need to init states first.
1626 if (tensors_init && parallel_count > 1)
1627 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1628 if (_ccv_cnnp_any_to_init(compiled_data))
1629 {
1630 ccv_nnc_tensor_init_states_t tensor_init_states = {
1631 .parallel_count = parallel_count,
1632 .graph = model->graph,
1633 .compiled_data = compiled_data,
1634 .tensor_arena = compiled_data->tensor_arena
1635 };
1636 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1637 }
1638 compiled_data->is_test = 1;
1639 ccv_nnc_graph_exec_update_t update = {
1640 .parallel_count = parallel_count,
1641 .graph = model->graph,
1642 .graph_exec_arena = compiled_data->graph_exec_arena,
1643 };
1644 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1645 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1646 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1647}
1648
1649static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1650{
1651 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1651, __extension__ __PRETTY_FUNCTION__
); }))
;
1652 const int parameter_size = compiled_data->parameters->rnum;
1653 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1654 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1655 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1656 int i, j;
1657 for (i = 0; i < parameter_size; i++)
1658 {
1659 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1660 {
1661 compiled_data->tensors.gradients[i] = 0;
1662 compiled_data->tensors.accum_gradients[i] = 0;
1663 for (j = 1; j < parallel_count; j++)
1664 {
1665 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1666 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1667 }
1668 continue;
1669 }
1670 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1671 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1672 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1673 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1674 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1675 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1676 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1677 for (j = 1; j < parallel_count; j++)
1678 {
1679 if (j != device_id)
1680 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1681 else
1682 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1683 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1684 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1685 }
1686 }
1687}
1688
1689static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1690{
1691 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1692 return 1;
1693 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1694 return 0;
1695 int i;
1696 for (i = 0; i < input_size; i++)
1697 if (!(disable_outgrad & ((uint64_t)1 << i)))
1698 return 0;
1699 return 1;
1700}
1701
1702// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1703// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1704static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1705{
1706 int i, j;
1707 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1708 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1709 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1709, __extension__ __PRETTY_FUNCTION__
); }))
;
1710 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1711 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1712 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1712, __extension__ __PRETTY_FUNCTION__
); }))
;
1713 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1713, __extension__ __PRETTY_FUNCTION__
); }))
;
1714 // There shouldn't be a loss function if we evaluate with multistage jit.
1715 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1715, __extension__ __PRETTY_FUNCTION__
); }))
;
1716 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1717 {
1718 _ccv_cnnp_model_set_rewindables(model);
1719 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1720 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1721 _ccv_cnnp_model_rewind_graph(model);
1722 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1723 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1724 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1725 }
1726 const int tensors_init = !!compiled_data->tensors_init.v;
1727 if (!tensors_init)
1728 _ccv_cnnp_model_tensors_init(model, compiled_data);
1729 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1730 // Check if it is not fully allocated, if it is not, init_1.
1731 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1732 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1733 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1733, __extension__ __PRETTY_FUNCTION__); }))
;
1734 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1734, __extension__ __PRETTY_FUNCTION__); }))
;
1735 const int input_size_per_p = input_size / parallel_count;
1736 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1737 const int output_size_per_p = output_size / parallel_count;
1738 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1739 const int parameter_size = compiled_data->parameters->rnum;
1740 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1741 const int internal_size = compiled_data->internals->rnum;
1742 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1743 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1744 if (!compiled_data->tensors.gradients)
1745 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1746 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1747 if (compiled_data->backward.to_size > 0)
1748 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1749 else
1750 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1751 ccv_array_free(tensor_binds);
1752 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1753 if (tensors_init && parallel_count > 1)
1754 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1755 // If tensor is not init'ed, we need to init states first.
1756 if (_ccv_cnnp_any_to_init(compiled_data))
1757 {
1758 ccv_nnc_tensor_init_states_t tensor_init_states = {
1759 .parallel_count = parallel_count,
1760 .graph = model->graph,
1761 .compiled_data = compiled_data,
1762 .tensor_arena = compiled_data->tensor_arena
1763 };
1764 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1765 }
1766 compiled_data->is_test = is_test;
1767 ccv_nnc_graph_exec_update_t update = {
1768 .parallel_count = parallel_count,
1769 .graph = model->graph,
1770 .graph_exec_arena = compiled_data->graph_exec_arena,
1771 };
1772 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1773 const int evaluate_to_size = compiled_data->evaluate.to_size;
1774 compiled_data->evaluate.to_op_size = 0;
1775 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1776 for (i = 0; i < evaluate_to_size; i++)
1777 {
1778 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1779 if (to_op.graph)
1780 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1781 const int* tos;
1782 int to_size;
1783 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1784 for (j = 0; j < to_size; j++)
1785 {
1786 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1787 .d = tos[j],
1788 .graph = model->graph
1789 });
1790 if (to_op.graph)
1791 ccv_array_add_unique_int(backward_from, to_op.d);
1792 }
1793 }
1794 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1794, __extension__
__PRETTY_FUNCTION__); }))
;
1795 compiled_data->backward.from_op_size = backward_from->rnum;
1796 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1797 for (i = 0; i < backward_from->rnum; i++)
1798 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1799 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1800 .graph = compiled_data->graph,
1801 };
1802 ccv_array_free(backward_from);
1803 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1804 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1805}
1806
// Prepare the compiled graph of `model` for a run with the given tensors, without executing it.
// - model: must already have compiled_data and a symbolic graph (both asserted).
// - params: requires_grad, disable_outgrad and is_test are read here.
// - inputs / input_size: input_size must equal model->input_size * parallel_count (asserted).
// - outputs / output_size: output_size must equal model->output_size * parallel_count (asserted).
// If there is no compiled graph yet, or gradients are requested and the compiled state does not match
// the request, the graph is freed and re-jitted. Otherwise the existing tensor arena is re-bound to the
// given inputs / outputs. Finally is_test is propagated if it changed.
1807void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1808{
1809 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1810 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1810, __extension__ __PRETTY_FUNCTION__); }))
;
     // A parallel_count below 1 on the model is treated as 1.
1811 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1812 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__
); }))
;
1813 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1813, __extension__ __PRETTY_FUNCTION__
); }))
;
1814 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1814, __extension__ __PRETTY_FUNCTION__); }))
;
     // When _ccv_cnnp_is_disable_outgrad_all reports true the target is TRAINABLES only, otherwise TRAINABLES_AND_INPUTS.
1815 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
     // A mismatch only matters when gradients are requested: graph mode, gradient mode or disable_outgrad differs from the compiled state.
1816 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1817 if (!compiled_data->graph || mode_mismatch)
1818 {
1819 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1820 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad).
1821 _ccv_cnnp_compiled_data_backward_free(compiled_data);
     // Re-jit: the gradient-capable variant when requires_grad is set, the no-grad variant otherwise.
1822 if (params.requires_grad)
1823 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1824 else
1825 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1826 } else {
     // The compiled graph can be reused: clear the arena bindings and bind the caller's inputs / outputs again.
1827 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1828 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1828, __extension__ __PRETTY_FUNCTION__); }))
;
1829 const int input_size_per_p = input_size / parallel_count;
1830 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1831 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1831, __extension__ __PRETTY_FUNCTION__); }))
;
1832 const int output_size_per_p = output_size / parallel_count;
1833 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1834 }
     // Only push is_test down when it differs from the value recorded on compiled_data.
1835 if (compiled_data->is_test != params.is_test)
1836 {
1837 compiled_data->is_test = params.is_test;
1838 ccv_nnc_graph_exec_update_t update = {
1839 .parallel_count = parallel_count,
1840 .graph = model->graph,
1841 .graph_exec_arena = compiled_data->graph_exec_arena,
1842 };
1843 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1844 }
1845}
1846
// Run the forward pass of `model`.
// First calls ccv_cnnp_model_dry_run with the same model / params / tensors so the graph is compiled
// and bound, then runs compiled_data->graph with the given tensor_tape and stream_context.
// In MULTISTAGE_MODE_NO_GRAD the graph is run with no explicit schedule (0); in every other mode a
// static schedule is created on first use and cached in compiled_data->evaluate.schedule.
1847void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1848{
1849 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1850 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1850, __extension__ __PRETTY_FUNCTION__); }))
;
1851 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1852 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1853 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1854 else {
     // The schedule is built with evaluate.to_ops as the trailing exec list (presumably the destinations
     // where the forward portion stops — confirm against ccv_nnc_graph_static_schedule_new).
1855 if (!compiled_data->evaluate.schedule)
1856 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1857 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1858 }
1859}
1860
1861// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1862// Particularly, this method compiles the accumulator graph.
// Builds a temporary symbolic graph `accum` with one EWSUM node per existing gradient tensor
// (accum_gradient + gradient -> updated_accum_gradient), compiles it into compiled_data->backward.accum
// and frees the symbolic graph. Requires compiled_data in MULTISTAGE_MODE (asserted).
1863static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1864{
1865 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1866 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1866, __extension__ __PRETTY_FUNCTION__); }))
;
1867 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1867, __extension__ __PRETTY_FUNCTION__
); }))
;
1868 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1869 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1870 const int parameter_size = compiled_data->parameters->rnum;
1871 int i, j;
     // A single allocation holds three consecutive arrays of parameter_size * parallel_count symbols:
     // gradients, then accum_gradients, then updated_accum_gradients.
1872 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1873 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1874 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
     // Parameter i on device j is stored at index i + j * parameter_size.
1875 for (i = 0; i < parameter_size; i++)
1876 for (j = 0; j < parallel_count; j++)
1877 if (compiled_data->tensors.gradients[i + j * parameter_size])
1878 {
1879 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1880 // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them.
1881 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1882 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
     // Three fresh symbols with the same tensor info: the two EWSUM inputs and its output.
1883 ccv_nnc_tensor_symbol_t inputs[2];
1884 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1885 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1886 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1887 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1888 } else {
     // No gradient tensor for this slot: mark all three symbols as absent.
1889 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1890 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1891 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1892 }
1893 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
     // No source nodes means no EWSUM node was added; install an empty concrete graph and stop here.
1894 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1895 {
1896 ccv_nnc_symbolic_graph_free(accum);
1897 // Create empty graph.
1898 compiled_data->backward.accum = ccv_nnc_graph_new();
1899 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1900 return;
1901 }
1902 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
     // The arrays are passed flat (size parameter_size * parallel_count, parallel count 1). Both the
     // accum_gradients and the updated_accum_gradients symbols are bound to tensors.accum_gradients, so
     // the EWSUM output shares the tensor of its first input.
1903 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1904 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1905 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1906 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
     // The symbolic graph and the bind list are only needed for compilation.
1907 ccv_nnc_symbolic_graph_free(accum);
1908 ccv_array_free(tensor_binds);
1909 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1910}
1911
// Run the backward pass on a model whose compiled data is in MULTISTAGE_MODE with a compiled graph (both asserted).
// - ingrads / ingrad_size: gradients flowing in for the model outputs. ingrad_size must be 0 or
//   model->output_size * parallel_count (asserted). A missing entry is replaced by filling the arena
//   tensor of the corresponding gradient symbol with 1.
// - outgrads / outgrad_size: tensors to bind to compiled_data->outgrads. When outgrad_size > 0 it must be
//   compiled_data->outgrad_size * parallel_count and the gradient mode must be TRAINABLES_AND_INPUTS (asserted).
// - tensor_tape / stream_context: forwarded to the graph run.
// compiled_data->backward.count is incremented on every call; when it is already > 0 on entry, the
// accumulation graph (backward.accum) is run after the backward graph.
1912void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1913{
1914 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1915 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1915, __extension__ __PRETTY_FUNCTION__); }))
;
1916 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1916, __extension__ __PRETTY_FUNCTION__
); }))
;
1917 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1918 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1918, __extension__ __PRETTY_FUNCTION__
); }))
;
1919 if (outgrad_size > 0)
1920 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1920, __extension__ __PRETTY_FUNCTION__
); }))
; }
1921 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1921, __extension__ __PRETTY_FUNCTION__); }))
;
1922 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1922, __extension__ __PRETTY_FUNCTION__
); }))
;
1923 const int parameter_size = compiled_data->parameters->rnum;
1924 // If we need to accumulate the gradients now, do jit on accumulator.
1925 if (compiled_data->backward.count > 0)
1926 {
     // No accumulator graph yet: compile it now. Otherwise, only when exactly one backward pass has run, swap the tensors.
1927 if (!compiled_data->backward.accum)
1928 _ccv_cnnp_model_multistage_jit_1(model);
1929 else if (compiled_data->backward.count == 1) {
1930 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1931 int i;
1932 for (i = 0; i < parameter_size * parallel_count; i++)
1933 {
1934 ccv_nnc_tensor_t* tensor;
1935 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
1936 }
     // backward.tensor_arena may be unset (the accumulator jit can return before compiling an arena), hence the check.
1937 if (compiled_data->backward.tensor_arena)
1938 {
1939 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1940 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1941 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1942 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1943 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1944 }
1945 }
1946 }
     // Per-device counts: entry i for device j sits at index i + size_per_p * j in the caller's arrays.
1947 const int ingrad_size_per_p = model->output_size;
1948 const int outgrad_size_per_p = compiled_data->outgrad_size;
1949 int i, j;
1950 for (i = 0; i < ingrad_size_per_p; i++)
1951 {
1952 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1953 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1954 {
1955 // Set it to 1 if it is not specified.
1956 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1957 if (ingrad_tensor)
1958 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
     // Do the same for the copy of the gradient symbol on every other device (j >= 1).
1959 for (j = 1; j < parallel_count; j++)
1960 {
1961 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
1962 if (ingrad_tensor)
1963 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1964 }
1965 } else {
1966 // Make sure the length matches, in case it is an alias.
1967 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 1967, __extension__ __PRETTY_FUNCTION__
); }))
;
     // Bind the caller-provided gradient for device 0, then the matching entries for the other devices.
1968 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
1969 for (j = 1; j < parallel_count; j++)
1970 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
1971 }
1972 }
1973 if (outgrad_size > 0)
1974 {
1975 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 1975, __extension__ __PRETTY_FUNCTION__
); }))
;
     // Only the device-0 entry is tested for NULL; when it is set, the entries for the other devices are bound without a check.
1976 for (i = 0; i < outgrad_size_per_p; i++)
1977 if (outgrads[i])
1978 {
1979 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
1980 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
1981 for (j = 1; j < parallel_count; j++)
1982 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
1983 }
1984 } else {
     // Without outgrads either gradient mode is acceptable; the assert only rules out other modes.
1985 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__
); }))
1986 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__
); }))
;
1987 }
1988 // We need to rebind here because in ccv_cnnp_model_evaluate, we clear bindings, that will reset all bindings for the gradients.
1989 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
1990 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
1991 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
     // The backward schedule is created on first use from backward.from_ops and cached on compiled_data.
1992 if (!compiled_data->backward.schedule)
1993 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
1994 // Run the backward pass.
1995 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
1996 // If we need to run accumulation round, do that now.
1997 if (compiled_data->backward.count > 0)
1998 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
1999 // Update the count, this determines whether we need to accumulate or not.
2000 ++compiled_data->backward.count;
2001}
2002
2003// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2004// Particularly, this method compiles the parameter update graph.
// Compiles the portion of model->graph that starts at the execs following compiled_data->backward.tos
// (those not already present in the compiled backward graph) and ends at the parameter update nodes.
// The result goes into compiled_data->apply_gradients (graph, tensor_arena, graph_exec_arena).
// Returns early, leaving apply_gradients untouched, when no such starting exec exists.
// Requires compiled_data in MULTISTAGE_MODE (asserted).
2005static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2006{
2007 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2008 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2008, __extension__ __PRETTY_FUNCTION__
); }))
;
2009 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2010 const int parameter_size = compiled_data->parameters->rnum;
2011 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
     // Both the parameter symbols and the updated_parameters symbols are bound to tensors.parameters,
     // so the update writes into the same tensors it reads from.
2012 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2013 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2014 // Bind accumulated gradients.
     // After more than one backward pass the gradient symbols read from tensors.accum_gradients, otherwise from tensors.gradients.
2015 if (compiled_data->backward.count > 1)
2016 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2017 else
2018 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
     // Collect (without duplicates) the exec symbol indices to start the apply-gradients graph from.
2019 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2020 int i, j;
2021 for (i = 0; i < compiled_data->backward.to_size; i++)
2022 {
2023 const int* tos;
2024 int to_size;
2025 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2026 for (j = 0; j < to_size; j++)
2027 {
2028 // Check whether this exec already shows up in the backward graph; if that is the case, it won't be in the apply
2029 // gradients graph.
2030 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2031 .d = tos[j],
2032 .graph = model->graph,
2033 });
2034 if (!exec.graph)
2035 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2036 }
2037 }
2038 const int from_size = apply_gradients_from->rnum;
     // Nothing to start from: release the temporaries and leave without compiling anything.
2039 if (from_size == 0)
2040 {
2041 ccv_array_free(apply_gradients_from);
2042 ccv_array_free(tensor_binds);
2043 return;
2044 }
     // Turn the collected indices into exec symbols on model->graph.
2045 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2046 for (i = 0; i < from_size; i++)
2047 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2048 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2049 .graph = model->graph
2050 };
2051 ccv_array_free(apply_gradients_from);
2052 // It can only end with updates on the parameters.
2053 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2054 for (i = 0; i < parameter_size; i++)
2055 {
     // NOTE(review): update_nodes holds exec symbols but is compared against the tensor-symbol sentinel;
     // presumably both "none" sentinels share a value — confirm.
2056 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2057 continue;
2058 ccv_array_push(tos, &compiled_data->update_nodes[i]);
     // Add the copies of the update node for the other devices (j >= 1) as destinations too.
2059 for (j = 1; j < parallel_count; j++)
2060 {
2061 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2062 ccv_array_push(tos, &copy);
2063 }
2064 }
2065 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2066 ccv_array_free(tos);
2067 ccv_array_free(tensor_binds);
2068 ccfreefree(froms);
     // Zero-fill every saved_aux source tensor in the new arena (SET_FORWARD(0)), including per-device copies.
2069 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2070 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2071 {
2072 // Skip on no tensor.
2073 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2074 continue;
     // NOTE(review): unlike the per-device copies below, this tensor is passed to ccv_nnc_cmd_exec
     // without a NULL check — confirm the arena always holds a tensor for the source symbol.
2075 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2076 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2077 for (j = 1; j < parallel_count; j++)
2078 {
2079 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2080 if (copy)
2081 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2082 }
2083 }
2084 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2085}
2086
2087void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2088{
2089 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2090 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2090, __extension__ __PRETTY_FUNCTION__); }))
;
2091 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2091, __extension__ __PRETTY_FUNCTION__
); }))
;
2092 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2093 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2093, __extension__ __PRETTY_FUNCTION__); }))
;
2094 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2094, __extension__ __PRETTY_FUNCTION__
); }))
;
2095 // Skip if there is no backward pass.
2096 if (compiled_data->backward.count <= 0)
2097 return;
2098 // Skip if there is no parameters.
2099 if (compiled_data->parameters->rnum == 0)
2100 {
2101 compiled_data->backward.count = 0;
2102 return;
2103 }
2104 if (!compiled_data->apply_gradients.graph)
2105 _ccv_cnnp_model_multistage_jit_2(model);
2106 else {
2107 const int parameter_size = compiled_data->parameters->rnum;
2108 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2109 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2110 if (compiled_data->backward.count > 1)
2111 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2112 else
2113 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2114 }
2115 if (compiled_data->apply_gradients.graph)
2116 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2117 // Reset backward count to 0.
2118 compiled_data->backward.count = 0;
2119}
2120
2121void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2122{
2123 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2124 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2125 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2125, __extension__ __PRETTY_FUNCTION__
); }))
;
2126 const int tensors_init = !!compiled_data->tensors_init.v;
2127 if (!tensors_init)
2128 _ccv_cnnp_model_tensors_init(model, compiled_data);
2129 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2130 // Check if it is not fully allocated, if it is not, init_1.
2131 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2132 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2133 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2134 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2135 if (param_ref < 0)
2136 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2136
, __extension__ __PRETTY_FUNCTION__); }))
; }
2137 else
2138 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2138, __extension__ __PRETTY_FUNCTION__
); }))
; }
2139 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2140 ccv_array_free(parameter_indices);
2141 const int parameter_size = compiled_data->parameters->rnum;
2142 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2142
, __extension__ __PRETTY_FUNCTION__); }))
;
2143 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2143, __extension__ __PRETTY_FUNCTION__
); }))
;
2144 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2145 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2146 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2146, __extension__
__PRETTY_FUNCTION__); }))
;
2147 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2148 int i;
2149 for (i = 1; i < parallel_count; i++)
2150 {
2151 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2152 if (copy_tensor)
2153 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2154 }
2155 // Mark this symbol as init'ed.
2156 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2157 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2158 init_v[s >> 5] |= (1u << (s & 0x1f));
2159}
2160
2161void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2162{
2163 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2164 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2165 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2165, __extension__ __PRETTY_FUNCTION__
); }))
;
2166 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2166, __extension__ __PRETTY_FUNCTION__
); }))
;
2167 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2168 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2169 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2170 if (param_ref < 0)
2171 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2171
, __extension__ __PRETTY_FUNCTION__); }))
; }
2172 else
2173 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2173, __extension__ __PRETTY_FUNCTION__
); }))
; }
2174 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2175 ccv_array_free(parameter_indices);
2176 const int parameter_size = compiled_data->parameters->rnum;
2177 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2177
, __extension__ __PRETTY_FUNCTION__); }))
;
2178 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2178, __extension__ __PRETTY_FUNCTION__
); }))
;
2179 // We don't need to consider parallel_count, every parameter on each device is identical.
2180 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2181 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2181, __extension__
__PRETTY_FUNCTION__); }))
;
2182 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2183}
2184
2185ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2186{
2187 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2188 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2189 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2189, __extension__ __PRETTY_FUNCTION__
); }))
;
2190 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2190, __extension__ __PRETTY_FUNCTION__
); }))
;
2191 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2192 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2193 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2194 if (param_ref < 0)
2195 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2195
, __extension__ __PRETTY_FUNCTION__); }))
; }
2196 else
2197 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2197, __extension__ __PRETTY_FUNCTION__
); }))
; }
2198 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2199 ccv_array_free(parameter_indices);
2200 const int parameter_size = compiled_data->parameters->rnum;
2201 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2201
, __extension__ __PRETTY_FUNCTION__); }))
;
2202 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2202, __extension__ __PRETTY_FUNCTION__
); }))
;
2203 // We don't need to consider parallel_count, every parameter on each device is identical.
2204 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2205 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2205, __extension__
__PRETTY_FUNCTION__); }))
;
2206 return tensor->info;
2207}
2208
2209const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2210{
2211 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2212 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2213 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2213, __extension__ __PRETTY_FUNCTION__
); }))
;
2214 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2215 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2216 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2217 if (param_ref < 0)
2218 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2218
, __extension__ __PRETTY_FUNCTION__); }))
; }
2219 else
2220 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2220, __extension__ __PRETTY_FUNCTION__
); }))
; }
2221 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2222 ccv_array_free(parameter_indices);
2223 const int parameter_size = compiled_data->parameters->rnum;
2224 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2224
, __extension__ __PRETTY_FUNCTION__); }))
;
2225 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2225, __extension__ __PRETTY_FUNCTION__
); }))
;
2226 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2227}
2228
2229int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2230{
2231 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2231, __extension__ __PRETTY_FUNCTION__
); }))
;
2232 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2233 return compiled_data->parameters->rnum;
2234}
2235
2236ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2237{
2238 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2239 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2239, __extension__ __PRETTY_FUNCTION__); }))
;
2240 const int parameter_size = compiled_data->parameters->rnum;
2241 int i;
2242 for (i = 0; i < parameter_size; i++)
2243 {
2244 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2245 if (first(model, name, context))
2246 return ccv_cnnp_model_parameters(model, -1, i);
2247 }
2248 return 0;
2249}
2250
2251ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2252{
2253 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2254 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2254, __extension__ __PRETTY_FUNCTION__); }))
;
2255 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2256 const int parameter_size = compiled_data->parameters->rnum;
2257 int i;
2258 for (i = 0; i < parameter_size; i++)
2259 {
2260 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2261 if (filter(model, name, context))
2262 {
2263 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2264 ccv_array_push(parameters, &parameter);
2265 }
2266 }
2267 return parameters;
2268
2269}
2270
2271static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2272{
2273 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2274 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2274, __extension__
__PRETTY_FUNCTION__); }))
;
2275 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2276 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2277 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2278 return to_parameter_indices;
2279}
2280
2281static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2282{
2283 // If the model is not compiled yet. Compile them now.
2284 if (!model->graph)
2285 {
2286 model->graph = ccv_nnc_symbolic_graph_new();
2287 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2287, __extension__ __PRETTY_FUNCTION__
); }))
;
2288 const int input_size = from_model->input_size;
2289 ccv_nnc_tensor_param_t input_params[input_size];
2290 int i;
2291 for (i = 0; i < input_size; i++)
2292 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2293 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2294 model->parallel_count = from_model->parallel_count;
2295 model->memory_compression = from_model->memory_compression;
2296 model->memory_reduction = from_model->memory_reduction;
2297 model->gradient_checkpointing = from_model->gradient_checkpointing;
2298 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2299 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2300 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2301 }
2302 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2303 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2303, __extension__ __PRETTY_FUNCTION__
); }))
;
2304 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2305 if (!to_tensors_init)
2306 {
2307 if (only_init_0)
2308 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2309 else
2310 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2311 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2312 // Check if it is not fully allocated, if it is not, init_1.
2313 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2314 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2314, __extension__ __PRETTY_FUNCTION__
); }))
;
2315 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2316 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2317 if (*from_param_ref < 0 && *param_ref >= 0)
2318 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2318, __extension__ __PRETTY_FUNCTION__
); }))
; }
2319 else if (*from_param_ref >= 0)
2320 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2320, __extension__ __PRETTY_FUNCTION__
); }))
; }
2321 if (*param_ref < 0 && *from_param_ref >= 0)
2322 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2322, __extension__ __PRETTY_FUNCTION__); }))
; }
2323 else if (*param_ref >= 0)
2324 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2324, __extension__ __PRETTY_FUNCTION__
); }))
; }
2325}
2326
2327void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2328{
2329 ccv_array_t* to_parameter_indices;
2330 int to_param_ref;
2331 ccv_array_t* from_parameter_indices;
2332 int from_param_ref;
2333 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2334 // Should be exactly the same tensor.
2335 if (to_param_ref < 0 && from_param_ref < 0)
2336 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2336, __extension__ __PRETTY_FUNCTION__
); }))
; }
2337 // To models.
2338 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2339 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2339, __extension__ __PRETTY_FUNCTION__
); }))
;
2340 // From models.
2341 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2342 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2343 const int to_parameter_size = to_compiled_data->parameters->rnum;
2344 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2345 int i, j;
2346 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2347 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2348 for (i = 0; i < rnum; i++)
2349 {
2350 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2351 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2351, __extension__ __PRETTY_FUNCTION__); }))
;
2352 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2352, __extension__ __PRETTY_FUNCTION__
); }))
;
2353 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2354 // If the original is not init'ed. We cannot copy from.
2355 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2356 continue;
2357 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2358 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2358, __extension__ __PRETTY_FUNCTION__); }))
;
2359 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2359, __extension__ __PRETTY_FUNCTION__
); }))
;
2360 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2361 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2361, __extension__
__PRETTY_FUNCTION__); }))
;
2362 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2363 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2363, __extension__
__PRETTY_FUNCTION__); }))
;
2364 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2365 for (j = 1; j < parallel_count; j++)
2366 {
2367 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2368 if (copy_tensor)
2369 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2370 }
2371 // Mark this symbol as init'ed.
2372 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2373 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2374 }
2375 ccv_array_free(to_parameter_indices);
2376 ccv_array_free(from_parameter_indices);
2377}
2378
// Instantiates a string-keyed hash map type, kh_ccv_cnnp_parameter_id_t, with int values,
// together with its static inline helpers (init / destroy / clear / get / resize / put / del).
// Points worth knowing when using the generated helpers, all visible in the expansion below:
//  - kh_init_* returns a calloc'ed table, so n_buckets is 0 and flags / keys / vals are all
//    null until the first successful resize (triggered from kh_put_*).
//  - kh_get_* returns 0 when n_buckets is 0; since the "end" iterator is n_buckets, that is the
//    "not found" value for an empty table. Otherwise it returns n_buckets when the key is absent.
//  - kh_put_* stores the key pointer itself (the string is not copied). It sets *ret to 1 when a
//    never-used bucket is taken, 2 when a deleted bucket is reused, 0 when the key is already
//    present, and -1 (returning n_buckets) when the resize fails.
//  - Keys are hashed with __ac_X31_hash_string and compared with strcmp.
//  - kh_destroy_* frees the keys / flags / vals arrays and the table, not the key strings.
2379KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
25
Null pointer value stored to field 'vals'
2380
2381void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2382{
2383 ccv_array_t* to_parameter_indices;
2384 int to_param_ref;
2385 ccv_array_t* from_parameter_indices;
2386 int from_param_ref;
2387 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2388 // Should be exactly the same tensor.
2389 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2390 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2390, __extension__ __PRETTY_FUNCTION__
); }))
; }
2391 // To models.
2392 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2393 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2393, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2394 // From models.
2395 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2396 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2397 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2397, __extension__ __PRETTY_FUNCTION__
); }))
; // Should have the same parallel count can share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2398 const int from_parameter_size = from_compiled_data->parameters->rnum;
2399 const int to_parameter_size = to_compiled_data->parameters->rnum;
2400 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2401 int i, j;
2402 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2403 char* updated_name = 0;
2404 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2405 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2406 for (i = 0; i < rnum; i++)
2407 {
2408 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is >= 0
11
'?' condition is true
12
Assuming the condition is false
13
'?' condition is false
2409 // Need to figure out how to use the renamer here.
2410 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2411 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2411, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2412 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2412, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2413 if (renamer
18.1
'renamer' is non-null
)
2414 {
2415 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2416 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2417 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2418 updated_name = (char*)ccmallocmalloc(1024);
2419 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2420 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2421 memcpy(updated_name, src_name, src_name_len);
2422 updated_name[src_name_len] = 0;
2423 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2424 continue; // Skip this.
2425 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2426 {
2427 // Nothing changed.
2428 } else {
2429 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2430 {
2431 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
24
Calling 'kh_init_ccv_cnnp_parameter_id'
26
Returning from 'kh_init_ccv_cnnp_parameter_id'
2432 for (j = 0; j < from_parameter_size; j++)
27
Assuming 'j' is >= 'from_parameter_size'
28
Loop condition is false. Execution continues on line 2440
2433 {
2434 int ret;
2435 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
2436 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2436
, __extension__ __PRETTY_FUNCTION__); }))
;
2437 kh_val(id_map, k)((id_map)->vals[k]) = j;
2438 }
2439 }
2440 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
2441 if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
29
Assuming 'k' is not equal to field 'n_buckets'
30
Taking false branch
2442 continue;
2443 src_d = kh_val(id_map, k)((id_map)->vals[k]);
31
Array access (via field 'vals') results in a null pointer dereference
2444 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2444, __extension__ __PRETTY_FUNCTION__); }))
;
2445 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2445, __extension__
__PRETTY_FUNCTION__); }))
;
2446 }
2447 }
2448 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2448, __extension__ __PRETTY_FUNCTION__); }))
;
2449 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2449, __extension__
__PRETTY_FUNCTION__); }))
;
2450 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2451 // If the original is not init'ed. We cannot share from.
2452 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2453 continue;
2454 for (j = 0; j < parallel_count; j++)
2455 {
2456 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2457 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2457, __extension__
__PRETTY_FUNCTION__); }))
;
2458 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2459 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2460 ccv_nnc_tensor_free(dest);
2461 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2462 }
2463 // Mark this symbol as init'ed.
2464 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2465 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2466 }
2467 ccv_array_free(to_parameter_indices);
2468 ccv_array_free(from_parameter_indices);
2469 if (id_map)
2470 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2471 if (updated_name)
2472 ccfreefree(updated_name);
2473 // Mark it as incomplete so we will call init_1.
2474 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2475 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2476 else // Remove the flag.
2477 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2478}
2479
2480ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2481{
2482 if (!compiled_data->stream_map)
2483 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2484 int ret = 0;
2485 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2486 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2486, __extension__ __PRETTY_FUNCTION__); }))
;
2487 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2488 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2489 if (ret != 0)
2490 {
2491 stream = ccv_nnc_stream_context_new(type);
2492 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2493 }
2494 return stream;
2495}
2496
2497void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2498{
2499 ccv_array_t* to_parameter_indices;
2500 int to_param_ref;
2501 ccv_array_t* from_parameter_indices;
2502 int from_param_ref;
2503 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2504 // Should be exactly the same tensor.
2505 if (to_param_ref < 0 && from_param_ref < 0)
2506 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2506, __extension__ __PRETTY_FUNCTION__
); }))
; }
2507 // To models.
2508 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2509 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2509, __extension__ __PRETTY_FUNCTION__
); }))
;
2510 // From models.
2511 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2512 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2513 const int to_parameter_size = to_compiled_data->parameters->rnum;
2514 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2515 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2515, __extension__ __PRETTY_FUNCTION__
); }))
;
2516 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2516, __extension__ __PRETTY_FUNCTION__
); }))
;
2517 int i, j;
2518 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2519 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2520 for (i = 0; i < aux_in_size; i++)
2521 inputs[i + 2] = aux_ins[i];
2522 for (i = 0; i < aux_out_size; i++)
2523 outputs[i + 1] = aux_outs[i];
2524 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2525 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2526 for (i = 0; i < rnum; i++)
2527 {
2528 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2529 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2529, __extension__ __PRETTY_FUNCTION__); }))
;
2530 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2530, __extension__ __PRETTY_FUNCTION__
); }))
;
2531 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2532 // If the original is not init'ed. We cannot copy from.
2533 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2534 continue;
2535 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2536 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2536, __extension__ __PRETTY_FUNCTION__); }))
;
2537 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2537, __extension__ __PRETTY_FUNCTION__
); }))
;
2538 if (parallel_count > 1)
2539 {
2540 ccv_nnc_stream_context_t* streams[parallel_count];
2541 ccv_nnc_stream_signal_t* signal;
2542 if (stream_context)
2543 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2544 for (j = 0; j < parallel_count; j++)
2545 {
2546 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2547 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2548 if (!dest || !src)
2549 {
2550 streams[j] = 0;
2551 continue;
2552 }
2553 // At the moment, can only handle them on the same device.
2554 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2554, __extension__ __PRETTY_FUNCTION__
); }))
;
2555 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2555, __extension__ __PRETTY_FUNCTION__
); }))
;
2556 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2557 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2558 int type = stream_type;
2559 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2560 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2561 // Wait signal to finish.
2562 if (stream_context)
2563 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2564 inputs[0] = outputs[0] = dest;
2565 inputs[1] = src;
2566 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2567 if (stream_context)
2568 {
2569 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2570 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2571 }
2572 streams[j] = stream_0;
2573 }
2574 // If this should be blocking, blocking it.
2575 if (!stream_context)
2576 for (j = 0; j < parallel_count; j++)
2577 if (streams[j])
2578 ccv_nnc_stream_context_wait(streams[j]);
2579 } else {
2580 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2581 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2581, __extension__
__PRETTY_FUNCTION__); }))
;
2582 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2583 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2583, __extension__
__PRETTY_FUNCTION__); }))
;
2584 inputs[0] = outputs[0] = dest;
2585 inputs[1] = src;
2586 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2587 }
2588 // Mark this symbol as init'ed.
2589 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2590 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2591 }
2592 ccv_array_free(to_parameter_indices);
2593 ccv_array_free(from_parameter_indices);
2594}
2595
2596void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2597{
2598 int to_param_ref;
2599 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2600 // To models.
2601 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2602 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2602, __extension__ __PRETTY_FUNCTION__
); }))
;
2603 // Tensor has to be inited already.
2604 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2604, __extension__ __PRETTY_FUNCTION__
); }))
;
2605 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2605, __extension__ __PRETTY_FUNCTION__
); }))
;
2606 // From models.
2607 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2608 const int to_parameter_size = to_compiled_data->parameters->rnum;
2609 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2610 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2610, __extension__ __PRETTY_FUNCTION__
); }))
;
2611 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2611, __extension__ __PRETTY_FUNCTION__
); }))
;
2612 int i, j;
2613 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2614 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2615 for (i = 0; i < aux_in_size; i++)
2616 inputs[i + 1] = aux_ins[i];
2617 for (i = 0; i < aux_out_size; i++)
2618 outputs[i + 1] = aux_outs[i];
2619 for (i = 0; i < rnum; i++)
2620 {
2621 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2622 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2622, __extension__ __PRETTY_FUNCTION__); }))
;
2623 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2623, __extension__ __PRETTY_FUNCTION__
); }))
;
2624 if (parallel_count > 1)
2625 {
2626 ccv_nnc_stream_context_t* streams[parallel_count];
2627 ccv_nnc_stream_signal_t* signal;
2628 if (stream_context)
2629 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2630 for (j = 0; j < parallel_count; j++)
2631 {
2632 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2633 if (!dest)
2634 {
2635 streams[j] = 0;
2636 continue;
2637 }
2638 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2639 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2640 int type = stream_type;
2641 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2642 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2643 // Wait signal to finish.
2644 if (stream_context)
2645 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2646 inputs[0] = outputs[0] = dest;
2647 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2648 if (stream_context)
2649 {
2650 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2651 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2652 }
2653 streams[j] = stream_0;
2654 }
2655 // If this should be blocking, blocking it.
2656 if (!stream_context)
2657 for (j = 0; j < parallel_count; j++)
2658 if (streams[j])
2659 ccv_nnc_stream_context_wait(streams[j]);
2660 } else {
2661 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2662 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2662, __extension__
__PRETTY_FUNCTION__); }))
;
2663 inputs[0] = outputs[0] = dest;
2664 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2665 }
2666 // No need to mark this symbol as init'ed, it is already.
2667 }
2668 ccv_array_free(to_parameter_indices);
2669}
2670
2671void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2672{
2673 int to_param_ref;
2674 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2675 // To models.
2676 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2677 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2677, __extension__ __PRETTY_FUNCTION__
); }))
;
2678 // Tensor has to be inited already.
2679 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2679, __extension__ __PRETTY_FUNCTION__
); }))
;
2680 ccv_nnc_tensor_t** tensor_gradients;
2681 if (to_compiled_data->backward.count > 1)
2682 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2683 else
2684 tensor_gradients = to_compiled_data->tensors.gradients;
2685 assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({
if (tensor_gradients) ; else __assert_fail ("tensor_gradients"
, "ccv_cnnp_model.c", 2685, __extension__ __PRETTY_FUNCTION__
); }))
;
2686 // From models.
2687 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2688 const int to_parameter_size = to_compiled_data->parameters->rnum;
2689 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2690 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2690, __extension__ __PRETTY_FUNCTION__
); }))
;
2691 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2691, __extension__ __PRETTY_FUNCTION__
); }))
;
2692 int i, j;
2693 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2694 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2695 for (i = 0; i < aux_in_size; i++)
2696 inputs[i + 1] = aux_ins[i];
2697 for (i = 0; i < aux_out_size; i++)
2698 outputs[i + 1] = aux_outs[i];
2699 for (i = 0; i < rnum; i++)
2700 {
2701 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2702 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2702, __extension__ __PRETTY_FUNCTION__); }))
;
2703 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2703, __extension__ __PRETTY_FUNCTION__
); }))
;
2704 if (parallel_count > 1)
2705 {
2706 ccv_nnc_stream_context_t* streams[parallel_count];
2707 ccv_nnc_stream_signal_t* signal;
2708 if (stream_context)
2709 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2710 for (j = 0; j < parallel_count; j++)
2711 {
2712 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2713 if (!dest)
2714 {
2715 streams[j] = 0;
2716 continue;
2717 }
2718 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2719 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2720 int type = stream_type;
2721 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2722 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2723 // Wait signal to finish.
2724 if (stream_context)
2725 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2726 inputs[0] = outputs[0] = dest;
2727 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2728 if (stream_context)
2729 {
2730 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2731 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2732 }
2733 streams[j] = stream_0;
2734 }
2735 // If this should be blocking, blocking it.
2736 if (!stream_context)
2737 for (j = 0; j < parallel_count; j++)
2738 if (streams[j])
2739 ccv_nnc_stream_context_wait(streams[j]);
2740 } else {
2741 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2742 if (!dest)
2743 continue;
2744 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2744, __extension__
__PRETTY_FUNCTION__); }))
;
2745 inputs[0] = outputs[0] = dest;
2746 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2747 }
2748 // No need to mark this symbol as init'ed, it is already.
2749 }
2750 ccv_array_free(to_parameter_indices);
2751}
2752
2753ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2754{
2755 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2756 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2756, __extension__ __PRETTY_FUNCTION__); }))
;
2757 return compiled_data->minimize.minimizer;
2758}
2759
2760void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2761{
2762 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2763 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2763, __extension__ __PRETTY_FUNCTION__); }))
;
2764 const int parameter_size = compiled_data->parameters->rnum;
2765 if (parameter_size == 0)
2766 return;
2767 if (reset)
2768 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 2768, __extension__ __PRETTY_FUNCTION__
); }))
; }
2769 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2770 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2771 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2772 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2773 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2774 // We update all parameters, at this point, we have one minimizer.
2775 if (set_parameters == 0 || set_parameter_size == 0)
2776 compiled_data->minimize.minimizer = minimizer;
2777 int i;
2778 if (set_parameters && set_parameter_size)
2779 {
2780 // I need to save what's the minimizer along with this.
2781 if (!compiled_data->minimize.parameters)
2782 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2783 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2784 set_minimizer_for_parameter->minimizer = minimizer;
2785 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2786 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2787 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2788 }
2789 // If reset is true, clear the parameters array.
2790 if (reset && compiled_data->minimize.parameters)
2791 {
2792 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2793 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2794 ccv_array_clear(compiled_data->minimize.parameters);
2795 }
2796 if (!compiled_data->update_nodes)
2797 return;
2798 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2799 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 2799, __extension__ __PRETTY_FUNCTION__); }))
;
2800 if (saved_aux_size > old_max_saved_aux_size)
2801 {
2802 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 2802, __extension__ __PRETTY_FUNCTION__
); }))
;
2803 // Reallocate first, move them around later.
2804 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2805 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2806 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2807 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
2808 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2809 }
2810 int flag = 0;
2811 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2812 if (set_parameters && set_parameter_size)
2813 {
2814 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2815 for (i = 0; i < set_parameter_size; i++)
2816 {
2817 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2818 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 2818, __extension__ __PRETTY_FUNCTION__
); }))
;
2819 const int old_rnum = parameter_indices->rnum;
2820 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2821 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2822 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 2822, __extension__ __PRETTY_FUNCTION__
); }))
;
2823 if (param_ref >= 0)
2824 {
2825 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2825, __extension__ __PRETTY_FUNCTION__
); }))
;
2826 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
2827 parameter_indices->rnum = old_rnum + 1;
2828 }
2829 }
2830 // We may have duplicated indices, but that is OK, we will set it twice.
2831 for (i = 0; i < parameter_indices->rnum; i++)
2832 {
2833 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
2834 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2835 flag = 1;
2836 }
2837 ccv_array_free(parameter_indices);
2838 } else {
2839 for (i = 0; i < parameter_size; i++)
2840 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2841 flag = 1;
2842 if (compiled_data->minimize.parameters)
2843 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2844 flag = 1;
2845 }
2846 if (flag)
2847 {
2848 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
2849 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2850 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2851 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2852 }
2853}
2854
2855void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2856{
2857 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2858 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2858, __extension__ __PRETTY_FUNCTION__); }))
;
2859 compiled_data->compile_params = compile_params;
2860}
2861
2862void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2863{
2864 if (model->graph && out_size > 0)
2865 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2866 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2867 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2868 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2869 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2870 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2871 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2872}
2873
2874void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2875{
2876 if (model->graph)
2877 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2878}
2879
2880static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2881{
2882 int i;
2883 const int parameter_size = compiled_data->parameters->rnum;
2884 ccv_array_free(compiled_data->parameters);
2885 if (compiled_data->parameter_flags)
2886 ccfreefree(compiled_data->parameter_flags);
2887 const int internal_size = compiled_data->internals->rnum;
2888 ccv_array_free(compiled_data->internals);
2889 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 2889, __extension__ __PRETTY_FUNCTION__
); }))
;
2890 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 2890, __extension__ __PRETTY_FUNCTION__
); }))
;
2891 for (i = 0; i < parameter_size; i++)
2892 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
2893 ccv_array_free(compiled_data->ids.parameters);
2894 for (i = 0; i < internal_size; i++)
2895 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
2896 ccv_array_free(compiled_data->ids.internals);
2897 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2898 if (compiled_data->tensors.parameters)
2899 {
2900 for (i = 0; i < parameter_size * parallel_count; i++)
2901 // If it is not marked as not belonging, we can free it.
2902 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2903 if (compiled_data->tensors.parameters[i])
2904 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2905 for (i = 0; i < internal_size * parallel_count; i++)
2906 if (compiled_data->tensors.internals[i])
2907 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2908 ccfreefree(compiled_data->tensors.parameters);
2909 }
2910 if (compiled_data->tensors.gradients)
2911 {
2912 for (i = 0; i < parameter_size * parallel_count; i++)
2913 {
2914 if (compiled_data->tensors.gradients[i])
2915 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2916 if (compiled_data->tensors.accum_gradients[i])
2917 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2918 }
2919 ccfreefree(compiled_data->tensors.gradients);
2920 }
2921 if (compiled_data->minimize.parameters)
2922 {
2923 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2924 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2925 ccv_array_free(compiled_data->minimize.parameters);
2926 }
2927 if (compiled_data->rewindables)
2928 ccv_array_free(compiled_data->rewindables);
2929 if (compiled_data->tensors_init.v)
2930 ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
);
2931 if (compiled_data->evaluate.tos)
2932 ccfreefree(compiled_data->evaluate.tos);
2933 compiled_data->evaluate.tos = 0;
2934 if (compiled_data->stream_map)
2935 {
2936 khiter_t k;
2937 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
2938 {
2939 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
2940 continue;
2941 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2942 ccv_nnc_stream_context_free(stream);
2943 }
2944 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
2945 }
2946 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2947 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
2948 _ccv_cnnp_compiled_data_backward_free(compiled_data);
2949 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2950 if (compiled_data->gradient_checkpoints)
2951 {
2952 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
2953 {
2954 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)->
data)) + (size_t)(compiled_data->gradient_checkpoints)->
rsize * (size_t)(i)))
;
2955 assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__
({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs"
, "ccv_cnnp_model.c", 2955, __extension__ __PRETTY_FUNCTION__
); }))
;
2956 ccfreefree(checkpoint->inputs);
2957 ccv_array_free(checkpoint->tensor_symbols);
2958 }
2959 ccv_array_free(compiled_data->gradient_checkpoints);
2960 }
2961 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
2962 ccfreefree(compiled_data);
2963}
2964
2965void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
2966{
2967 if (model->isa->deinit)
2968 model->isa->deinit(model);
2969 if (model->io)
2970 {
2971 int i;
2972 for (i = 0; i < model->io->rnum; i++)
2973 {
2974 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
2975 if (model_io->outgoings)
2976 ccv_array_free(model_io->outgoings);
2977 if (model_io->incomings)
2978 ccv_array_free(model_io->incomings);
2979 if (model_io->dependencies)
2980 ccv_array_free(model_io->dependencies);
2981 ccfreefree(model_io);
2982 }
2983 ccv_array_free(model->io);
2984 }
2985 if (model->parameter_indices)
2986 ccv_array_free(model->parameter_indices);
2987 if (model->inputs)
2988 ccfreefree(model->inputs);
2989 if (model->graph)
2990 ccv_nnc_symbolic_graph_free(model->graph);
2991 if (model->compiled_data)
2992 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
2993 if (model->name)
2994 ccfreefree(model->name);
2995 ccfreefree(model);
2996}