Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 2443, column 11
Assigned value is garbage or undefined
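For context, "Assigned value is garbage or undefined" means the analyzer found a path on which the right-hand side of an assignment reads memory that was never initialized. A minimal, hypothetical sketch of the pattern (not taken from ccv_cnnp_model.c) looks like this:

    int copy_first(const int* const values, const int count)
    {
    	int first; /* never written when count <= 0 */
    	if (count > 0)
    		first = values[0];
    	const int result = first; /* analyzer: assigned value is garbage or undefined when count <= 0 */
    	return result;
    }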

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/18 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -D HAVE_CUDA_SM80 -I /usr/local/include -internal-isystem /usr/local/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-07-02-165253-122966-1 -x c ccv_cnnp_model.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6
7// MARK - Level-5 API
8
9ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
10{
11 if (!model->io)
12 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
13 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
14 model_io->param_ref = 0;
15 model_io->param_sel = 0;
16 model_io->visit = 0;
17 model_io->model = model;
18 model_io->dependencies = 0;
19 model_io->dependents = 0;
20 model_io->outgoings = 0;
21 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
22 ccv_array_push(model->io, &model_io);
23 if (input_size > 0)
24 {
25 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0);
26 ccv_array_resize(model_io->incomings, input_size);
27 int i;
28 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
29 for (i = 0; i < input_size; i++)
30 {
31 if (!inputs[i]->outgoings)
32 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
33 ccv_array_push(inputs[i]->outgoings, &model_io);
34 }
35 } else {
36 model_io->incomings = 0;
37 }
38 return model_io;
39}
40
41void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size)
42{
43 assert(dependency_size > 0);
44 if (!model_io->dependencies)
45 model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0);
46 int i, j;
47 for (i = 0; i < dependency_size; i++)
48 {
49 int flag = 0;
50 // Check if it already exists or not.
51 for (j = 0; !flag && j < model_io->dependencies->rnum; j++)
52 if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j) == dependencies[i])
53 flag = 1;
54 if (flag)
55 continue;
56 ccv_array_push(model_io->dependencies, dependencies + i);
57 ++dependencies[i]->dependents;
58 }
59}
60
61int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
62{
63 return model->output_size;
64}
65
66int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model)
67{
68 // If the model is compiled, it defaults to 1 unless explicitly set otherwise.
69 if (model->compiled_data)
70 return model->is_trainable >= 0 ? model->is_trainable : 1;
71 return model->is_trainable;
72}
73
74ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
75{
76 if (!model->io)
77 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
78 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
79 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
80 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
81 model_io->visit = 0;
82 model_io->model = model;
83 model_io->outputs = 0;
84 model_io->dependencies = 0;
85 model_io->dependents = 0;
86 model_io->incomings = 0;
87 model_io->outgoings = 0;
88 ccv_array_push(model->io, &model_io);
89 return model_io;
90}
91
92void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
93{
94 model->notify_hook.func = func;
95 model->notify_hook.context = context;
96}
97
98void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
99{
100 if (model->notify_hook.func)
101 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
102 if (model->isa->notify)
103 model->isa->notify(model, tag, payload);
104}
105
106static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
107{
108 int i, j;
109 for (i = 0; i < graph_exec_symbol_size; i++)
110 {
111 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
112 // Check whether this graph exec symbol has any duplicate.
113 for (j = i + 1; j < graph_exec_symbol_size;)
114 {
115 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
116 // If there is a same graph exec symbol, remove it.
117 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
118 {
119 if (j + 1 < graph_exec_symbol_size)
120 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
121 --graph_exec_symbol_size;
122 continue;
123 }
124 ++j;
125 }
126 }
127 return graph_exec_symbol_size;
128}
129
130void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable)
131{
132 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
133 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
134 int i;
135 if (!model->parameter_indices)
136 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
137 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
138 {
139 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
140 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
141 {
142 // Only add to parameter_indices if it is trainable.
143 if (add_to_array_context->prefix == 't')
144 ccv_array_add_unique_int(model->parameter_indices, i);
145 // Found it, return, don't add it.
146 return;
147 }
148 }
149 // Only add to parameter_indices if it is trainable.
150 if (add_to_array_context->prefix == 't')
151 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
152 // This is a new one, no need to add_unique_int, it is unique.
153 ccv_array_push(add_to_array_context->symbols, &symbol);
154 if (add_to_array_context->trainables)
155 ccv_array_push(add_to_array_context->trainables, &is_trainable);
156 char id[2048];
157 id[0] = add_to_array_context->prefix;
158 id[1] = '-';
159 int total_len = 2;
160 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
161 {
162 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
163 int len;
164 if (name->name && name->name[0] != '\0')
165 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
166 else
167 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
168 total_len += len;
169 if (total_len >= 2047)
170 break;
171 }
172 if (total_len < 2047)
173 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
174 assert(total_len < 2048);
175 char *heap_id = (char*)ccmalloc(total_len + 1);
176 memcpy(heap_id, id, total_len + 1);
177 ccv_array_push(add_to_array_context->ids, &heap_id);
178 ++add_to_array_context->sequence->it;
179}
180
181static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints)
182{
183 compiled_data->f = compiled_data->fits + output_size;
184 compiled_data->xpu_alloc.mp_hdr = -1;
185 compiled_data->xpu_alloc.freed = kh_init(dy_str);
186 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
187 compiled_data->gradient_checkpoints = gradient_checkpoints;
188}
189
190static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
191{
192 assert(model->graph);
193 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
194 int i;
195 for (i = 0; i < input_size; i++)
196 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
197 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
198 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
199 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
200 ccv_cnnp_model_sequence_t model_sequence = {
201 .bank = kh_init(ccv_cnnp_model_name_bank)
202 };
203 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
204 .sequence = &model_sequence,
205 .prefix = 't',
206 .symbols = parameters,
207 .ids = parameter_ids,
208 .trainables = parameter_trainables,
209 };
210 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
211 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
212 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
213 .sequence = &model_sequence,
214 .prefix = 'r',
215 .symbols = internals,
216 .ids = internal_ids,
217 .trainables = 0,
218 };
219 ccv_cnnp_model_build_data_t build_data = {
220 .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1,
221 .model_sequence = &model_sequence,
222 .add_to_array = ccv_cnnp_model_add_to_array,
223 .parameters = parameters,
224 .context = {
225 .add_to_parameter = &add_to_parameter_context,
226 .add_to_output = &add_to_output_context,
227 },
228 .gradient_checkpoints = 0,
229 };
230 model->data = &build_data;
231 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
232 for (i = 0; i < model->output_size; i++)
233 {
234 const ccv_nnc_tensor_symbol_t output = model->outputs[i];
235 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output);
236 if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL)
237 continue;
238 // If the output is an alias, insert a data transform regardless, for result correctness (we cannot bind an alias). See the ccv_nnc_tensor_bind_symbol method:
239 // we can correctly bind a tensor that has aliases derived from it, but we cannot correctly bind an alias tensor itself (this is expected, because we cannot
240 // handle the case where the alias covers only part of the original tensor but is bound differently).
241 const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output);
242 model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0);
243 ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD(), &output, 1, model->outputs + i, 1, "contiguous");
244 ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
245 }
246 model->data = 0;
247 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
248 if (model_sequence.sequences)
249 ccv_array_free(model_sequence.sequences);
250 // Check if there are parameters that are not trainable. If there are, we will allocate a uint64 bitmap to record that.
251 int not_trainables = 0;
252 // Assert no parameter is alias.
253 for (i = 0; i < parameters->rnum; i++)
254 {
255 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
256 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
257 assert(alias_to.graph == 0); // Cannot find the one alias to.
258 if (*(int*)ccv_array_get(parameter_trainables, i) == 0)
259 not_trainables = 1;
260 }
261 assert(parameters->rnum == parameter_trainables->rnum);
262 uint64_t* parameter_flags = 0;
263 if (not_trainables)
264 {
265 parameter_flags = (uint64_t*)cccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t));
266 for (i = 0; i < parameter_trainables->rnum; i++)
267 if (*(int*)ccv_array_get(parameter_trainables, i))
268 parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63));
269 }
270 ccv_array_free(parameter_trainables);
271 // Assert no internal is alias.
272 for (i = 0; i < internals->rnum; i++)
273 {
274 const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
275 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal);
276 assert(alias_to.graph == 0); // Cannot find the one alias to.
277 }
278 const int output_size = model->output_size;
279 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
280 const int parameters_rnum = parameters->rnum;
281 if (input_size > 0)
282 {
283 ccv_array_resize(parameters, parameters_rnum + input_size);
284 memcpy(ccv_array_get(parameters, parameters_rnum), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t));
285 }
286 ccv_nnc_symbolic_graph_simplify(model->graph,
287 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
288 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
289 CCV_NNC_SIMPLIFY_OPS_FUSION,
290 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
291 ccv_array_get(parameters, 0), parameters_rnum + input_size,
292 model->outputs, output_size,
293 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
294 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
295 // Size it down.
296 parameters->rnum = parameters_rnum;
297 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
298 _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints);
299 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
300 assert(evaluate_to_size > 0);
301 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
302 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
303 compiled_data->loss = loss;
304 if (loss.cmd == CCV_NNC_NOOP)
305 {
306 // If no loss function provided, there is no fits.
307 for (i = 0; i < output_size; i++)
308 {
309 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
310 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
311 if (alias_to.d < 0)
312 compiled_data->f[i] = model->outputs[i];
313 else { // We cannot differentiate against an alias; therefore, we have to verify this output is full, so we can diff against the original.
314 int ofs[CCV_NNC_MAX_DIM_ALLOC];
315 int inc[CCV_NNC_MAX_DIM_ALLOC];
316 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
317 int j;
318 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
319 { assert(ofs[j] == 0); } // There is no ofs.
320 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
321 }
322 }
323 } else {
324 for (i = 0; i < output_size; i++)
325 {
326 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
327 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
328 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
329 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
330 }
331 }
332 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
333 ccv_nnc_symbolic_graph_simplify(model->graph,
334 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
335 0, 0, // No need to provide binds at this point.
336 compiled_data->f, model->output_size,
337 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
338 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
339 // If inputs are from GPU, stream type is GPU.
340 compiled_data->parameters = parameters;
341 compiled_data->parameter_flags = parameter_flags;
342 compiled_data->internals = internals;
343 compiled_data->ids.parameters = parameter_ids;
344 compiled_data->ids.internals = internal_ids;
345}
346
347static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
348{
349 ccv_array_t* const stack = (ccv_array_t*)context;
350 ccv_array_push(stack, &symbol.d);
351}
352
353static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
354{
355 const ccv_nnc_tensor_symbol_t src_symbol = {
356 .d = src_index,
357 .graph = src_graph
358 };
359 const ccv_nnc_tensor_symbol_t dest_symbol = {
360 .d = dest_index,
361 .graph = dest_graph
362 };
363 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
364 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
365 int ofs[CCV_NNC_MAX_DIM_ALLOC];
366 int inc[CCV_NNC_MAX_DIM_ALLOC];
367 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
368 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
369}
370
371static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
372{
373 const ccv_nnc_tensor_symbol_t src_symbol = {
374 .d = src_index,
375 .graph = src_graph
376 };
377 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
378 const ccv_nnc_tensor_symbol_t dest_symbol = {
379 .d = dest_index,
380 .graph = dest_graph
381 };
382 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
383 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
384}
385
386static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
387static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
388
389typedef struct {
390 int parallel_count;
391 ccv_nnc_symbolic_graph_t* graph;
392 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
393} ccv_nnc_graph_exec_update_t;
394
395static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
396{
397 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
398 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
399 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
400 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
401 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
402 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
403 const int parallel_count = graph_exec_update->parallel_count;
404 int i;
405 for (i = 1; i < parallel_count; i++)
406 {
407 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
408 if (!CCV_NO_GRAPH_EXEC(copy))
409 {
410 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
411 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
412 }
413 }
414}
415
416void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
417{
418 assert(model->graph);
419 assert(model->compiled_data);
420 assert(!init->graph);
421 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
422 init->graph = ccv_nnc_symbolic_graph_new();
423 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
424 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0);
425 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
426 init->parallel_count = model->parallel_count;
427 init->memory_compression = model->memory_compression;
428 init->memory_reduction = model->memory_reduction;
429 init->gradient_checkpointing = model->gradient_checkpointing;
430 init->compiled_data->stream_type = model->compiled_data->stream_type;
431 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
432 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
433 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
434 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
435 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0);
436 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
437 int i, j;
438 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
439 for (i = 0; i < compiled_data->parameters->rnum; i++)
440 {
441 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
442 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
443 }
444 for (i = 0; i < compiled_data->internals->rnum; i++)
445 {
446 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
447 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
448 }
449 // Update inputs.
450 assert(model->input_size == init->input_size);
451 for (i = 0; i < model->input_size; i++)
452 if (model->inputs[i].d >= 0)
453 {
454 assert(init->inputs[i].d >= 0);
455 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
456 }
457 // Update outputs.
458 assert(model->output_size == init->output_size);
459 for (i = 0; i < model->output_size; i++)
460 {
461 if (model->outputs[i].d >= 0)
462 {
463 assert(init->outputs[i].d >= 0);
464 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
465 }
466 if (model->outputs[i].d != model->compiled_data->f[i].d)
467 {
468 assert(init->outputs[i].d != init->compiled_data->f[i].d);
469 if (model->compiled_data->f[i].d >= 0)
470 {
471 assert(init->compiled_data->f[i].d >= 0);
472 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
473 }
474 }
475 }
476 // Go through the graph to set tensor on matching symbols
477 for (i = 0; i < stack->rnum; i++)
478 {
479 const int d = *(int*)ccv_array_get(stack, i);
480 // If exceed range, skip.
481 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
482 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
483 continue;
484 const ccv_nnc_graph_exec_symbol_t src_symbol = {
485 .d = d,
486 .graph = init->graph
487 };
488 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
489 .d = d,
490 .graph = model->graph
491 };
492 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
493 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
494 // If the command doesn't match, skip.
495 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
496 continue;
497 // Now get all the inputs and outputs, if matches, set them.
498 const int* src_inputs;
499 int src_input_size;
500 const int* src_outputs;
501 int src_output_size;
502 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
503 const int* dest_inputs;
504 int dest_input_size;
505 const int* dest_outputs;
506 int dest_output_size;
507 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
508 // We may have unmatched input / output size because this is the minimizer and it has
509 // different saved_aux (for example, when we shrunk with CMD_NOOP).
510 if (src_input_size != dest_input_size)
511 continue;
512 if (src_output_size != dest_output_size)
513 continue;
514 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
515 // There may be mismatches between the source tensor symbols and destination tensor symbols. The reason is that
516 // the minimizer may be passed in later, so tensors for the minimizer may be allocated later in the original
517 // graph, whereas in the newly created graph everything is streamlined (the minimizer exists from the beginning).
518 // That makes the order of tensor symbol creation different, and therefore the mapping of which tensor is which
519 // is off as well. However, setting a new minimizer won't change the exec symbol ordering, because we never create
520 // new exec symbols after the gradient init step. Setting a new minimizer just updates that exec symbol's settings;
521 // it is not a new exec symbol.
522 for (j = 0; j < src_input_size; j++)
523 if (src_inputs[j] >= 0)
524 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
525 for (j = 0; j < src_output_size; j++)
526 if (src_outputs[j] >= 0)
527 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
528 }
529 ccv_array_free(stack);
530 // After this, we get all tensors in the model graph resolved through tensor_auto.
531 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
532 // Verify symbols we get matches.
533 const int parameter_size = compiled_data->parameters->rnum;
534 for (i = 0; i < parameter_size; i++)
535 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
536 const int internal_size = compiled_data->internals->rnum;
537 for (i = 0; i < internal_size; i++)
538 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
539 // Go through compiled data.
540 if (compiled_data->tensor_arena)
541 {
542 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
543 if (flag == 0 && compiled_data->graph_exec_arena)
544 {
545 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
546 // Since we will reinit, if we previously set is_test, we need to set it again.
547 if (compiled_data->is_test)
548 {
549 const int parallel_count = ccv_max(model->parallel_count, 1);
550 ccv_nnc_graph_exec_update_t update = {
551 .parallel_count = parallel_count,
552 .graph = model->graph,
553 .graph_exec_arena = compiled_data->graph_exec_arena,
554 };
555 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
556 }
557 } else
558 // Free-up tensor arena & graph exec arena.
559 _ccv_cnnp_compiled_data_graph_free(compiled_data);
560 }
561 // There are other compiled graphs, for accumulating and applying gradients.
562 // However, the main conclusion is that these absorb operations shouldn't impact parameters.
563 // Thus, they won't impact the shape of gradients (only outgrad). Since we don't allocate
564 // outgrad ourselves, it is not a concern. For normal gradients, the shape cannot change,
565 // otherwise the parameters' shape would be meaningless. The same goes for internals.
566 // That is why we don't update these compiled graphs at all at this point.
567 // Free the model, we've already "absorbed" it.
568 ccv_cnnp_model_free(init);
569}
570
571void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
572{
573 assert(input_size == model->input_size || model->input_size == 0);
574 if (model->input_size == 0)
575 model->input_size = input_size;
576 if (!model->graph) // The graph is not compiled yet.
577 {
578 model->graph = ccv_nnc_symbolic_graph_new();
579 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
580 assert(model->compiled_data);
581 int i, flag = 0;
582 for (i = 0; !flag && i < input_size; i++)
583 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
584 // If inputs are from GPU, stream type is GPU.
585 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
586 model->compiled_data->minimize.minimizer = minimizer;
587 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
588 } else {
589 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
590 // And then absorb the "new model" to the old one.
591 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable);
592 ccv_cnnp_model_absorb(model, init, inputs, input_size);
593 // Reset minimizer.
594 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
595 }
596}
597
598ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable)
599{
600 ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0);
601 new_model->is_trainable = is_trainable;
602 return new_model;
603}
604
605void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
606{
607 assert(model->graph);
608 assert(output_size == model->output_size);
609 ccv_nnc_symbolic_graph_t* const graph = model->graph;
610 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
611 int i;
612 for (i = 0; i < output_size; i++)
613 {
614 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
615 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
616 }
617}
618
619void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
620{
621 if (workspace_size == model->workspace_size)
622 return;
623 model->workspace_size = workspace_size;
624 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
625 if (compiled_data && compiled_data->graph)
626 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
627}
628
629size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model)
630{
631 return model->workspace_size;
632}
633
634void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
635{
636 if (parallel == 0)
637 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
638 else
639 model->parallel_count = parallel;
640 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
641 if (compiled_data)
642 { assert(!compiled_data->graph); }
643}
644
645void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count)
646{
647 model->max_stream_count = max_stream_count;
648 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
649 if (compiled_data)
650 { assert(!compiled_data->graph); }
651}
652
653void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
654{
655 model->memory_compression = memory_compression;
656 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
657 if (compiled_data)
658 { assert(!compiled_data->graph); }
659}
660
661void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction)
662{
663 model->memory_reduction = memory_reduction;
664 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
665 if (compiled_data)
666 { assert(!compiled_data->graph); }
667}
668
669void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing)
670{
671 model->gradient_checkpointing = gradient_checkpointing;
672}
673
674int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model)
675{
676 return model->gradient_checkpointing;
677}
678
679typedef struct {
680 int parallel_count;
681 ccv_nnc_symbolic_graph_t* graph;
682 ccv_cnnp_compiled_data_t* compiled_data;
683 ccv_nnc_tensor_arena_t* tensor_arena;
684} ccv_nnc_tensor_init_states_t;
685
686static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
687{
688 int i;
689 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
690 for (i = 0; i < compiled_data->parameters->rnum; i++)
691 {
692 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
693 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
694 return 1;
695 }
696 for (i = 0; i < compiled_data->internals->rnum; i++)
697 {
698 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
699 if (!(init_v[d >> 5] & (1u << (d & 0x1f))))
700 return 1;
701 }
702 return 0;
703}
704
705static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
706{
707 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
708 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
709 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
710 if (!output_tensor)
711 return;
712 const int d = output_symbol.d;
713 assert(d < tensor_init_states->compiled_data->tensors_init.size);
714 uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v);
715 if (init_v[d >> 5] & (1u << (d & 0x1f)))
716 return;
717 init_v[d >> 5] |= (1u << (d & 0x1f));
718 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
719 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
720 const int parallel_count = tensor_init_states->parallel_count;
721 int i;
722 for (i = 1; i < parallel_count; i++)
723 {
724 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
725 if (copy)
726 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
727 }
728}
729
730// This method can only handle cases where we added new tensors and execs, never deleted any. This invariant holds because
731// we set up everything (including calling the simplify method) in the ccv_cnnp_model_compile method, before this rewind setup.
732static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
733{
734 assert(model->graph);
735 assert(model->compiled_data);
736 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
737 assert(compiled_data->rewindables);
738 int i;
739 for (i = 0; i < compiled_data->rewindables->rnum; i++)
740 {
741 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
742 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
743 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
744 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
745 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
746 }
747 ccv_array_clear(compiled_data->rewindables);
748 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
749}
750
751static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
752{
753 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
754 .type = CCV_CNNP_REWIND_TENSOR,
755 .tensor = symbol
756 };
757 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
758 ccv_array_push(rewind_symbols, &rewind_symbol);
759}
760
761static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
762{
763 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
764 .type = CCV_CNNP_REWIND_TENSOR,
765 .tensor = symbol
766 };
767 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
768 ccv_array_push(rewind_symbols, &rewind_symbol);
769}
770
771static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
772{
773 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
774 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
775 .graph_exec = symbol
776 };
777 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
778 ccv_array_push(rewind_symbols, &rewind_symbol);
779}
780
781static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
782{
783 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
784 if (!CCV_NO_GRAPH_EXEC(update_exec))
785 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
786 int i;
787 for (i = 1; i < parallel_count; i++)
788 {
789 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
790 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
791 if (!CCV_NO_GRAPH_EXEC(copy))
792 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
793 }
794}
795
796static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
797{
798 assert(compiled_data);
799 assert(symbolic_graph);
800 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
801 int i;
802 for (i = 1; i < parallel_count; i++)
803 {
804 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
805 if (copy_symbol.graph)
806 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
807 }
808 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
809 if (graph_exec_arena)
810 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
811 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
812 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
813 if (gradient_graph_exec_arena)
814 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
815}
816
817static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
818{
819 int this_parameter_flag = 0;
820 if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL)
821 return this_parameter_flag;
822 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
823 int j, k;
824 // For no-op, we can preserve previous saved_aux_size.
825 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
826 {
827 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
828 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
829 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
830 // make sure some model parameters don't update if we don't want them to.
831 int old_saved_aux_size;
832 if (old_minimizer.cmd == CCV_NNC_NOOP)
833 {
834 int input_size;
835 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
836 if (input_size < 2) // This is not legit.
837 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
838 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
839 old_saved_aux_size = input_size - 2;
840 } else
841 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
842 if (old_saved_aux_size != saved_aux_size)
843 {
844 this_parameter_flag = 1;
845 if (saved_aux_size > old_saved_aux_size)
846 {
847 // Allocate new tensor symbols.
848 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
849 for (j = old_saved_aux_size; j < saved_aux_size; j++)
850 {
851 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
852 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
853 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
854 for (k = 1; k < parallel_count; k++)
855 {
856 ccv_nnc_tensor_param_t dev_info = info;
857 if (k != device_id)
858 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
859 else
860 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0);
861 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
862 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
863 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
864 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
865 }
866 }
867 } else {
868 for (j = saved_aux_size; j < old_saved_aux_size; j++)
869 {
870 for (k = 1; k < parallel_count; k++)
871 {
872 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
873 if (src_copy.d >= 0)
874 {
875 ccv_nnc_tensor_symbol_free(graph, src_copy);
876 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
877 }
878 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
879 if (dest_copy.d >= 0)
880 {
881 ccv_nnc_tensor_symbol_free(graph, dest_copy);
882 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
883 }
884 }
885 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
886 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
887 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
888 }
889 }
890 }
891 }
892 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
893 if (this_parameter_flag)
894 {
895 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
896 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
897 const int* inputs = 0;
898 int input_size = 0;
899 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
900 assert(input_size >= 1);
901 update_inputs[0].d = inputs[0];
902 update_inputs[0].graph = graph;
903 update_inputs[1].d = inputs[1];
904 update_inputs[1].graph = graph;
905 update_outputs[0] = updated_parameters[parameter_indice];
906 for (j = 0; j < saved_aux_size; j++)
907 {
908 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
909 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
910 }
911 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
912 for (k = 1; k < parallel_count; k++)
913 {
914 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
915 assert(copy.d >= 0);
916 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
917 assert(input_size >= 1);
918 update_inputs[0].d = inputs[0];
919 update_inputs[0].graph = graph;
920 update_inputs[1].d = inputs[1];
921 update_inputs[1].graph = graph;
922 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
923 for (j = 0; j < saved_aux_size; j++)
924 {
925 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
926 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
927 }
928 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
929 }
930 }
931 return this_parameter_flag;
932}
933
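// A recorded request to use a specific minimizer for a subset of parameters. The helper
// below replays these requests against the current update nodes, resolving each
// ccv_cnnp_model_io_t selector (param_sel / param_ref) to concrete parameter indices.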
934typedef struct {
935 int parameter_size;
936 ccv_nnc_cmd_t minimizer;
937 ccv_cnnp_model_io_t parameters[1];
938} ccv_cnnp_set_minimizer_for_parameter_t;
939
940static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
941{
942 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
943 assert(compiled_data);
944 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
945 // We update all parameters; at this point, we have one minimizer.
946 const int parameter_size = compiled_data->parameters->rnum;
947 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
948 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
949 assert(symbolic_graph);
950 const int parallel_count = ccv_max(model->parallel_count, 1);
951 ccv_array_t* const parameters = compiled_data->minimize.parameters;
952 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
953 int i, j, flag = 0;
954 for (i = 0; i < parameters->rnum; i++)
955 {
956 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
957 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
958 {
959 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
960 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0);
961 const int old_rnum = parameter_indices->rnum;
962 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
963 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
964 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0);
965 if (param_ref >= 0)
966 {
967 assert(param_ref + old_rnum < parameter_indices->rnum);
968 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
969 parameter_indices->rnum = old_rnum + 1;
970 }
971 }
972 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
973 // We may have duplicated indices, but that is OK, we will set it twice.
974 for (j = 0; j < parameter_indices->rnum; j++)
975 {
976 const int d = *(int*)ccv_array_get(parameter_indices, j);
977 assert(d <= parameter_size);
978 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
979 flag = 1;
980 }
981 ccv_array_clear(parameter_indices);
982 }
983 ccv_array_free(parameter_indices);
984 return flag;
985}
986
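// Re-strides the saved_aux array in place from old_saved_aux_size entries per parameter to
// new_saved_aux_size entries per parameter, walking backwards so entries are never
// overwritten before they are moved; the newly exposed trailing slots are filled with
// NO_TENSOR_SYMBOL. Illustrative layout (assumed values: old = 1, new = 2, 2 parameters):
//   before: [p0.a0, p1.a0]           stride 1
//   after:  [p0.a0, -, p1.a0, -]     stride 2, '-' = NO_TENSOR_SYMBOL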
987static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
988{
989 if (new_saved_aux_size == old_saved_aux_size)
990 return;
991 assert(new_saved_aux_size > old_saved_aux_size);
992 int i, j;
993 for (i = parameter_size - 1; i >= 0; i--)
994 {
995 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
996 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
997 for (j = old_saved_aux_size - 1; j >= 0; j--)
998 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
999 }
1000}
1001
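// Installs symbol-creation hooks so that every tensor symbol, alias and exec symbol added
// after this point is logged into compiled_data->rewindables; _ccv_cnnp_model_rewind_graph
// can then undo those additions when the gradient configuration changes.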
1002static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
1003{
1004 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1005 assert(compiled_data);
1006 if (!compiled_data->rewindables)
1007 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
1008 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0);
1009 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0);
1010 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0);
1011}
1012
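// Extends the forward-only symbolic graph with a backward pass and parameter-update nodes:
// decides which input gradients to compute (per disable_outgrad), calls
// ccv_nnc_symbolic_graph_minimize, seeds the output gradients with ones, wires up graph
// destinations, and, for parallel_count > 1, adds an allreduce over parameter gradients
// plus per-device copies of the evaluate / backward "to" nodes.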
1013static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
1014{
1015 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1016 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1017 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
1018 const int evaluate_to_size = compiled_data->evaluate.to_size;
1019 assert(evaluate_to_size > 0);
1020 const int parallel_count = ccv_max(model->parallel_count, 1);
1021 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1022 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
1023 int i, j;
1024 const int output_size = model->output_size;
1025 assert(!fits || fit_size == output_size * parallel_count);
1026 if (fits)
1027 for (i = 0; i < output_size; i++)
1028 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
1029 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1030 const int parameter_size = compiled_data->parameters->rnum;
1031 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
1032 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
1033 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
1034 int parameter_size_maybe_more = parameter_size;
1035 compiled_data->disable_outgrad = disable_outgrad;
1036 int outgrad_size;
1037 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1038 outgrad_size = 0;
1039 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1040 outgrad_size = model->input_size;
1041 else {
1042 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1043 outgrad_size = 0;
1044 for (i = 0; i < model->input_size; i++)
1045 if (!(disable_outgrad & ((uint64_t)1 << i)))
1046 ++outgrad_size;
1047 }
1048 compiled_data->outgrad_size = outgrad_size;
1049 parameter_size_maybe_more += outgrad_size;
1050 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
1051 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
1052 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
1053 compiled_data->backward.to_size = parameter_size_maybe_more;
1054 ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0);
1055 if (compiled_data->parameter_flags)
1056 {
1057 parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size);
1058 for (i = 0; i < parameter_size; i++)
1059 if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))
1060 parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1061 else
1062 parameters[i] = NO_TENSOR_SYMBOL;
1063 }
1064 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
1065 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1066 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
1067 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1068 else { // Compute minimize with gradients including selected inputs.
1069 assert(model->input_size > 0);
1070 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
1071 assert(outgrad_size > 0);
1072 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
1073 j = 0;
1074 for (i = 0; i < model->input_size; i++)
1075 if (!(disable_outgrad & ((uint64_t)1 << i)))
1076 outgrads[j++] = model->inputs[i];
1077 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
1078 }
1079 if (compiled_data->parameter_flags)
1080 ccfree(parameters);
1081 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
1082 if (compiled_data->minimize.parameters)
1083 _ccv_cnnp_apply_parameters_with_minimizer(model);
1084 // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass.
1085 ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph);
1086 for (i = 0; i < output_size; i++)
1087 {
1088 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1089 // Init this to 1 so we can backprop.
1090 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
1091 }
1092 compiled_data->backward.to_size = 0;
1093 for (i = 0; i < parameter_size_maybe_more; i++)
1094 if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL)
1095 compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
1096 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
1097 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
1098 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
1099 {
1100 if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads.
1101 continue;
1102 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
1103 const int* tos;
1104 int to_size;
1105 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
1106 if (to_size == 0) // If this is the end (no minimizers afterwards), we need to attach this as a destination. Otherwise this is covered in update_nodes.
1107 {
1108 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
1109 const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph);
1110 int flag = 0;
1111 const int outgrad_destination_start = ccv_max(0, destination_count - i);
1112 for (j = i - 1; !flag && j >= 0; j--)
1113 if (j + outgrad_destination_start < destination_count)
1114 flag = (destinations[j + outgrad_destination_start].d == outgrad.d);
1115 if (!flag) // Only if we cannot find it, we add it.
1116 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
1117 }
1118 }
1119 if (parallel_count > 1)
1120 {
1121 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1122 0, 0,
1123 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
1124 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
1125 0, 0, 0,
1126 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1127 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1128 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1129 for (i = 0; i < evaluate_to_size; i++)
1130 for (j = 1; j < parallel_count; j++)
1131 {
1132 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1133 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1134 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1135 }
1136 const int backward_to_size = compiled_data->backward.to_size;
1137 for (i = 0; i < backward_to_size; i++)
1138 for (j = 1; j < parallel_count; j++)
1139 {
1140 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1141 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1142 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1143 }
1144 }
1145 // Only use memory compression / reduction when computing gradients with respect to trainables (with or without input gradients).
1146 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)
1147 {
1148 if (model->memory_compression)
1149 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1150 if (model->memory_reduction)
1151 ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1152 }
1153 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1154 compiled_data->gradient_mode = gradient_mode;
1155}
1156
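// Phase 0 of tensor initialization: allocates the bookkeeping bitmap for initialized
// symbols and the (parameters + internals) * parallel_count pointer table, but no tensor
// memory yet.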
1157void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1158{
1159 assert(!compiled_data->tensors.parameters);
1160 const int parameter_size = compiled_data->parameters->rnum;
1161 const int parallel_count = ccv_max(model->parallel_count, 1);
1162 const int internal_size = compiled_data->internals->rnum;
1163 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1164 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1165 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
1166 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1167}
1168
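// Returns 1 if any parameter or internal tensor (on any device) still needs to be
// allocated, i.e. whether ccv_cnnp_model_tensors_init_1 has work left to do.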
1169int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1170{
1171 int i, j;
1172 const int parameter_size = compiled_data->parameters->rnum;
1173 const int parallel_count = ccv_max(model->parallel_count, 1);
1174 const int internal_size = compiled_data->internals->rnum;
1175 for (i = 0; i < parameter_size; i++)
1176 {
1177 // parameters has to be allocated all together.
1178 if (compiled_data->tensors.parameters[i])
1179 {
1180 for (j = 1; j < parallel_count; j++)
1181 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1182 continue;
1183 }
1184 return 1;
1185 }
1186 for (i = 0; i < internal_size; i++)
1187 {
1188 if (!compiled_data->tensors.internals[i])
1189 return 1;
1190 for (j = 1; j < parallel_count; j++)
1191 if (!compiled_data->tensors.internals[i + j * internal_size])
1192 return 1;
1193 }
1194 return 0;
1195}
1196
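// Phase 1 of tensor initialization: allocates the actual tensors for parameters and
// internals that are still missing, creating one replica per device for data-parallel runs
// and skipping internals whose symbols the tensors_init bitmap already marks as initialized.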
1197void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1198{
1199 int i, j;
1200 const int parameter_size = compiled_data->parameters->rnum;
1201 const int parallel_count = ccv_max(model->parallel_count, 1);
1202 const int internal_size = compiled_data->internals->rnum;
1203 for (i = 0; i < parameter_size; i++)
1204 {
1205 // parameters has to be allocated all together.
1206 if (compiled_data->tensors.parameters[i])
1207 {
1208 for (j = 1; j < parallel_count; j++)
1209 { assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
1210 continue;
1211 }
1212 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1213 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1214 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1215 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1216 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1217 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1218 for (j = 1; j < parallel_count; j++)
1219 {
1220 if (j != device_id)
1221 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1222 else
1223 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1224 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1225 }
1226 }
1227 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1228 for (i = 0; i < internal_size; i++)
1229 {
1230 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1231 const int d = retained.d;
1232 if (init_v[d >> 5] & (1u << (d & 0x1f)))
1233 continue;
1234 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1235 if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
1236 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1237 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
1238 if (!compiled_data->tensors.internals[i])
1239 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1240 for (j = 1; j < parallel_count; j++)
1241 {
1242 if (j != device_id)
1243 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1244 else
1245 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1246 if (!compiled_data->tensors.internals[i + j * internal_size])
1247 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1248 }
1249 }
1250 compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v); // Remove 1 if any.
1251}
1252
1253static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1254{
1255 ccv_cnnp_model_tensors_init_0(model, compiled_data);
1256 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1257}
1258
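// Broadcasts already-initialized tensors (per the tensors_init bitmap) from the primary
// device to their replicas on the other devices with a DATA_TRANSFER command.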
1259static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1260{
1261 assert(parallel_count > 0);
1262 int i, j;
1263 for (i = 0; i < tensor_size; i++)
1264 {
1265 if (!tensors[i])
1266 continue;
1267 const int d = tensor_symbols[i].d;
1268 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1269 continue;
1270 for (j = 1; j < parallel_count; j++)
1271 if (tensors[i + j * tensor_size])
1272 {
1273 ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i]);
1274 ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size]);
1275 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0);
1276 }
1277 }
1278}
1279
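// Frees replica tensors that were allocated for symbols which turn out to have no copy on
// that device in the parallelized graph, so they are never bound or used.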
1280static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1281{
1282 assert(parallel_count > 0);
1283 int i, j;
1284 for (i = 0; i < tensor_size; i++)
1285 {
1286 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1287 for (j = 1; j < parallel_count; j++)
1288 {
1289 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1290 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1291 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1292 { // We shouldn't allocate this, free it up.
1293 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1294 tensors[i + j * tensor_size] = 0;
1295 }
1296 }
1297 }
1298}
1299
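// Collects symbol-to-tensor bindings for ccv_nnc_symbolic_graph_compile: resolves aliases
// to their backing symbols, binds the primary tensor, and binds each device replica to the
// corresponding symbol copy when both exist.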
1300static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1301{
1302 assert(parallel_count > 0);
1303 int i, j;
1304 for (i = 0; i < tensor_size; i++)
1305 {
1306 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1307 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1308 continue;
1309 if (graph)
1310 {
1311 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1312 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1313 tensor_symbol = alias_to;
1314 }
1315 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i]);
1316 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1317 {
1318 const ccv_nnc_tensor_bind_t retained_bind = {
1319 .symbol = tensor_symbol,
1320 .tensor = tensor
1321 };
1322 ccv_array_push(tensor_binds, &retained_bind);
1323 }
1324 for (j = 1; j < parallel_count; j++)
1325 {
1326 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1327 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1328 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1329 {
1330 const ccv_nnc_tensor_bind_t bind = {
1331 .symbol = copy,
1332 .tensor = tensors[i + j * tensor_size]
1333 };
1334 ccv_array_push(tensor_binds, &bind);
1335 }
1336 }
1337 }
1338}
1339
1340static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1341{
1342 if (compiled_data->graph)
1343 ccv_nnc_graph_free(compiled_data->graph);
1344 compiled_data->graph = 0;
1345 compiled_data->is_test = 0;
1346 if (compiled_data->tensor_arena)
1347 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1348 compiled_data->tensor_arena = 0;
1349 if (compiled_data->graph_exec_arena)
1350 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1351 compiled_data->graph_exec_arena = 0;
1352 if (compiled_data->backward.from_ops)
1353 ccfree(compiled_data->backward.from_ops);
1354 compiled_data->backward.from_ops = 0;
1355 if (compiled_data->evaluate.schedule)
1356 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1357 compiled_data->evaluate.schedule = 0;
1358 if (compiled_data->backward.schedule)
1359 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1360 compiled_data->backward.schedule = 0;
1361}
1362
1363static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1364{
1365 if (compiled_data->gradients)
1366 ccfree(compiled_data->gradients);
1367 compiled_data->gradients = 0;
1368 if (compiled_data->updated_parameters)
1369 ccfree(compiled_data->updated_parameters);
1370 compiled_data->updated_parameters = 0;
1371 compiled_data->update_nodes = 0;
1372 compiled_data->saved_aux = 0;
1373}
1374
1375static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1376{
1377 if (compiled_data->backward.gradients)
1378 ccfree(compiled_data->backward.gradients);
1379 compiled_data->backward.gradients = 0;
1380 if (compiled_data->backward.accum)
1381 ccv_nnc_graph_free(compiled_data->backward.accum);
1382 compiled_data->backward.accum = 0;
1383 if (compiled_data->backward.tensor_arena)
1384 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1385 compiled_data->backward.tensor_arena = 0;
1386 if (compiled_data->backward.graph_exec_arena)
1387 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1388 compiled_data->backward.graph_exec_arena = 0;
1389}
1390
1391static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1392{
1393 if (compiled_data->apply_gradients.graph)
1394 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1395 compiled_data->apply_gradients.graph = 0;
1396 if (compiled_data->apply_gradients.tensor_arena)
1397 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1398 compiled_data->apply_gradients.tensor_arena = 0;
1399 if (compiled_data->apply_gradients.graph_exec_arena)
1400 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1401 compiled_data->apply_gradients.graph_exec_arena = 0;
1402}
1403
1404// Compile the graph to run ccv_cnnp_model_fit
1405static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1406{
1407 int i, j;
1408 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1409 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1410 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1411 const int parallel_count = ccv_max(model->parallel_count, 1);
1412 assert(output_size == model->output_size * parallel_count);
1413 assert(!fits || output_size == fit_size);
1414 assert(output_size > 0);
1415 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1416 {
1417 _ccv_cnnp_model_set_rewindables(model);
1418 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1419 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1420 _ccv_cnnp_model_rewind_graph(model);
1421 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1422 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1423 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1424 }
1425 const int tensors_init = !!compiled_data->tensors_init.v;
1426 if (!tensors_init)
1427 _ccv_cnnp_model_tensors_init(model, compiled_data);
1428 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1429 // If the tensors are not yet fully allocated, run init_1 to allocate the rest.
1430 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1431 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1432 assert((input_size % parallel_count) == 0);
1433 assert((output_size % parallel_count) == 0);
1434 assert((fit_size % parallel_count) == 0);
1435 const int input_size_per_p = input_size / parallel_count;
1436 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1437 const int output_size_per_p = output_size / parallel_count;
1438 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1439 const int fit_size_per_p = fit_size / parallel_count;
1440 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1441 const int parameter_size = compiled_data->parameters->rnum;
1442 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1443 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1444 const int internal_size = compiled_data->internals->rnum;
1445 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1446 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1447 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1448 ccv_array_free(tensor_binds);
1449 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
1450 if (tensors_init && parallel_count > 1)
1451 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1452 // If tensor is not init'ed, we need to init states first.
1453 if (_ccv_cnnp_any_to_init(compiled_data))
1454 {
1455 ccv_nnc_tensor_init_states_t tensor_init_states = {
1456 .parallel_count = parallel_count,
1457 .graph = model->graph,
1458 .compiled_data = compiled_data,
1459 .tensor_arena = compiled_data->tensor_arena
1460 };
1461 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1462 }
1463 compiled_data->is_test = 0;
1464 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1465 // No need to set because it defaults to training mode.
1466 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1467 for (i = 0; i < saved_aux_size * parameter_size; i++)
1468 {
1469 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1470 continue;
1471 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1472 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1473 for (j = 1; j < parallel_count; j++)
1474 {
1475 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1476 if (copy)
1477 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1478 }
1479 }
1480 const int evaluate_to_size = compiled_data->evaluate.to_size;
1481 compiled_data->evaluate.to_op_size = 0;
1482 for (i = 0; i < evaluate_to_size; i++)
1483 {
1484 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1485 if (to.graph)
1486 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1487 }
1488 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1489 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1490}
1491
1492ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1493{
1494 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1495 if (!compiled_data || !compiled_data->graph)
1496 return 0;
1497 return ccv_nnc_graph_default_stream(compiled_data->graph);
1498}
1499
1500uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1501{
1502 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1503 if (!compiled_data || !compiled_data->tensor_arena)
1504 return 0;
1505 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1506}
1507
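// Rebinds tensors into an existing tensor arena (after the graph has been compiled); used
// on subsequent calls (e.g. ccv_cnnp_model_fit below) so new input, fit and output tensors
// can be swapped in without re-running the JIT.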
1508static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1509{
1510 int i, j;
1511 for (i = 0; i < tensor_size; i++)
1512 {
1513 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1514 if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1515 continue;
1516 if (graph)
1517 {
1518 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1519 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1520 tensor_symbol = alias_to;
1521 }
1522 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1523 for (j = 1; j < parallel_count; j++)
1524 {
1525 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1526 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1527 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1528 }
1529 }
1530}
1531
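// One full training step: JIT-compiles the fit graph on first use (or when the graph mode
// changed), rebinds inputs / fits / outputs on later calls, and runs the scheduled graph.
// A minimal usage sketch (illustrative only; the tensors here are placeholders for a
// single-input, single-output model):
//   ccv_nnc_tensor_t* input = ...;   // batch of training inputs
//   ccv_nnc_tensor_t* fit = ...;     // matching target tensor
//   ccv_nnc_tensor_t* output = ...;  // receives the model output
//   ccv_cnnp_model_fit(model, &input, 1, &fit, 1, &output, 1, 0, 0);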
1532void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1533{
1534 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1535 assert(compiled_data);
1536 const int parallel_count = ccv_max(model->parallel_count, 1);
1537 assert(output_size == model->output_size * parallel_count);
1538 assert(input_size == model->input_size * parallel_count);
1539 assert(!fits || fit_size == output_size);
1540 assert(model->graph);
1541 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1542 {
1543 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1544 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1545 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1546 // Compile the symbolic graph down only when needed.
1547 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1548 } else {
1549 assert((input_size % parallel_count) == 0);
1550 assert((output_size % parallel_count) == 0);
1551 assert((fit_size % parallel_count) == 0);
1552 const int input_size_per_p = input_size / parallel_count;
1553 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1554 const int output_size_per_p = output_size / parallel_count;
1555 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1556 const int fit_size_per_p = fit_size / parallel_count;
1557 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1558 }
1559 if (compiled_data->is_test)
1560 {
1561 compiled_data->is_test = 0;
1562 ccv_nnc_graph_exec_update_t update = {
1563 .parallel_count = parallel_count,
1564 .graph = model->graph,
1565 .graph_exec_arena = compiled_data->graph_exec_arena,
1566 };
1567 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1568 }
1569 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1570}
1571
1572// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1573static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1574{
1575 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1576 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1577 const int parallel_count = ccv_max(model->parallel_count, 1);
1578 assert(output_size == model->output_size * parallel_count);
1579 assert(output_size > 0);
1580 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here; rather,
1581 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1582 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1583 {
1584 const int evaluate_to_size = compiled_data->evaluate.to_size;
1585 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1586 _ccv_cnnp_model_set_rewindables(model);
1587 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1588 0, 0,
1589 0, 0, 0,
1590 0, 0, 0,
1591 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1592 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1593 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1594 int i, j;
1595 for (i = 0; i < evaluate_to_size; i++)
1596 for (j = 1; j < parallel_count; j++)
1597 {
1598 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1599 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1600 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1601 }
1602 }
1603 const int tensors_init = !!compiled_data->tensors_init.v;
1604 if (!tensors_init)
1605 _ccv_cnnp_model_tensors_init(model, compiled_data);
1606 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1607 // Check whether it is fully allocated; if it is not, run init_1.
1608 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1609 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1610 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1610, __extension__ __PRETTY_FUNCTION__); }))
;
1611 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1611, __extension__ __PRETTY_FUNCTION__); }))
;
1612 const int input_size_per_p = input_size / parallel_count;
1613 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1614 const int output_size_per_p = output_size / parallel_count;
1615 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1616 const int parameter_size = compiled_data->parameters->rnum;
1617 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1618 const int internal_size = compiled_data->internals->rnum;
1619 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1620 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1621 // If we generated gradients for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1622 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1623 ccv_array_free(tensor_binds);
1624 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1625 // If tensor is not init'ed, we need to init states first.
1626 if (tensors_init && parallel_count > 1)
1627 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1628 if (_ccv_cnnp_any_to_init(compiled_data))
1629 {
1630 ccv_nnc_tensor_init_states_t tensor_init_states = {
1631 .parallel_count = parallel_count,
1632 .graph = model->graph,
1633 .compiled_data = compiled_data,
1634 .tensor_arena = compiled_data->tensor_arena
1635 };
1636 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1637 }
1638 compiled_data->is_test = 1;
1639 ccv_nnc_graph_exec_update_t update = {
1640 .parallel_count = parallel_count,
1641 .graph = model->graph,
1642 .graph_exec_arena = compiled_data->graph_exec_arena,
1643 };
1644 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1645 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1646 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1647}
1648
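_ccv_cnnp_model_multistage_no_grad_jit is reached through ccv_cnnp_model_evaluate when params.requires_grad is 0: the first such call compiles the forward-only graph, and later calls just rebind and run it. A hedged sketch of an inference-only call follows, assuming the model was constructed and compiled elsewhere and that a single input and output tensor are already allocated; infer_once is an illustrative wrapper, not part of the library.

#include "ccv_nnc.h" /* assumed, as in this file, to declare the ccv_cnnp_* Level-5 API */

static void infer_once(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const input, ccv_nnc_tensor_t* const output)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 0, /* takes the MULTISTAGE_MODE_NO_GRAD path above */
		.is_test = 1,       /* run e.g. batch norm / dropout in inference mode */
		.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
	};
	/* No tensor tape and default stream. */
	ccv_cnnp_model_evaluate(model, params, &input, 1, &output, 1, 0, 0);
}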
1649static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1650{
1651 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1651, __extension__ __PRETTY_FUNCTION__
); }))
;
1652 const int parameter_size = compiled_data->parameters->rnum;
1653 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1654 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1655 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1656 int i, j;
1657 for (i = 0; i < parameter_size; i++)
1658 {
1659 if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))))
1660 {
1661 compiled_data->tensors.gradients[i] = 0;
1662 compiled_data->tensors.accum_gradients[i] = 0;
1663 for (j = 1; j < parallel_count; j++)
1664 {
1665 compiled_data->tensors.gradients[i + j * parameter_size] = 0;
1666 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1667 }
1668 continue;
1669 }
1670 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1671 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1672 if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY)
1673 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1674 const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8);
1675 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1676 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1677 for (j = 1; j < parallel_count; j++)
1678 {
1679 if (j != device_id)
1680 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1681 else
1682 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1683 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1684 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1685 }
1686 }
1687}
1688
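_ccv_cnnp_model_gradient_tensors_init above makes a single allocation that holds parameter_size * parallel_count gradient slots followed by the same number of accumulator slots; slot i + j * parameter_size is parameter i's gradient on device j, and the accumulators stay NULL until accumulation is actually needed. A small standalone sketch of that layout, with plain ints standing in for ccv_nnc_tensor_t pointers:

#include <stdlib.h>
#include <stdio.h>

int main(void)
{
	const int parameter_size = 4, parallel_count = 2;
	/* One allocation holds both halves, exactly like tensors.gradients /
	 * tensors.accum_gradients above (ints stand in for ccv_nnc_tensor_t*). */
	int* const gradients = (int*)calloc(parameter_size * 2 * parallel_count, sizeof(int));
	int* const accum_gradients = gradients + parameter_size * parallel_count;
	int i, j;
	for (i = 0; i < parameter_size; i++)
		for (j = 0; j < parallel_count; j++)
			gradients[i + j * parameter_size] = 1; /* "allocated" */
	/* accum_gradients[...] stays 0: allocation is delayed until accumulation is needed. */
	printf("grad slots %d, accum slots %d (accum[0] = %d)\n",
		parameter_size * parallel_count, parameter_size * parallel_count, accum_gradients[0]);
	free(gradients);
	return 0;
}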
1689static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1690{
1691 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1692 return 1;
1693 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1694 return 0;
1695 int i;
1696 for (i = 0; i < input_size; i++)
1697 if (!(disable_outgrad & ((uint64_t)1 << i)))
1698 return 0;
1699 return 1;
1700}
1701
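disable_outgrad is a per-input bitmask: bit i set means input i does not need an outgoing gradient, and the helper above returns 1 only when every input is disabled (or the CCV_CNNP_DISABLE_OUTGRAD_ALL sentinel is passed). A standalone sketch of the same loop follows, using a plain mask since the sentinel values are defined elsewhere in the library; all_inputs_disabled is an illustrative name.

#include <stdint.h>
#include <stdio.h>

/* Same loop as _ccv_cnnp_is_disable_outgrad_all, minus the two sentinel checks. */
static int all_inputs_disabled(const uint64_t disable_outgrad, const int input_size)
{
	int i;
	for (i = 0; i < input_size; i++)
		if (!(disable_outgrad & ((uint64_t)1 << i)))
			return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", all_inputs_disabled((1u << 0) | (1u << 2), 3)); /* 0: input 1 still wants an outgrad */
	printf("%d\n", all_inputs_disabled((1u << 0) | (1u << 1) | (1u << 2), 3)); /* 1: all disabled */
	return 0;
}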
1702// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1703// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1704static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1705{
1706 int i, j;
1707 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1708 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1709 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1709, __extension__ __PRETTY_FUNCTION__
); }))
;
1710 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1711 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1712 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1712, __extension__ __PRETTY_FUNCTION__
); }))
;
1713 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1713, __extension__ __PRETTY_FUNCTION__
); }))
;
1714 // There shouldn't be a loss function if we evaluate with multistage jit.
1715 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1715, __extension__ __PRETTY_FUNCTION__
); }))
;
1716 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1717 {
1718 _ccv_cnnp_model_set_rewindables(model);
1719 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1720 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1721 _ccv_cnnp_model_rewind_graph(model);
1722 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1723 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1724 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1725 }
1726 const int tensors_init = !!compiled_data->tensors_init.v;
1727 if (!tensors_init)
1728 _ccv_cnnp_model_tensors_init(model, compiled_data);
1729 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
1730 // Check whether it is fully allocated; if it is not, run init_1.
1731 ccv_cnnp_model_tensors_init_1(model, compiled_data);
1732 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1733 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1733, __extension__ __PRETTY_FUNCTION__); }))
;
1734 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1734, __extension__ __PRETTY_FUNCTION__); }))
;
1735 const int input_size_per_p = input_size / parallel_count;
1736 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1737 const int output_size_per_p = output_size / parallel_count;
1738 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1739 const int parameter_size = compiled_data->parameters->rnum;
1740 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1741 const int internal_size = compiled_data->internals->rnum;
1742 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1743 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1744 if (!compiled_data->tensors.gradients)
1745 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1746 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1747 if (compiled_data->backward.to_size > 0)
1748 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1749 else
1750 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1751 ccv_array_free(tensor_binds);
1752 const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
1753 if (tensors_init && parallel_count > 1)
1754 _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1755 // If tensor is not init'ed, we need to init states first.
1756 if (_ccv_cnnp_any_to_init(compiled_data))
1757 {
1758 ccv_nnc_tensor_init_states_t tensor_init_states = {
1759 .parallel_count = parallel_count,
1760 .graph = model->graph,
1761 .compiled_data = compiled_data,
1762 .tensor_arena = compiled_data->tensor_arena
1763 };
1764 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1765 }
1766 compiled_data->is_test = is_test;
1767 ccv_nnc_graph_exec_update_t update = {
1768 .parallel_count = parallel_count,
1769 .graph = model->graph,
1770 .graph_exec_arena = compiled_data->graph_exec_arena,
1771 };
1772 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1773 const int evaluate_to_size = compiled_data->evaluate.to_size;
1774 compiled_data->evaluate.to_op_size = 0;
1775 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1776 for (i = 0; i < evaluate_to_size; i++)
1777 {
1778 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1779 if (to_op.graph)
1780 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1781 const int* tos;
1782 int to_size;
1783 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1784 for (j = 0; j < to_size; j++)
1785 {
1786 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1787 .d = tos[j],
1788 .graph = model->graph
1789 });
1790 if (to_op.graph)
1791 ccv_array_add_unique_int(backward_from, to_op.d);
1792 }
1793 }
1794 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1794, __extension__
__PRETTY_FUNCTION__); }))
;
1795 compiled_data->backward.from_op_size = backward_from->rnum;
1796 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1797 for (i = 0; i < backward_from->rnum; i++)
1798 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1799 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1800 .graph = compiled_data->graph,
1801 };
1802 ccv_array_free(backward_from);
1803 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count);
1804 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1805}
1806
1807void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1808{
1809 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1810 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1810, __extension__ __PRETTY_FUNCTION__); }))
;
1811 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1812 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__
); }))
;
1813 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1813, __extension__ __PRETTY_FUNCTION__
); }))
;
1814 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1814, __extension__ __PRETTY_FUNCTION__); }))
;
1815 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1816 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1817 if (!compiled_data->graph || mode_mismatch)
1818 {
1819 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1820 if (mode_mismatch) // If there is a mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad).
1821 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1822 if (params.requires_grad)
1823 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1824 else
1825 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1826 } else {
1827 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1828 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1828, __extension__ __PRETTY_FUNCTION__); }))
;
1829 const int input_size_per_p = input_size / parallel_count;
1830 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1831 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1831, __extension__ __PRETTY_FUNCTION__); }))
;
1832 const int output_size_per_p = output_size / parallel_count;
1833 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1834 }
1835 if (compiled_data->is_test != params.is_test)
1836 {
1837 compiled_data->is_test = params.is_test;
1838 ccv_nnc_graph_exec_update_t update = {
1839 .parallel_count = parallel_count,
1840 .graph = model->graph,
1841 .graph_exec_arena = compiled_data->graph_exec_arena,
1842 };
1843 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1844 }
1845}
1846
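ccv_cnnp_model_dry_run above does all the work of an evaluation except running the graph: it JIT-compiles the right graph for the requested mode and binds the given tensors. A hedged sketch of using it to force compilation before the first real evaluate call follows; prepare_for_training is an illustrative name, and the model and tensors are assumed to be set up by the caller.

#include "ccv_nnc.h" /* assumed, as in this file, to declare the ccv_cnnp_* Level-5 API */

/* Sketch: pay the JIT / autotune cost up front, before timing-sensitive work. */
static void prepare_for_training(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const input, ccv_nnc_tensor_t* const output)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 1, /* compiles the evaluation + backprop graph (_ccv_cnnp_model_multistage_jit_0) */
		.is_test = 0,
		.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
	};
	ccv_cnnp_model_dry_run(model, params, &input, 1, &output, 1);
}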
1847void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1848{
1849 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1850 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1850, __extension__ __PRETTY_FUNCTION__); }))
;
1851 ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
1852 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1853 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1854 else {
1855 if (!compiled_data->evaluate.schedule)
1856 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1857 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1858 }
1859}
1860
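ccv_cnnp_model_evaluate with requires_grad set is the forward half of a manual training step; ccv_cnnp_model_backward and ccv_cnnp_model_apply_gradients below complete it. A hedged sketch of one such step follows, assuming a compiled model with a minimizer already set, and assuming the caller owns x, y_hat and dloss_dy and fills dloss_dy from its own loss; train_step is illustrative, not library code.

#include "ccv_nnc.h" /* assumed, as in this file, to declare the ccv_cnnp_* Level-5 API */

/* Sketch of one manual training step (error handling omitted). */
static void train_step(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const x, ccv_nnc_tensor_t* const y_hat, ccv_nnc_tensor_t* const dloss_dy)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = 1, /* the forward pass keeps what backward needs */
		.is_test = 0,
		.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL, /* we only want parameter gradients */
	};
	ccv_cnnp_model_evaluate(model, params, &x, 1, &y_hat, 1, 0, 0);
	/* The caller computes dloss_dy from y_hat and the labels, then: */
	ccv_cnnp_model_backward(model, &dloss_dy, 1, 0, 0, 0, 0);
	ccv_cnnp_model_apply_gradients(model, 0);
}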
1861// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1862// Particularly, this method compiles the accumulator graph.
1863static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1864{
1865 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1866 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1866, __extension__ __PRETTY_FUNCTION__); }))
;
1867 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1867, __extension__ __PRETTY_FUNCTION__
); }))
;
1868 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1869 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1870 const int parameter_size = compiled_data->parameters->rnum;
1871 int i, j;
1872 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1873 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1874 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1875 for (i = 0; i < parameter_size; i++)
1876 for (j = 0; j < parallel_count; j++)
1877 if (compiled_data->tensors.gradients[i + j * parameter_size])
1878 {
1879 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1880 // Now, the old gradient is the accumulated gradient; set up a new gradient tensor so we can collect them.
1881 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1882 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1883 ccv_nnc_tensor_symbol_t inputs[2];
1884 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1885 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1886 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1887 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1888 } else {
1889 compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1890 compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1891 compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1892 }
1893 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1894 if (ccv_nnc_symbolic_graph_source_size(accum) == 0)
1895 {
1896 ccv_nnc_symbolic_graph_free(accum);
1897 // Create empty graph.
1898 compiled_data->backward.accum = ccv_nnc_graph_new();
1899 ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0);
1900 return;
1901 }
1902 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1903 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1904 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1905 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1906 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1907 ccv_nnc_symbolic_graph_free(accum);
1908 ccv_array_free(tensor_binds);
1909 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count);
1910}
1911
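The accumulator graph built above reduces to one element-wise sum per parameter copy: the previously accumulated gradient plus the freshly computed gradient, with the updated symbol bound to the accumulator's storage so the result lands in place. A trivial standalone sketch of that contract, with plain floats standing in for tensors:

#include <stdio.h>

int main(void)
{
	/* accum_gradient and gradient play the roles of the two EWSUM inputs; the
	 * result is written back into the accumulator's storage, which is what
	 * binding updated_accum_gradients to tensors.accum_gradients does above. */
	float accum_gradient = 0.25f; /* gradient from earlier backward passes */
	float gradient = 0.75f;       /* gradient from the latest backward pass */
	accum_gradient = accum_gradient + gradient; /* CMD_EWSUM_FORWARD, in effect */
	printf("accumulated: %g\n", accum_gradient); /* 1 */
	return 0;
}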
1912void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1913{
1914 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1915 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1915, __extension__ __PRETTY_FUNCTION__); }))
;
1916 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1916, __extension__ __PRETTY_FUNCTION__
); }))
;
1917 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1918 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1918, __extension__ __PRETTY_FUNCTION__
); }))
;
1919 if (outgrad_size > 0)
1920 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1920, __extension__ __PRETTY_FUNCTION__
); }))
; }
1921 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1921, __extension__ __PRETTY_FUNCTION__); }))
;
1922 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1922, __extension__ __PRETTY_FUNCTION__
); }))
;
1923 const int parameter_size = compiled_data->parameters->rnum;
1924 // If we need to accumulate the gradients now, do jit on accumulator.
1925 if (compiled_data->backward.count > 0)
1926 {
1927 if (!compiled_data->backward.accum)
1928 _ccv_cnnp_model_multistage_jit_1(model);
1929 else if (compiled_data->backward.count == 1) {
1930 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1931 int i;
1932 for (i = 0; i < parameter_size * parallel_count; i++)
1933 {
1934 ccv_nnc_tensor_t* tensor;
1935 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
1936 }
1937 if (compiled_data->backward.tensor_arena)
1938 {
1939 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1940 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1941 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1942 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1943 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1944 }
1945 }
1946 }
1947 const int ingrad_size_per_p = model->output_size;
1948 const int outgrad_size_per_p = compiled_data->outgrad_size;
1949 int i, j;
1950 for (i = 0; i < ingrad_size_per_p; i++)
1951 {
1952 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1953 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1954 {
1955 // Set it to 1 if it is not specified.
1956 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1957 if (ingrad_tensor)
1958 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1959 for (j = 1; j < parallel_count; j++)
1960 {
1961 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
1962 if (ingrad_tensor)
1963 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1964 }
1965 } else {
1966 // Make sure the length matches, in case it is an alias.
1967 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 1967, __extension__ __PRETTY_FUNCTION__
); }))
;
1968 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
1969 for (j = 1; j < parallel_count; j++)
1970 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
1971 }
1972 }
1973 if (outgrad_size > 0)
1974 {
1975 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 1975, __extension__ __PRETTY_FUNCTION__
); }))
;
1976 for (i = 0; i < outgrad_size_per_p; i++)
1977 if (outgrads[i])
1978 {
1979 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
1980 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
1981 for (j = 1; j < parallel_count; j++)
1982 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
1983 }
1984 } else {
1985 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__
); }))
1986 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__
); }))
;
1987 }
1988 // We need to rebind here because ccv_cnnp_model_evaluate clears bindings, which resets all bindings for the gradients.
1989 // For parameters and internals this is fine because clearing bindings restores the original bindings, which are those
1990 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
1991 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
1992 if (!compiled_data->backward.schedule)
1993 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
1994 // Run the backward pass.
1995 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
1996 // If we need to run accumulation round, do that now.
1997 if (compiled_data->backward.count > 0)
1998 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
2000 // Update the count; this determines whether we need to accumulate or not.
2000 ++compiled_data->backward.count;
2001}
2002
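Because backward.count is only reset by ccv_cnnp_model_apply_gradients, calling ccv_cnnp_model_backward several times between optimizer steps accumulates gradients (the second call is what triggers _ccv_cnnp_model_multistage_jit_1 above). A hedged sketch follows, where forward_and_loss_grad is a hypothetical caller-provided helper that runs the forward pass with requires_grad = 1 and fills dloss_dy; accumulated_step is likewise illustrative.

#include "ccv_nnc.h" /* assumed, as in this file, to declare the ccv_cnnp_* Level-5 API */

/* Sketch: accumulate gradients over `accum_steps` micro-batches, then apply once. */
static void accumulated_step(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const dloss_dy, const int accum_steps)
{
	int step;
	for (step = 0; step < accum_steps; step++)
	{
		/* forward_and_loss_grad(model, dloss_dy, step); // hypothetical: evaluate + loss gradient */
		ccv_cnnp_model_backward(model, &dloss_dy, 1, 0, 0, 0, 0);
	}
	/* One optimizer step over the accumulated gradients; this resets backward.count. */
	ccv_cnnp_model_apply_gradients(model, 0);
}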
2003// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
2004// Particularly, this method compiles the parameter update graph.
2005static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
2006{
2007 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2008 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2008, __extension__ __PRETTY_FUNCTION__
); }))
;
2009 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2010 const int parameter_size = compiled_data->parameters->rnum;
2011 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
2012 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2013 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
2014 // Bind accumulated gradients.
2015 if (compiled_data->backward.count > 1)
2016 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
2017 else
2018 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
2019 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
2020 int i, j;
2021 for (i = 0; i < compiled_data->backward.to_size; i++)
2022 {
2023 const int* tos;
2024 int to_size;
2025 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
2026 for (j = 0; j < to_size; j++)
2027 {
2028 // Check if this already shows up in the backward graph; if that is the case, it won't be in the apply
2029 // gradients graph.
2030 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
2031 .d = tos[j],
2032 .graph = model->graph,
2033 });
2034 if (!exec.graph)
2035 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
2036 }
2037 }
2038 const int from_size = apply_gradients_from->rnum;
2039 if (from_size == 0)
2040 {
2041 ccv_array_free(apply_gradients_from);
2042 ccv_array_free(tensor_binds);
2043 return;
2044 }
2045 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
2046 for (i = 0; i < from_size; i++)
2047 froms[i] = (ccv_nnc_graph_exec_symbol_t){
2048 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
2049 .graph = model->graph
2050 };
2051 ccv_array_free(apply_gradients_from);
2052 // It can only end with updates on the parameters.
2053 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
2054 for (i = 0; i < parameter_size; i++)
2055 {
2056 if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL)
2057 continue;
2058 ccv_array_push(tos, &compiled_data->update_nodes[i]);
2059 for (j = 1; j < parallel_count; j++)
2060 {
2061 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
2062 ccv_array_push(tos, &copy);
2063 }
2064 }
2065 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
2066 ccv_array_free(tos);
2067 ccv_array_free(tensor_binds);
2068 ccfreefree(froms);
2069 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2070 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
2071 {
2072 // Skip on no tensor.
2073 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
2074 continue;
2075 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
2076 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
2077 for (j = 1; j < parallel_count; j++)
2078 {
2079 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
2080 if (copy)
2081 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
2082 }
2083 }
2084 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count);
2085}
2086
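When the apply-gradients graph is first compiled above, the minimizer's saved auxiliary tensors (for example momentum slots) are zeroed with CMD_SET_FORWARD(0). A tiny numeric sketch of why that zeroing matters follows; the momentum-style update rule here is illustrative only and is not the library's minimizer.

#include <stdio.h>

int main(void)
{
	/* Stand-in for one saved auxiliary value: it must start at 0, which is
	 * what the CMD_SET_FORWARD(0) calls above guarantee. */
	float momentum = 0.f;
	float parameter = 1.f;
	const float gradient = 0.5f, rate = 0.1f, decay = 0.9f;
	momentum = decay * momentum + gradient; /* illustrative aux-state update */
	parameter -= rate * momentum;           /* illustrative parameter update */
	printf("parameter after one step: %g\n", parameter); /* 0.95 */
	return 0;
}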
2087void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
2088{
2089 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2090 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2090, __extension__ __PRETTY_FUNCTION__); }))
;
2091 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 2091, __extension__ __PRETTY_FUNCTION__
); }))
;
2092 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2093 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 2093, __extension__ __PRETTY_FUNCTION__); }))
;
2094 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 2094, __extension__ __PRETTY_FUNCTION__
); }))
;
2095 // Skip if there is no backward pass.
2096 if (compiled_data->backward.count <= 0)
2097 return;
2098 // Skip if there are no parameters.
2099 if (compiled_data->parameters->rnum == 0)
2100 {
2101 compiled_data->backward.count = 0;
2102 return;
2103 }
2104 if (!compiled_data->apply_gradients.graph)
2105 _ccv_cnnp_model_multistage_jit_2(model);
2106 else {
2107 const int parameter_size = compiled_data->parameters->rnum;
2108 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
2109 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
2110 if (compiled_data->backward.count > 1)
2111 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
2112 else
2113 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
2114 }
2115 if (compiled_data->apply_gradients.graph)
2116 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
2117 // Reset backward count to 0.
2118 compiled_data->backward.count = 0;
2119}
2120
2121void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
2122{
2123 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2124 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2125 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2125, __extension__ __PRETTY_FUNCTION__
); }))
;
2126 const int tensors_init = !!compiled_data->tensors_init.v;
2127 if (!tensors_init)
2128 _ccv_cnnp_model_tensors_init(model, compiled_data);
2129 else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1)
2130 // Check whether it is fully allocated; if it is not, run init_1.
2131 ccv_cnnp_model_tensors_init_1(model, compiled_data);
2132 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2133 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2134 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2135 if (param_ref < 0)
2136 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2136
, __extension__ __PRETTY_FUNCTION__); }))
; }
2137 else
2138 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2138, __extension__ __PRETTY_FUNCTION__
); }))
; }
2139 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2140 ccv_array_free(parameter_indices);
2141 const int parameter_size = compiled_data->parameters->rnum;
2142 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2142
, __extension__ __PRETTY_FUNCTION__); }))
;
2143 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2143, __extension__ __PRETTY_FUNCTION__
); }))
;
2144 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2145 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2146 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2146, __extension__
__PRETTY_FUNCTION__); }))
;
2147 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2148 int i;
2149 for (i = 1; i < parallel_count; i++)
2150 {
2151 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d + i * parameter_size]) & ~(uintptr_t)1))
;
2152 if (copy_tensor)
2153 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2154 }
2155 // Mark this symbol as init'ed.
2156 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
2157 uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) &
~(uintptr_t)1))
;
2158 init_v[s >> 5] |= (1u << (s & 0x1f));
2159}
2160
2161void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
2162{
2163 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2164 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2165 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2165, __extension__ __PRETTY_FUNCTION__
); }))
;
2166 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2166, __extension__ __PRETTY_FUNCTION__
); }))
;
2167 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2168 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2169 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2170 if (param_ref < 0)
2171 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2171
, __extension__ __PRETTY_FUNCTION__); }))
; }
2172 else
2173 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2173, __extension__ __PRETTY_FUNCTION__
); }))
; }
2174 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2175 ccv_array_free(parameter_indices);
2176 const int parameter_size = compiled_data->parameters->rnum;
2177 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2177
, __extension__ __PRETTY_FUNCTION__); }))
;
2178 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2178, __extension__ __PRETTY_FUNCTION__
); }))
;
2179 // We don't need to consider parallel_count, every parameter on each device is identical.
2180 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2181 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2181, __extension__
__PRETTY_FUNCTION__); }))
;
2182 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2183}
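A hedged usage sketch of `ccv_cnnp_model_parameter_copy` as defined above: it transfers one parameter into a caller-provided tensor via a data-transfer command. The fragment assumes `model` is already compiled and init'ed; `ccv_nnc_tensor_new` and the choice of parameter 0 are illustrative assumptions, not taken from this listing.

/* Fragment: copy the first parameter out of an already compiled and init'ed model. */
ccv_cnnp_model_io_t const first = ccv_cnnp_model_parameters(model, -1, 0);
const ccv_nnc_tensor_param_t params = ccv_cnnp_model_parameter_tensor_params(model, first);
ccv_nnc_tensor_t* const out = ccv_nnc_tensor_new(0, params, 0); /* assumed allocator matching shape / type */
ccv_cnnp_model_parameter_copy(model, first, out);
/* ... inspect or serialize the copied data here ... */
ccv_nnc_tensor_free(out);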
2184
2185ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2186{
2187 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2188 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2189 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2189, __extension__ __PRETTY_FUNCTION__
); }))
;
2190 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2190, __extension__ __PRETTY_FUNCTION__
); }))
;
2191 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2192 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2193 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2194 if (param_ref < 0)
2195 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2195
, __extension__ __PRETTY_FUNCTION__); }))
; }
2196 else
2197 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2197, __extension__ __PRETTY_FUNCTION__
); }))
; }
2198 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2199 ccv_array_free(parameter_indices);
2200 const int parameter_size = compiled_data->parameters->rnum;
2201 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2201
, __extension__ __PRETTY_FUNCTION__); }))
;
2202 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2202, __extension__ __PRETTY_FUNCTION__
); }))
;
2203 // We don't need to consider parallel_count, every parameter on each device is identical.
2204 ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters
[d]) & ~(uintptr_t)1))
;
2205 assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor
) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2205, __extension__
__PRETTY_FUNCTION__); }))
;
2206 return tensor->info;
2207}
2208
2209const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
2210{
2211 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2212 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
2213 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 2213, __extension__ __PRETTY_FUNCTION__
); }))
;
2214 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2215 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
2216 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
2217 if (param_ref < 0)
2218 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2218
, __extension__ __PRETTY_FUNCTION__); }))
; }
2219 else
2220 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2220, __extension__ __PRETTY_FUNCTION__
); }))
; }
2221 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
2222 ccv_array_free(parameter_indices);
2223 const int parameter_size = compiled_data->parameters->rnum;
2224 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2224
, __extension__ __PRETTY_FUNCTION__); }))
;
2225 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 2225, __extension__ __PRETTY_FUNCTION__
); }))
;
2226 return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(d)))
;
2227}
2228
2229int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model)
2230{
2231 assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__
({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data"
, "ccv_cnnp_model.c", 2231, __extension__ __PRETTY_FUNCTION__
); }))
;
2232 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2233 return compiled_data->parameters->rnum;
2234}
2235
2236ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context)
2237{
2238 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2239 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2239, __extension__ __PRETTY_FUNCTION__); }))
;
2240 const int parameter_size = compiled_data->parameters->rnum;
2241 int i;
2242 for (i = 0; i < parameter_size; i++)
2243 {
2244 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2245 if (first(model, name, context))
2246 return ccv_cnnp_model_parameters(model, -1, i);
2247 }
2248 return 0;
2249}
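`ccv_cnnp_model_parameter_first` above (and `ccv_cnnp_model_parameters_filter` below) simply walk `ids.parameters` and hand each stored name to the callback. A sketch of one such callback that selects parameters by substring match; the prototype is inferred from the call sites (`first(model, name, context)` returning nonzero to select), so treat the exact signature as an assumption.

#include <string.h>

/* Select any parameter whose id contains the substring passed through context. */
static int match_substring(const ccv_cnnp_model_t* const model, const char* const name, void* const context)
{
	(void)model;
	return name && strstr(name, (const char*)context) != 0;
}

/* Usage sketch:
 *   ccv_cnnp_model_io_t const bias = ccv_cnnp_model_parameter_first(model, match_substring, (void*)"bias");
 */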
2250
2251ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context)
2252{
2253 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2254 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2254, __extension__ __PRETTY_FUNCTION__); }))
;
2255 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0);
2256 const int parameter_size = compiled_data->parameters->rnum;
2257 int i;
2258 for (i = 0; i < parameter_size; i++)
2259 {
2260 const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
;
2261 if (filter(model, name, context))
2262 {
2263 ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i);
2264 ccv_array_push(parameters, &parameter);
2265 }
2266 }
2267 return parameters;
2268
2269}
2270
2271static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
2272{
2273 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
2274 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 2274, __extension__
__PRETTY_FUNCTION__); }))
;
2275 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2276 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
2277 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
2278 return to_parameter_indices;
2279}
2280
2281static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0)
2282{
2283 // If the model is not compiled yet, compile it now.
2284 if (!model->graph)
2285 {
2286 model->graph = ccv_nnc_symbolic_graph_new();
2287 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 2287, __extension__ __PRETTY_FUNCTION__
); }))
;
2288 const int input_size = from_model->input_size;
2289 ccv_nnc_tensor_param_t input_params[input_size];
2290 int i;
2291 for (i = 0; i < input_size; i++)
2292 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
2293 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
2294 model->parallel_count = from_model->parallel_count;
2295 model->memory_compression = from_model->memory_compression;
2296 model->memory_reduction = from_model->memory_reduction;
2297 model->gradient_checkpointing = from_model->gradient_checkpointing;
2298 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
2299 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
2300 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
2301 }
2302 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2303 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2303, __extension__ __PRETTY_FUNCTION__
); }))
;
2304 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
2305 if (!to_tensors_init)
2306 {
2307 if (only_init_0)
2308 ccv_cnnp_model_tensors_init_0(model, to_compiled_data);
2309 else
2310 _ccv_cnnp_model_tensors_init(model, to_compiled_data);
2311 } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1)
2312 // Check if it is not fully allocated; if it is not, run init_1.
2313 ccv_cnnp_model_tensors_init_1(model, to_compiled_data);
2314 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2314, __extension__ __PRETTY_FUNCTION__
); }))
;
2315 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
2316 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
2317 if (*from_param_ref < 0 && *param_ref >= 0)
2318 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 2318, __extension__ __PRETTY_FUNCTION__
); }))
; }
2319 else if (*from_param_ref >= 0)
2320 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2320, __extension__ __PRETTY_FUNCTION__
); }))
; }
2321 if (*param_ref < 0 && *from_param_ref >= 0)
2322 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 2322, __extension__ __PRETTY_FUNCTION__); }))
; }
2323 else if (*param_ref >= 0)
2324 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 2324, __extension__ __PRETTY_FUNCTION__
); }))
; }
2325}
2326
2327void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2328{
2329 ccv_array_t* to_parameter_indices;
2330 int to_param_ref;
2331 ccv_array_t* from_parameter_indices;
2332 int from_param_ref;
2333 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2334 // Should be exactly the same tensor.
2335 if (to_param_ref < 0 && from_param_ref < 0)
2336 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2336, __extension__ __PRETTY_FUNCTION__
); }))
; }
2337 // To models.
2338 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2339 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2339, __extension__ __PRETTY_FUNCTION__
); }))
;
2340 // From models.
2341 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2342 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2343 const int to_parameter_size = to_compiled_data->parameters->rnum;
2344 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2345 int i, j;
2346 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2347 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2348 for (i = 0; i < rnum; i++)
2349 {
2350 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2351 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2351, __extension__ __PRETTY_FUNCTION__); }))
;
2352 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2352, __extension__ __PRETTY_FUNCTION__
); }))
;
2353 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2354 // If the original is not init'ed, we cannot copy from it.
2355 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2356 continue;
2357 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2358 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2358, __extension__ __PRETTY_FUNCTION__); }))
;
2359 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2359, __extension__ __PRETTY_FUNCTION__
); }))
;
2360 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2361 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2361, __extension__
__PRETTY_FUNCTION__); }))
;
2362 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2363 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2363, __extension__
__PRETTY_FUNCTION__); }))
;
2364 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2365 for (j = 1; j < parallel_count; j++)
2366 {
2367 ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2368 if (copy_tensor)
2369 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2370 }
2371 // Mark this symbol as init'ed.
2372 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2373 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2374 }
2375 ccv_array_free(to_parameter_indices);
2376 ccv_array_free(from_parameter_indices);
2377}
2378
2379KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t *
keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__
((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id
(void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof(
kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ (
(__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h) { if (h && h->flags) { memset(h->flags, 0xaa
, ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4)
* sizeof(khint32_t)); h->size = h->n_occupied = 0; } }
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id
(const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask
; last = i; while (!((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && (((h->flags[i>>4]
>>((i&0xfU)<<1))&1) || !(strcmp(h->keys
[i], key) == 0))) { i = (i + (++step)) & mask; if (i == last
) return h->n_buckets; } return ((h->flags[i>>4]>>
((i&0xfU)<<1))&3)? h->n_buckets : i; } else return
0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id
(kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t
*new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets
)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)
>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets
)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)
>>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets
= 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER
+ 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { int *new_vals = (int*)realloc(
(void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals
) { free(new_flags); return -1; } h->vals = new_vals; } } }
} if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h->
flags[j>>4]>>((j&0xfU)<<1))&3) == 0
) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask;
new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h
->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while
(1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key);
i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t
*h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets
- 1) < 0) { *ret = -1; return h->n_buckets; } } else if
(kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) <
0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site
, last, mask = h->n_buckets - 1, step = 0; x = site = h->
n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !(strcmp
(h->keys[i], key) == 0))) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&1)) site = i; i = (i + (++step))
& mask; if (i == last) { x = site; break; } } if (x == h
->n_buckets) { if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2) && site != h->n_buckets) x
= site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t
x) { if (x != h->n_buckets && !((h->flags[x>>
4]>>((x&0xfU)<<1))&3)) { (h->flags[x>>
4]|=1ul<<((x&0xfU)<<1)); --h->size; } }
27
Taking true branch
28
Taking false branch
29
Calling 'kh_resize_ccv_cnnp_parameter_id'
30
Taking true branch
31
Assuming the condition is false
32
Taking false branch
33
'?' condition is true
34
Assuming 'new_flags' is non-null, which participates in a condition later
35
Taking false branch
36
'?' condition is true
37
Taking true branch
38
Assuming 'new_keys' is non-null, which participates in a condition later
39
Taking false branch
40
Taking true branch
41
Storing uninitialized value
42
Assuming 'new_vals' is non-null, which participates in a condition later
43
Taking false branch
44
Taking true branch
45
Loop condition is false. Execution continues on line 2379
46
Taking false branch
47
Returning from 'kh_resize_ccv_cnnp_parameter_id'
48
Taking false branch
49
Assuming the condition is true
50
Taking true branch
51
Taking true branch
57
Taking true branch
58
Assuming the condition is true
59
Assuming the condition is true
60
The value 1 is assigned to 'i'
61
Taking false branch
62
Assuming the condition is false
63
Assuming the condition is false
64
'?' condition is false
65
Returning the value 1
2380
2381void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context)
2382{
2383 ccv_array_t* to_parameter_indices;
2384 int to_param_ref;
2385 ccv_array_t* from_parameter_indices;
2386 int from_param_ref;
2387 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
2388 // Should be exactly the same tensor.
2389 if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
1
Assuming 'renamer' is not equal to null
2390 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2390, __extension__ __PRETTY_FUNCTION__
); }))
; }
2391 // To models.
2392 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2393 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2393, __extension__ __PRETTY_FUNCTION__
); }))
;
2
Assuming 'to_compiled_data' is non-null
3
Taking true branch
2394 // From models.
2395 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2396 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
4
Assuming '_a' is <= '_b'
5
'?' condition is false
2397 assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model->
parallel_count) _a = (from_model->parallel_count); typeof (
1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (parallel_count == ({ typeof (from_model->parallel_count
) _a = (from_model->parallel_count); typeof (1) _b = (1); (
_a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)"
, "ccv_cnnp_model.c", 2397, __extension__ __PRETTY_FUNCTION__
); }))
; // Must have the same parallel count to share parameters.
6
Assuming '_a' is <= '_b'
7
'?' condition is false
8
Taking true branch
2398 const int from_parameter_size = from_compiled_data->parameters->rnum;
2399 const int to_parameter_size = to_compiled_data->parameters->rnum;
2400 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is >= 0
2401 int i, j;
2402 khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0;
2403 char* updated_name = 0;
2404 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2405 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2406 for (i = 0; i < rnum; i++)
2407 {
2408 int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
: from_parameter_size;
10
Assuming 'from_param_ref' is < 0
11
'?' condition is false
12
Assuming the condition is false
13
'?' condition is false
2409 // Need to figure out how to use the renamer here.
2410 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
14
'?' condition is true
2411 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2411, __extension__ __PRETTY_FUNCTION__); }))
;
15
Assuming 'dest_d' is >= 0
16
Taking true branch
2412 assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__
({ if (dest_d < to_parameter_size) ; else __assert_fail (
"dest_d < to_parameter_size", "ccv_cnnp_model.c", 2412, __extension__
__PRETTY_FUNCTION__); }))
;
17
Assuming 'dest_d' is < 'to_parameter_size'
18
Taking true branch
2413 if (renamer
18.1
'renamer' is non-null
)
2414 {
2415 const char* const src_name = (src_d
18.2
'src_d' is >= 'from_parameter_size'
< from_parameter_size && src_d >= 0) ? *(char**)ccv_array_get(from_compiled_data->ids.parameters, src_d)((void*)(((char*)((from_compiled_data->ids.parameters)->
data)) + (size_t)(from_compiled_data->ids.parameters)->
rsize * (size_t)(src_d)))
: 0;
2416 const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data
)) + (size_t)(to_compiled_data->ids.parameters)->rsize *
(size_t)(dest_d)))
;
2417 if (!updated_name
18.3
'updated_name' is null
)
19
Taking true branch
2418 updated_name = (char*)ccmallocmalloc(1024);
2419 const size_t src_name_len = src_name
19.1
'src_name' is equal to null
== 0 ? 0 : ccv_min(strnlen(src_name, 1023), 1023)({ typeof (strnlen(src_name, 1023)) _a = (strnlen(src_name, 1023
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; })
;
20
'?' condition is true
2420 if (src_name_len
20.1
'src_name_len' is <= 0
> 0)
21
Taking false branch
2421 memcpy(updated_name, src_name, src_name_len);
2422 updated_name[src_name_len] = 0;
2423 if (renamer(context, dest_name, updated_name, 1024) != 0)
22
Assuming the condition is false
2424 continue; // Skip this.
2425 if (src_name
22.1
'src_name' is equal to null
!= 0 && memcmp(updated_name, src_name, src_name_len) == 0 && strnlen(updated_name, 1023) == src_name_len)
2426 {
2427 // Nothing changed.
2428 } else {
2429 if (!id_map
22.2
'id_map' is null
)
23
Taking true branch
2430 {
2431 id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id();
2432 for (j = 0; j < from_parameter_size; j++)
24
Assuming 'j' is < 'from_parameter_size'
25
Loop condition is true. Entering loop body
54
Assuming 'j' is >= 'from_parameter_size'
55
Loop condition is false. Execution continues on line 2440
2433 {
2434 int ret;
2435 const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char
*)((from_compiled_data->ids.parameters)->data)) + (size_t
)(from_compiled_data->ids.parameters)->rsize * (size_t)
(j))), &ret)
;
26
Calling 'kh_put_ccv_cnnp_parameter_id'
52
Returning from 'kh_put_ccv_cnnp_parameter_id'
2436 assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret
!= 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2436
, __extension__ __PRETTY_FUNCTION__); }))
;
53
Taking true branch
2437 kh_val(id_map, k)((id_map)->vals[k]) = j;
2438 }
2439 }
2440 const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name);
56
Calling 'kh_get_ccv_cnnp_parameter_id'
66
Returning from 'kh_get_ccv_cnnp_parameter_id'
67
'k' initialized to 1
2441 if (k
67.1
'k' is not equal to field 'n_buckets'
== kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip.
68
Taking false branch
2442 continue;
2443 src_d = kh_val(id_map, k)((id_map)->vals[k]);
69
Assigned value is garbage or undefined
2444 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2444, __extension__ __PRETTY_FUNCTION__); }))
;
2445 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2445, __extension__
__PRETTY_FUNCTION__); }))
;
2446 }
2447 }
2448 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2448, __extension__ __PRETTY_FUNCTION__); }))
;
2449 assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__
({ if (src_d < from_parameter_size) ; else __assert_fail (
"src_d < from_parameter_size", "ccv_cnnp_model.c", 2449, __extension__
__PRETTY_FUNCTION__); }))
;
2450 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2451 // If the original is not init'ed, we cannot share from it.
2452 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2453 continue;
2454 for (j = 0; j < parallel_count; j++)
2455 {
2456 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * from_parameter_size]) & ~(uintptr_t
)1))
;
2457 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2457, __extension__
__PRETTY_FUNCTION__); }))
;
2458 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2459 if (dest && !((uintptr_t)dest & (uintptr_t)1))
2460 ccv_nnc_tensor_free(dest);
2461 to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
2462 }
2463 // Mark this symbol as init'ed.
2464 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2465 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2466 }
2467 ccv_array_free(to_parameter_indices);
2468 ccv_array_free(from_parameter_indices);
2469 if (id_map)
2470 kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map);
2471 if (updated_name)
2472 ccfreefree(updated_name);
2473 // Mark it as incomplete so we will call init_1.
2474 if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data))
2475 to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1);
2476 else // Remove the flag.
2477 to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2478}
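On the defect reported at line 2443 above: the analyzer models `kh_put` (through `kh_resize`) handing back `realloc`'ed, uninitialized `vals` storage, and on its chosen path it cannot prove that the bucket later returned by `kh_get` for `updated_name` is one of the buckets whose value was written by the populate loop at lines 2432-2438. In the code, every key inserted into `id_map` has `kh_val(id_map, k) = j` executed right after `kh_put`, so a successful `kh_get` always lands on a written slot; the report therefore looks like a false positive caused by that khash invariant being invisible to the checker. A minimal stand-alone sketch of the put-then-assign-then-get discipline the code relies on, with illustrative names:

#include <stdio.h>
#include "khash.h" /* the single-header hash map vendored by ccv */

KHASH_MAP_INIT_STR(name2idx, int)

int main(void)
{
	khash_t(name2idx)* const map = kh_init(name2idx);
	int ret;
	khiter_t k = kh_put(name2idx, map, "conv-weight", &ret);
	/* kh_put only reserves the bucket; the caller must write the value itself. */
	kh_val(map, k) = 7;
	k = kh_get(name2idx, map, "conv-weight");
	if (k != kh_end(map)) /* found: this bucket's value was assigned above */
		printf("index %d\n", kh_val(map, k));
	kh_destroy(name2idx, map);
	return 0;
}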
2479
2480ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2481{
2482 if (!compiled_data->stream_map)
2483 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
2484 int ret = 0;
2485 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
2486 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 2486, __extension__ __PRETTY_FUNCTION__); }))
;
2487 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2488 // If ret == 0, the key already exists and we can return directly; otherwise, create and return.
2489 if (ret != 0)
2490 {
2491 stream = ccv_nnc_stream_context_new(type);
2492 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2493 }
2494 return stream;
2495}
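`ccv_cnnp_model_share_parameters` above stores borrowed tensors as `(uintptr_t)src | (uintptr_t)1`, and accessors such as `CCV_NNC_TENSOR(...)` and `CCV_NNC_INIT_V(...)` strip that bit with `& ~(uintptr_t)1`: the least significant bit of an aligned pointer doubles as a "shared / not fully allocated" flag. A minimal stand-alone sketch of that tagging trick, with illustrative names:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Tag the low bit of an aligned pointer to mean "borrowed, not owned". */
static void* tag_shared(void* const p) { return (void*)((uintptr_t)p | (uintptr_t)1); }
static void* untag(void* const p) { return (void*)((uintptr_t)p & ~(uintptr_t)1); }
static int is_shared(void* const p) { return (int)((uintptr_t)p & (uintptr_t)1); }

int main(void)
{
	double* const owned = (double*)malloc(sizeof(double)); /* malloc alignment keeps bit 0 free */
	void* const borrowed = tag_shared(owned);
	printf("shared? %d, same object? %d\n", is_shared(borrowed), untag(borrowed) == (void*)owned);
	if (!is_shared(borrowed)) /* free only what we own; tagged entries are borrowed */
		free(untag(borrowed));
	free(owned);
	return 0;
}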
2496
2497void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2498{
2499 ccv_array_t* to_parameter_indices;
2500 int to_param_ref;
2501 ccv_array_t* from_parameter_indices;
2502 int from_param_ref;
2503 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
2504 // Should be exactly the same tensor.
2505 if (to_param_ref < 0 && from_param_ref < 0)
2506 { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices
->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices
->rnum == to_parameter_indices->rnum) ; else __assert_fail
("from_parameter_indices->rnum == to_parameter_indices->rnum"
, "ccv_cnnp_model.c", 2506, __extension__ __PRETTY_FUNCTION__
); }))
; }
2507 // To models.
2508 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2509 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2509, __extension__ __PRETTY_FUNCTION__
); }))
;
2510 // From models.
2511 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2512 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2513 const int to_parameter_size = to_compiled_data->parameters->rnum;
2514 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2515 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2515, __extension__ __PRETTY_FUNCTION__
); }))
;
2516 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2516, __extension__ __PRETTY_FUNCTION__
); }))
;
2517 int i, j;
2518 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2519 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2520 for (i = 0; i < aux_in_size; i++)
2521 inputs[i + 2] = aux_ins[i];
2522 for (i = 0; i < aux_out_size; i++)
2523 outputs[i + 1] = aux_outs[i];
2524 const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init.
v) & ~(uintptr_t)1))
;
2525 uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v)
& ~(uintptr_t)1))
;
2526 for (i = 0; i < rnum; i++)
2527 {
2528 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2529 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2529, __extension__ __PRETTY_FUNCTION__); }))
;
2530 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2530, __extension__ __PRETTY_FUNCTION__
); }))
;
2531 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2532 // If the original is not init'ed, we cannot copy from it.
2533 if (!(from_init_v[s >> 5] & (1u << (s & 0x1f))))
2534 continue;
2535 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2536 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2536, __extension__ __PRETTY_FUNCTION__); }))
;
2537 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2537, __extension__ __PRETTY_FUNCTION__
); }))
;
2538 if (parallel_count > 1)
2539 {
2540 ccv_nnc_stream_context_t* streams[parallel_count];
2541 ccv_nnc_stream_signal_t* signal;
2542 if (stream_context)
2543 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2544 for (j = 0; j < parallel_count; j++)
2545 {
2546 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2547 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2548 if (!dest || !src)
2549 {
2550 streams[j] = 0;
2551 continue;
2552 }
2553 // At the moment, we can only handle them on the same device.
2554 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2554, __extension__ __PRETTY_FUNCTION__
); }))
;
2555 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2555, __extension__ __PRETTY_FUNCTION__
); }))
;
2556 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2557 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2558 int type = stream_type;
2559 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2560 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2561 // Wait on the signal before running.
2562 if (stream_context)
2563 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2564 inputs[0] = outputs[0] = dest;
2565 inputs[1] = src;
2566 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2567 if (stream_context)
2568 {
2569 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2570 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2571 }
2572 streams[j] = stream_0;
2573 }
2574 // If this should be blocking, block here.
2575 if (!stream_context)
2576 for (j = 0; j < parallel_count; j++)
2577 if (streams[j])
2578 ccv_nnc_stream_context_wait(streams[j]);
2579 } else {
2580 ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors
.parameters[src_d]) & ~(uintptr_t)1))
;
2581 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2581, __extension__
__PRETTY_FUNCTION__); }))
;
2582 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2583 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2583, __extension__
__PRETTY_FUNCTION__); }))
;
2584 inputs[0] = outputs[0] = dest;
2585 inputs[1] = src;
2586 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2587 }
2588 // Mark this symbol as init'ed.
2589 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2590 to_init_v[d >> 5] |= (1u << (d & 0x1f));
2591 }
2592 ccv_array_free(to_parameter_indices);
2593 ccv_array_free(from_parameter_indices);
2594}
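The multi-device branch of `ccv_cnnp_model_parameters_zip_map` above follows a fan-out / fan-in pattern: emit one signal on the caller's stream, have each per-device stream wait on it before launching the command, emit a per-stream signal for the caller to wait on in the asynchronous case, and block on every stream in the synchronous case. A condensed restatement of that pattern, using only stream calls that already appear in this listing; the per-device command dispatch is elided, and the surrounding declarations (`streams[]` holding the per-device stream contexts, `parallel_count`, `stream_context`, `j`) are assumed as above:

/* Fan-out / fan-in fragment. */
ccv_nnc_stream_signal_t* signal = 0;
if (stream_context)
	signal = ccv_nnc_stream_context_emit_signal_new(stream_context); /* workers start after the caller's pending work */
for (j = 0; j < parallel_count; j++)
{
	ccv_nnc_stream_context_t* const stream_j = streams[j];
	if (!stream_j)
		continue;
	if (stream_context)
		ccv_nnc_stream_context_wait_signal(stream_j, signal); /* fan-out */
	/* ... enqueue the per-device command on stream_j ... */
	if (stream_context)
	{
		ccv_nnc_stream_signal_t* const done = ccv_nnc_stream_context_emit_signal_new(stream_j);
		ccv_nnc_stream_context_wait_signal(stream_context, done); /* fan-in */
	}
}
if (!stream_context) /* synchronous call: block until every worker stream drains */
	for (j = 0; j < parallel_count; j++)
		if (streams[j])
			ccv_nnc_stream_context_wait(streams[j]);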
2595
2596void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2597{
2598 int to_param_ref;
2599 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2600 // To models.
2601 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2602 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2602, __extension__ __PRETTY_FUNCTION__
); }))
;
2603 // Tensors have to be init'ed already.
2604 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2604, __extension__ __PRETTY_FUNCTION__
); }))
;
2605 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2605, __extension__ __PRETTY_FUNCTION__
); }))
;
2606 // From models.
2607 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2608 const int to_parameter_size = to_compiled_data->parameters->rnum;
2609 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2610 assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__
({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0"
, "ccv_cnnp_model.c", 2610, __extension__ __PRETTY_FUNCTION__
); }))
;
2611 assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__
({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0"
, "ccv_cnnp_model.c", 2611, __extension__ __PRETTY_FUNCTION__
); }))
;
2612 int i, j;
2613 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2614 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2615 for (i = 0; i < aux_in_size; i++)
2616 inputs[i + 1] = aux_ins[i];
2617 for (i = 0; i < aux_out_size; i++)
2618 outputs[i + 1] = aux_outs[i];
2619 for (i = 0; i < rnum; i++)
2620 {
2621 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2622 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2622, __extension__ __PRETTY_FUNCTION__); }))
;
2623 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2623, __extension__ __PRETTY_FUNCTION__
); }))
;
2624 if (parallel_count > 1)
2625 {
2626 ccv_nnc_stream_context_t* streams[parallel_count];
2627 ccv_nnc_stream_signal_t* signal;
2628 if (stream_context)
2629 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2630 for (j = 0; j < parallel_count; j++)
2631 {
2632 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t
)1))
;
2633 if (!dest)
2634 {
2635 streams[j] = 0;
2636 continue;
2637 }
2638 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2639 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2640 int type = stream_type;
2641 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2642 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2643 // Wait on the signal before running.
2644 if (stream_context)
2645 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2646 inputs[0] = outputs[0] = dest;
2647 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2648 if (stream_context)
2649 {
2650 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2651 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2652 }
2653 streams[j] = stream_0;
2654 }
2655 // If this should be blocking, block here.
2656 if (!stream_context)
2657 for (j = 0; j < parallel_count; j++)
2658 if (streams[j])
2659 ccv_nnc_stream_context_wait(streams[j]);
2660 } else {
2661 ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors
.parameters[dest_d]) & ~(uintptr_t)1))
;
2662 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2662, __extension__
__PRETTY_FUNCTION__); }))
;
2663 inputs[0] = outputs[0] = dest;
2664 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2665 }
2666 // No need to mark this symbol as init'ed; it already is.
2667 }
2668 ccv_array_free(to_parameter_indices);
2669}
2670
2671void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2672{
2673 int to_param_ref;
2674 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2675 // To models.
2676 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2677 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2677, __extension__ __PRETTY_FUNCTION__
); }))
;
2678 // Tensors have to be init'ed already.
2679 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2679, __extension__ __PRETTY_FUNCTION__
); }))
;
2680 ccv_nnc_tensor_t** tensor_gradients;
2681 if (to_compiled_data->backward.count > 1)
2682 tensor_gradients = to_compiled_data->tensors.accum_gradients;
2683 else
2684 tensor_gradients = to_compiled_data->tensors.gradients;
2685 assert(tensor_gradients);
2686 // From models.
2687 const int parallel_count = ccv_max(model->parallel_count, 1);
2688 const int to_parameter_size = to_compiled_data->parameters->rnum;
2689 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2690 assert(aux_in_size >= 0);
2691 assert(aux_out_size >= 0);
2692 int i, j;
2693 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2694 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2695 for (i = 0; i < aux_in_size; i++)
2696 inputs[i + 1] = aux_ins[i];
2697 for (i = 0; i < aux_out_size; i++)
2698 outputs[i + 1] = aux_outs[i];
2699 for (i = 0; i < rnum; i++)
2700 {
2701 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2702 assert(dest_d >= 0);
2703 assert(dest_d < to_compiled_data->parameters->rnum);
2704 if (parallel_count > 1)
2705 {
2706 ccv_nnc_stream_context_t* streams[parallel_count];
2707 ccv_nnc_stream_signal_t* signal;
2708 if (stream_context)
2709 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2710 for (j = 0; j < parallel_count; j++)
2711 {
2712 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2713 if (!dest)
2714 {
2715 streams[j] = 0;
2716 continue;
2717 }
2718 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2719 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type);
2720 int type = stream_type;
2721 CCV_STREAM_SET_DEVICE_ID(type, device_id);
2722 ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2723 // Wait signal to finish.
2724 if (stream_context)
2725 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2726 inputs[0] = outputs[0] = dest;
2727 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2728 if (stream_context)
2729 {
2730 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2731 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2732 }
2733 streams[j] = stream_0;
2734 }
2735 // If this should be blocking, blocking it.
2736 if (!stream_context)
2737 for (j = 0; j < parallel_count; j++)
2738 if (streams[j])
2739 ccv_nnc_stream_context_wait(streams[j]);
2740 } else {
2741 ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2742 if (!dest)
2743 continue;
2744 assert(dest);
2745 inputs[0] = outputs[0] = dest;
2746 ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2747 }
2748 // No need to mark this symbol as init'ed, it is already.
2749 }
2750 ccv_array_free(to_parameter_indices);
2751}
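A usage sketch, not part of the analyzer output: ccv_cnnp_model_parameter_gradients_map applies one nnc command in place to every gradient tensor (and every data-parallel replica). The signature is the one shown above; ccv_cnnp_model_parameters(), ALL_PARAMETERS, CMD_SET_FORWARD() and ccv_nnc_no_hint are assumed to be the usual ccv_cnnp / ccv_nnc conveniences.

/* Hypothetical: reset every accumulated gradient to 0 before the next backward pass,
 * blocking because the stream context is NULL. */
ccv_cnnp_model_parameter_gradients_map(model,
	ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS),
	CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);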
2752
2753ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2754{
2755 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2756 assert(compiled_data);
2757 return compiled_data->minimize.minimizer;
2758}
2759
2760void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2761{
2762 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2763 assert(compiled_data);
2764 const int parameter_size = compiled_data->parameters->rnum;
2765 if (parameter_size == 0)
2766 return;
2767 if (reset)
2768 { assert(set_parameters == 0 && set_parameter_size == 0); }
2769 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2770 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2771 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2772 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2773 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2774 // We update all parameters, at this point, we have one minimizer.
2775 if (set_parameters == 0 || set_parameter_size == 0)
2776 compiled_data->minimize.minimizer = minimizer;
2777 int i;
2778 if (set_parameters && set_parameter_size)
2779 {
2780 // I need to save what's the minimizer along with this.
2781 if (!compiled_data->minimize.parameters)
2782 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2783 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2784 set_minimizer_for_parameter->minimizer = minimizer;
2785 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2786 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2787 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2788 }
2789 // If reset is true, clear the parameters array.
2790 if (reset && compiled_data->minimize.parameters)
2791 {
2792 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2793 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
2794 ccv_array_clear(compiled_data->minimize.parameters);
2795 }
2796 if (!compiled_data->update_nodes)
2797 return;
2798 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2799 assert(symbolic_graph);
2800 if (saved_aux_size > old_max_saved_aux_size)
2801 {
2802 assert(compiled_data->updated_parameters);
2803 // Reallocate first, move them around later.
2804 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2805 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2806 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2807 // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap.
2808 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2809 }
2810 int flag = 0;
2811 const int parallel_count = ccv_max(model->parallel_count, 1);
2812 if (set_parameters && set_parameter_size)
2813 {
2814 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2815 for (i = 0; i < set_parameter_size; i++)
2816 {
2817 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2818 assert(set_parameters[i]->param_sel != 0);
2819 const int old_rnum = parameter_indices->rnum;
2820 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2821 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2822 assert(set_parameters[i]->param_ref != 0);
2823 if (param_ref >= 0)
2824 {
2825 assert(param_ref + old_rnum < parameter_indices->rnum);
2826 *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
2827 parameter_indices->rnum = old_rnum + 1;
2828 }
2829 }
2830 // We may have duplicated indices, but that is OK, we will set it twice.
2831 for (i = 0; i < parameter_indices->rnum; i++)
2832 {
2833 const int d = *(int*)ccv_array_get(parameter_indices, i);
2834 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2835 flag = 1;
2836 }
2837 ccv_array_free(parameter_indices);
2838 } else {
2839 for (i = 0; i < parameter_size; i++)
2840 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2841 flag = 1;
2842 if (compiled_data->minimize.parameters)
2843 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2844 flag = 1;
2845 }
2846 if (flag)
2847 {
2848 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph.
2849 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2850 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2851 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2852 }
2853}
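A sketch of how ccv_cnnp_model_set_minimizer is typically driven, not from the report: passing no parameter selection (set_parameters == 0, set_parameter_size == 0) replaces the default minimizer for every trainable parameter, as the code above shows. CMD_SGD_FORWARD(nesterov, rate, scale, decay, momentum, dampening) is assumed to be the usual ccv_nnc SGD command macro.

/* Hypothetical: switch the whole model to plain SGD without resetting any
 * per-parameter minimizer overrides recorded in minimize.parameters. */
ccv_cnnp_model_set_minimizer(model, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), 0, 0, 0);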
2854
2855void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2856{
2857 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2858 assert(compiled_data);
2859 compiled_data->compile_params = compile_params;
2860}
2861
2862void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2863{
2864 if (model->graph && out_size > 0)
2865 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2866 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2867 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2868 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2869 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2870 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2871 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2872}
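A usage sketch for ccv_cnnp_model_dot, not part of the analyzer output: each slot is gated on out_size and on whether the corresponding graph has been materialized, so it is safe to pass all four handles at any point in the model's life. CCV_NNC_LONG_DOT_GRAPH is assumed to be the verbose dot flag used elsewhere in ccv_nnc; the file names are arbitrary.

/* Hypothetical: dump whichever of the four graphs exist to Graphviz files. */
FILE* outs[4] = { fopen("symbolic.dot", "w"), fopen("graph.dot", "w"), fopen("accum.dot", "w"), fopen("apply.dot", "w") };
ccv_cnnp_model_dot(model, CCV_NNC_LONG_DOT_GRAPH, outs, 4);
int i;
for (i = 0; i < 4; i++)
	fclose(outs[i]);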
2873
2874void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
2875{
2876 if (model->graph)
2877 ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context);
2878}
2879
2880static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2881{
2882 int i;
2883 const int parameter_size = compiled_data->parameters->rnum;
2884 ccv_array_free(compiled_data->parameters);
2885 if (compiled_data->parameter_flags)
2886 ccfree(compiled_data->parameter_flags);
2887 const int internal_size = compiled_data->internals->rnum;
2888 ccv_array_free(compiled_data->internals);
2889 assert(compiled_data->ids.parameters->rnum == parameter_size);
2890 assert(compiled_data->ids.internals->rnum == internal_size);
2891 for (i = 0; i < parameter_size; i++)
2892 ccfree(*(char**)ccv_array_get(compiled_data->ids.parameters, i));
2893 ccv_array_free(compiled_data->ids.parameters);
2894 for (i = 0; i < internal_size; i++)
2895 ccfree(*(char**)ccv_array_get(compiled_data->ids.internals, i));
2896 ccv_array_free(compiled_data->ids.internals);
2897 const int parallel_count = ccv_max(model->parallel_count, 1);
2898 if (compiled_data->tensors.parameters)
2899 {
2900 for (i = 0; i < parameter_size * parallel_count; i++)
2901 // If it is not marked as not belonging, we can free it.
2902 if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
2903 if (compiled_data->tensors.parameters[i])
2904 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2905 for (i = 0; i < internal_size * parallel_count; i++)
2906 if (compiled_data->tensors.internals[i])
2907 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2908 ccfree(compiled_data->tensors.parameters);
2909 }
2910 if (compiled_data->tensors.gradients)
2911 {
2912 for (i = 0; i < parameter_size * parallel_count; i++)
2913 {
2914 if (compiled_data->tensors.gradients[i])
2915 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2916 if (compiled_data->tensors.accum_gradients[i])
2917 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2918 }
2919 ccfree(compiled_data->tensors.gradients);
2920 }
2921 if (compiled_data->minimize.parameters)
2922 {
2923 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2924 ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
2925 ccv_array_free(compiled_data->minimize.parameters);
2926 }
2927 if (compiled_data->rewindables)
2928 ccv_array_free(compiled_data->rewindables);
2929 if (compiled_data->tensors_init.v)
2930 ccfree(CCV_NNC_INIT_V(compiled_data->tensors_init.v));
2931 if (compiled_data->evaluate.tos)
2932 ccfree(compiled_data->evaluate.tos);
2933 compiled_data->evaluate.tos = 0;
2934 if (compiled_data->stream_map)
2935 {
2936 khiter_t k;
2937 for (k = kh_begin(compiled_data->stream_map); k != kh_end(compiled_data->stream_map); ++k)
2938 {
2939 if (!kh_exist(compiled_data->stream_map, k))
2940 continue;
2941 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k);
2942 ccv_nnc_stream_context_free(stream);
2943 }
2944 kh_destroy(stream_map, compiled_data->stream_map);
2945 }
2946 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2947 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
2948 _ccv_cnnp_compiled_data_backward_free(compiled_data);
2949 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2950 if (compiled_data->gradient_checkpoints)
2951 {
2952 for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++)
2953 {
2954 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i);
2955 assert(checkpoint->inputs);
2956 ccfree(checkpoint->inputs);
2957 ccv_array_free(checkpoint->tensor_symbols);
2958 }
2959 ccv_array_free(compiled_data->gradient_checkpoints);
2960 }
2961 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
2962 ccfree(compiled_data);
2963}
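The free path above relies on a low-bit pointer-tagging convention for tensors.parameters: when bit 0 of a slot is set, the tensor is merely borrowed by the compiled data and must not be passed to ccv_nnc_tensor_free, which is why the loop masks the bit before checking ownership. A minimal illustration of that idiom, with helper names that are illustrative rather than the library's:

/* Illustrative only; requires <stdint.h> for uintptr_t. */
static inline ccv_nnc_tensor_t* tag_external(ccv_nnc_tensor_t* const t)
{
	return (ccv_nnc_tensor_t*)((uintptr_t)t | (uintptr_t)1); /* mark as borrowed */
}
static inline ccv_nnc_tensor_t* untag(ccv_nnc_tensor_t* const t)
{
	return (ccv_nnc_tensor_t*)((uintptr_t)t & ~(uintptr_t)1); /* recover the usable pointer */
}
static inline int owned(ccv_nnc_tensor_t* const t)
{
	return !((uintptr_t)t & (uintptr_t)1); /* only owned tensors may be freed */
}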
2964
2965void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
2966{
2967 if (model->isa->deinit)
2968 model->isa->deinit(model);
2969 if (model->io)
2970 {
2971 int i;
2972 for (i = 0; i < model->io->rnum; i++)
2973 {
2974 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
2975 if (model_io->outgoings)
2976 ccv_array_free(model_io->outgoings);
2977 if (model_io->incomings)
2978 ccv_array_free(model_io->incomings);
2979 if (model_io->dependencies)
2980 ccv_array_free(model_io->dependencies);
2981 ccfree(model_io);
2982 }
2983 ccv_array_free(model->io);
2984 }
2985 if (model->parameter_indices)
2986 ccv_array_free(model->parameter_indices);
2987 if (model->inputs)
2988 ccfree(model->inputs);
2989 if (model->graph)
2990 ccv_nnc_symbolic_graph_free(model->graph);
2991 if (model->compiled_data)
2992 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
2993 if (model->name)
2994 ccfree(model->name);
2995 ccfree(model);
2996}
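A final teardown sketch, not from the report: because ccv_cnnp_model_free walks model->io, the ccv_cnnp_model_io_t handles handed out by ccv_cnnp_model_apply are released here as well, so a caller only ever frees the model itself.

/* Hypothetical end-of-run cleanup: one call releases the io wrappers, the
 * symbolic graph and any compiled data (parameters, gradients, streams). */
ccv_cnnp_model_free(model);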