Bug Summary

File: nnc/ccv_cnnp_model.c
Warning: line 965, column 100
Array access (via field 'outgrads') results in a null pointer dereference
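In other words, the analyzer found a feasible path on which the array stored in the 'outgrads' field is still NULL when it is indexed, so the element access at line 965 dereferences a null pointer. The lines below are a minimal, hypothetical sketch of that defect pattern only; apart from the field name 'outgrads', the struct and function names are illustrative and are not the code the analyzer flagged:

  // Hypothetical sketch: outgrads may legitimately stay NULL (for example when
  // out-gradients are disabled), so indexing it without a NULL check is the
  // kind of null pointer dereference reported above.
  typedef struct {
      ccv_nnc_tensor_t** outgrads; // may be NULL
      int outgrad_size;
  } example_backward_t;

  static ccv_nnc_tensor_t* example_outgrad(const example_backward_t* const backward, const int i)
  {
      return backward->outgrads[i]; // null pointer dereference when backward->outgrads == NULL
  }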

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/14.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/buildslave/public_html/analyze/2022-06-24-190241-827281-1 -x c ccv_cnnp_model.c
1 #include "ccv_nnc.h"
2 #include "ccv_nnc_easy.h"
3 #include "ccv_nnc_internal.h"
4 #include "ccv_internal.h"
5 #include "_ccv_cnnp_model.h"
6
7 // MARK - Level-5 API
8
9 ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
10 {
11 assert(input_size > 0);
12 if (!model->io)
13 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
14 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
15 model_io->param_ref = 0;
16 model_io->param_sel = 0;
17 model_io->visit = 0;
18 model_io->model = model;
19 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
20 model_io->outgoings = 0;
21 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
22 ccv_array_push(model->io, &model_io);
23 int i;
24 ccv_array_resize(model_io->incomings, input_size);
25 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
26 for (i = 0; i < input_size; i++)
27 {
28 if (!inputs[i]->outgoings)
29 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
30 ccv_array_push(inputs[i]->outgoings, &model_io);
31 }
32 return model_io;
33}
34
35 int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
36 {
37 return model->output_size;
38 }
39
40 ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
41 {
42 if (!model->io)
43 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
44 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
45 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
46 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
47 model_io->visit = 0;
48 model_io->model = model;
49 model_io->outputs = 0;
50 model_io->incomings = 0;
51 model_io->outgoings = 0;
52 ccv_array_push(model->io, &model_io);
53 return model_io;
54}
55
56 void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
57 {
58 model->notify_hook.func = func;
59 model->notify_hook.context = context;
60}
61
62 void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
63 {
64 if (model->notify_hook.func)
65 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
66 if (model->isa->notify)
67 model->isa->notify(model, tag, payload);
68}
69
70 static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
71 {
72 int i, j;
73 for (i = 0; i < graph_exec_symbol_size; i++)
74 {
75 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
76 // Check whether this tensor symbol has any duplicate.
77 for (j = i + 1; j < graph_exec_symbol_size;)
78 {
79 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
80 // If there is a same tensor symbol, remove it.
81 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
82 {
83 if (j + 1 < graph_exec_symbol_size)
84 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
85 --graph_exec_symbol_size;
86 continue;
87 }
88 ++j;
89 }
90 }
91 return graph_exec_symbol_size;
92}
93
94 typedef struct {
95 ccv_cnnp_model_sequence_t* sequence;
96 char prefix;
97 ccv_array_t* symbols;
98 ccv_array_t* ids;
99 } ccv_cnnp_model_add_to_array_context_t;
100
101 static void _ccv_cnnp_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol)
102 {
103 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
104 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
105 int i;
106 if (!model->parameter_indices)
107 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
108 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
109 {
110 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
111 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
112 {
113 // Only add to parameter_indices if it is trainable.
114 if (add_to_array_context->prefix == 't')
115 ccv_array_add_unique_int(model->parameter_indices, i);
116 // Found it, return, don't add it.
117 return;
118 }
119 }
120 // Only add to parameter_indices if it is trainable.
121 if (add_to_array_context->prefix == 't')
122 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
123 // This is a new one, no need to add_unique_int, it is unique.
124 ccv_array_push(add_to_array_context->symbols, &symbol);
125 char id[2048];
126 id[0] = add_to_array_context->prefix;
127 id[1] = '-';
128 int total_len = 2;
129 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
130 {
131 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
132 int len;
133 if (name->name && name->name[0] != '\0')
134 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
135 else
136 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
137 total_len += len;
138 if (total_len >= 2047)
139 break;
140 }
141 if (total_len < 2047)
142 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
143 assert(total_len < 2048);
144 char *heap_id = (char*)ccmalloc(total_len + 1);
145 memcpy(heap_id, id, total_len + 1);
146 ccv_array_push(add_to_array_context->ids, &heap_id);
147 ++add_to_array_context->sequence->it;
148}
149
150 static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size)
151 {
152 compiled_data->f = compiled_data->fits + output_size;
153 compiled_data->xpu_alloc.mp_hdr = -1;
154 compiled_data->xpu_alloc.freed = kh_init(dy_str);
155 compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
156}
157
158 static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
159 {
160 assert(model->graph);
161 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
162 int i;
163 for (i = 0; i < input_size; i++)
164 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
165 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
166 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
167 ccv_cnnp_model_sequence_t model_sequence = {
168 .bank = kh_init(ccv_cnnp_model_name_bank)
169 };
170 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
171 .sequence = &model_sequence,
172 .prefix = 't',
173 .symbols = parameters,
174 .ids = parameter_ids,
175 };
176 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
177 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
178 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
179 .sequence = &model_sequence,
180 .prefix = 'r',
181 .symbols = internals,
182 .ids = internal_ids,
183 };
184 ccv_cnnp_model_build_data_t build_data = {
185 .model_sequence = &model_sequence,
186 .add_to_array = _ccv_cnnp_add_to_array,
187 .parameters = parameters,
188 .context = {
189 .add_to_parameter = &add_to_parameter_context,
190 .add_to_output = &add_to_output_context,
191 },
192 };
193 model->data = &build_data;
194 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
195 model->data = 0;
196 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
197 ccv_array_free(model_sequence.sequences);
198 // Assert no parameter is alias.
199 for (i = 0; i < parameters->rnum; i++)
200 {
201 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
202 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
203 assert(alias_to.graph == 0); // Cannot find the one alias to.
204 }
205 // Assert no internal is alias.
206 for (i = 0; i < internals->rnum; i++)
207 {
208 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
209 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(retained.graph, retained);
210 assert(alias_to.graph == 0); // Cannot find the one alias to.
211 }
212 const int output_size = model->output_size;
213 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
214 ccv_nnc_symbolic_graph_simplify(model->graph,
215 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
216 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
217 CCV_NNC_SIMPLIFY_OPS_FUSION,
218 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
219 model->inputs, input_size,
220 model->outputs, output_size,
221 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
222 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
223 _ccv_cnnp_compiled_data_init(compiled_data, output_size);
224 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
225 assert(evaluate_to_size > 0);
226 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
227 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
228 compiled_data->loss = loss;
229 if (loss.cmd == CCV_NNC_NOOP)
230 {
231 // If no loss function provided, there is no fits.
232 for (i = 0; i < output_size; i++)
233 {
234 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
235 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
236 if (alias_to.d < 0)
237 compiled_data->f[i] = model->outputs[i];
238 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
239 int ofs[CCV_NNC_MAX_DIM_ALLOC];
240 int inc[CCV_NNC_MAX_DIM_ALLOC];
241 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
242 int j;
243 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
244 { assert(ofs[j] == 0); } // There is no ofs.
245 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
246 }
247 }
248 } else {
249 for (i = 0; i < output_size; i++)
250 {
251 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
252 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
253 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
254 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
255 }
256 }
257 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
258 ccv_nnc_symbolic_graph_simplify(model->graph,
259 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
260 0, 0, // No need to provide binds at this point.
261 compiled_data->f, model->output_size,
262 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
263 // If inputs are from GPU, stream type is GPU.
264 compiled_data->parameters = parameters;
265 compiled_data->internals = internals;
266 compiled_data->ids.parameters = parameter_ids;
267 compiled_data->ids.internals = internal_ids;
268}
269
270 static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
271 {
272 ccv_array_t* const stack = (ccv_array_t*)context;
273 ccv_array_push(stack, &symbol.d);
274}
275
276 static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
277 {
278 const ccv_nnc_tensor_symbol_t src_symbol = {
279 .d = src_index,
280 .graph = src_graph
281 };
282 const ccv_nnc_tensor_symbol_t dest_symbol = {
283 .d = dest_index,
284 .graph = dest_graph
285 };
286 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
287 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
288 int ofs[CCV_NNC_MAX_DIM_ALLOC];
289 int inc[CCV_NNC_MAX_DIM_ALLOC];
290 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
291 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
292}
293
294 static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
295 {
296 const ccv_nnc_tensor_symbol_t src_symbol = {
297 .d = src_index,
298 .graph = src_graph
299 };
300 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
301 const ccv_nnc_tensor_symbol_t dest_symbol = {
302 .d = dest_index,
303 .graph = dest_graph
304 };
305 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
306 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
307}
308
309 static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
310 static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
311
312 typedef struct {
313 int parallel_count;
314 ccv_nnc_symbolic_graph_t* graph;
315 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
316 } ccv_nnc_graph_exec_update_t;
317
318 static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
319 {
320 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
321 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
322 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
323 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
324 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
325 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
326 const int parallel_count = graph_exec_update->parallel_count;
327 int i;
328 for (i = 1; i < parallel_count; i++)
329 {
330 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
331 if (!CCV_NO_GRAPH_EXEC(copy))
332 {
333 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
334 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
335 }
336 }
337}
338
339 void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
340 {
341 assert(model->graph);
342 assert(model->compiled_data);
343 assert(!init->graph);
344 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
345 init->graph = ccv_nnc_symbolic_graph_new();
346 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
347 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack);
348 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
349 init->parallel_count = model->parallel_count;
350 init->memory_compression = model->memory_compression;
351 init->compiled_data->stream_type = model->compiled_data->stream_type;
352 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
353 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
354 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
355 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
356 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0);
357 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
358 int i, j;
359 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
360 for (i = 0; i < compiled_data->parameters->rnum; i++)
361 {
362 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
363 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
364 }
365 for (i = 0; i < compiled_data->internals->rnum; i++)
366 {
367 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
368 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
369 }
370 // Update inputs.
371 assert(model->input_size == init->input_size);
372 for (i = 0; i < model->input_size; i++)
373 if (model->inputs[i].d >= 0)
374 {
375 assert(init->inputs[i].d >= 0);
376 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
377 }
378 // Update outputs.
379 assert(model->output_size == init->output_size);
380 for (i = 0; i < model->output_size; i++)
381 {
382 if (model->outputs[i].d >= 0)
383 {
384 assert(init->outputs[i].d >= 0);
385 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
386 }
387 if (model->outputs[i].d != model->compiled_data->f[i].d)
388 {
389 assert(init->outputs[i].d != init->compiled_data->f[i].d);
390 if (model->compiled_data->f[i].d >= 0)
391 {
392 assert(init->compiled_data->f[i].d >= 0);
393 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
394 }
395 }
396 }
397 // Go through the graph to set tensor on matching symbols
398 for (i = 0; i < stack->rnum; i++)
399 {
400 const int d = *(int*)ccv_array_get(stack, i);
401 // If exceed range, skip.
402 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
403 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
404 continue;
405 const ccv_nnc_graph_exec_symbol_t src_symbol = {
406 .d = d,
407 .graph = init->graph
408 };
409 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
410 .d = d,
411 .graph = model->graph
412 };
413 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
414 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
415 // If the name doesn't match, skip.
416 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
417 continue;
418 // Now get all the inputs and outputs, if matches, set them.
419 const int* src_inputs;
420 int src_input_size;
421 const int* src_outputs;
422 int src_output_size;
423 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
424 const int* dest_inputs;
425 int dest_input_size;
426 const int* dest_outputs;
427 int dest_output_size;
428 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
429 // We may have unmatched input / output size because this is the minimizer and it has
430 // different saved_aux (for example, when we shrunk with CMD_NOOP).
431 if (src_input_size != dest_input_size)
432 continue;
433 if (src_output_size != dest_output_size)
434 continue;
435 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
436 // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because
437 // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original
438 // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That
439 // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as
440 // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec
441 // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not
442 // a new exec symbol.
443 for (j = 0; j < src_input_size; j++)
444 if (src_inputs[j] >= 0)
445 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
446 for (j = 0; j < src_output_size; j++)
447 if (src_outputs[j] >= 0)
448 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
449 }
450 ccv_array_free(stack);
451 // After this, we get all tensors in the model graph resolved through tensor_auto.
452 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
453 // Verify symbols we get matches.
454 const int parameter_size = compiled_data->parameters->rnum;
455 for (i = 0; i < parameter_size; i++)
456 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
457 const int internal_size = compiled_data->internals->rnum;
458 for (i = 0; i < internal_size; i++)
459 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
460 // Go through compiled data.
461 if (compiled_data->tensor_arena)
462 {
463 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
464 if (flag == 0 && compiled_data->graph_exec_arena)
465 {
466 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
467 // Since we will reinit, if we previously set is_test, we need to set it again.
468 if (compiled_data->is_test)
469 {
470 const int parallel_count = ccv_max(model->parallel_count, 1);
471 ccv_nnc_graph_exec_update_t update = {
472 .parallel_count = parallel_count,
473 .graph = model->graph,
474 .graph_exec_arena = compiled_data->graph_exec_arena,
475 };
476 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
477 }
478 } else
479 // Free-up tensor arena & graph exec arena.
480 _ccv_cnnp_compiled_data_graph_free(compiled_data);
481 }
482 // There are other compiled graphs, for accum and apply gradients.
483 // However, the main conclusion is, these absorb operations shouldn't impact parameters.
484 // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we
485 // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot
486 // be changed otherwise parameters' shape will be meaningless. The same goes to internals.
487 // That is why we don't update these compiled graphs at all this point.
488 // Free the model, we've already "absorbed" it.
489 ccv_cnnp_model_free(init);
490}
491
492 void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
493 {
494 assert(input_size == model->input_size || model->input_size == 0);
495 if (model->input_size == 0)
496 model->input_size = input_size;
497 if (!model->graph) // The graph is not compiled yet.
498 {
499 model->graph = ccv_nnc_symbolic_graph_new();
500 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
501 assert(model->compiled_data);
502 int i, flag = 0;
503 for (i = 0; !flag && i < input_size; i++)
504 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
505 // If inputs are from GPU, stream type is GPU.
506 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
507 model->compiled_data->minimize.minimizer = minimizer;
508 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
509 } else {
510 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
511 // And then absorb the "new model" to the old one.
512 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model);
513 ccv_cnnp_model_absorb(model, init, inputs, input_size);
514 // Reset minimizer.
515 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
516 }
517}
518
519 ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model)
520 {
521 return _ccv_cnnp_model_copy(model, 0);
522 }
523
524 void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
525 {
526 assert(model->graph);
527 assert(output_size == model->output_size);
528 ccv_nnc_symbolic_graph_t* const graph = model->graph;
529 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
530 int i;
531 for (i = 0; i < output_size; i++)
532 {
533 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
534 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
535 }
536}
537
538 void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
539 {
540 if (workspace_size == model->workspace_size)
541 return;
542 model->workspace_size = workspace_size;
543 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
544 if (compiled_data && compiled_data->graph)
545 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
546}
547
548 void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
549 {
550 if (parallel == 0)
551 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
552 else
553 model->parallel_count = parallel;
554 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
555 if (compiled_data)
556 { assert(!compiled_data->graph); }
557}
558
559 void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
560 {
561 model->memory_compression = memory_compression;
562 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
563 if (compiled_data)
564 { assert(!compiled_data->graph); }
565}
566
567 typedef struct {
568 int parallel_count;
569 ccv_nnc_symbolic_graph_t* graph;
570 ccv_cnnp_compiled_data_t* compiled_data;
571 ccv_nnc_tensor_arena_t* tensor_arena;
572 } ccv_nnc_tensor_init_states_t;
573
574 static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
575 {
576 int i;
577 for (i = 0; i < compiled_data->parameters->rnum; i++)
578 {
579 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
580 if (!(compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f))))
581 return 1;
582 }
583 for (i = 0; i < compiled_data->internals->rnum; i++)
584 {
585 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
586 if (!(compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f))))
587 return 1;
588 }
589 return 0;
590}
591
592 static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
593 {
594 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
595 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
596 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
597 if (!output_tensor)
598 return;
599 const int d = output_symbol.d;
600 assert(d < tensor_init_states->compiled_data->tensors_init.size);
601 if (tensor_init_states->compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f)))
602 return;
603 tensor_init_states->compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
604 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
605 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
606 const int parallel_count = tensor_init_states->parallel_count;
607 int i;
608 for (i = 1; i < parallel_count; i++)
609 {
610 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
611 if (copy)
612 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
613 }
614}
615
616// This method can only handle cases we added new tensors and exec, never delete. This invariant is true because
617// we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup.
618 static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
619 {
620 assert(model->graph);
621 assert(model->compiled_data);
622 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
623 assert(compiled_data->rewindables);
624 int i;
625 for (i = 0; i < compiled_data->rewindables->rnum; i++)
626 {
627 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
628 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
629 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
630 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
631 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
632 }
633 ccv_array_clear(compiled_data->rewindables);
634 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
635}
636
637
638 static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
639 {
640 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
641 .type = CCV_CNNP_REWIND_TENSOR,
642 .tensor = symbol
643 };
644 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
645 ccv_array_push(rewind_symbols, &rewind_symbol);
646}
647
648 static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
649 {
650 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
651 .type = CCV_CNNP_REWIND_TENSOR,
652 .tensor = symbol
653 };
654 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
655 ccv_array_push(rewind_symbols, &rewind_symbol);
656}
657
658 static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
659 {
660 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
661 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
662 .graph_exec = symbol
663 };
664 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
665 ccv_array_push(rewind_symbols, &rewind_symbol);
666}
667
668 static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
669 {
670 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
671 if (!CCV_NO_GRAPH_EXEC(update_exec))
672 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
673 int i;
674 for (i = 1; i < parallel_count; i++)
675 {
676 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
677 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
678 if (!CCV_NO_GRAPH_EXEC(copy))
679 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
680 }
681}
682
683 static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
684 {
685 assert(compiled_data);
686 assert(symbolic_graph);
687 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
688 int i;
689 for (i = 1; i < parallel_count; i++)
690 {
691 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
692 if (copy_symbol.graph)
693 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
694 }
695 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
696 if (graph_exec_arena)
697 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
698 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
699 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
700 if (gradient_graph_exec_arena)
701 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
702}
703
704 static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
705 {
706 int this_parameter_flag = 0;
707 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
708 int j, k;
709 // For no-op, we can preserve previous saved_aux_size.
710 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
711 {
712 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
713 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
714 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
715 // make sure some model parameters don't update if we don't want them to.
716 int old_saved_aux_size;
717 if (old_minimizer.cmd == CCV_NNC_NOOP)
718 {
719 int input_size;
720 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
721 if (input_size < 2) // This is not legit.
722 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
723 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
724 old_saved_aux_size = input_size - 2;
725 } else
726 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
727 if (old_saved_aux_size != saved_aux_size)
728 {
729 this_parameter_flag = 1;
730 if (saved_aux_size > old_saved_aux_size)
731 {
732 // Allocate new tensor symbols.
733 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
734 for (j = old_saved_aux_size; j < saved_aux_size; j++)
735 {
736 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
737 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
738 for (k = 1; k < parallel_count; k++)
739 {
740 ccv_nnc_tensor_param_t dev_info = info;
741 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
742 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
743 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
744 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
745 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
746 }
747 }
748 } else {
749 for (j = saved_aux_size; j < old_saved_aux_size; j++)
750 {
751 for (k = 1; k < parallel_count; k++)
752 {
753 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
754 if (src_copy.d >= 0)
755 {
756 ccv_nnc_tensor_symbol_free(graph, src_copy);
757 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
758 }
759 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
760 if (dest_copy.d >= 0)
761 {
762 ccv_nnc_tensor_symbol_free(graph, dest_copy);
763 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
764 }
765 }
766 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
767 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
768 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
769 }
770 }
771 }
772 }
773 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
774 if (this_parameter_flag)
775 {
776 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
777 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
778 const int* inputs = 0;
779 int input_size = 0;
780 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
781 assert(input_size >= 1);
782 update_inputs[0].d = inputs[0];
783 update_inputs[0].graph = graph;
784 update_inputs[1].d = inputs[1];
785 update_inputs[1].graph = graph;
786 update_outputs[0] = updated_parameters[parameter_indice];
787 for (j = 0; j < saved_aux_size; j++)
788 {
789 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
790 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
791 }
792 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
793 for (k = 1; k < parallel_count; k++)
794 {
795 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
796 assert(copy.d >= 0);
797 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
798 assert(input_size >= 1);
799 update_inputs[0].d = inputs[0];
800 update_inputs[0].graph = graph;
801 update_inputs[1].d = inputs[1];
802 update_inputs[1].graph = graph;
803 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
804 for (j = 0; j < saved_aux_size; j++)
805 {
806 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
807 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
808 }
809 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
810 }
811 }
812 return this_parameter_flag;
813}
814
815 typedef struct {
816 int parameter_size;
817 ccv_nnc_cmd_t minimizer;
818 ccv_cnnp_model_io_t parameters[1];
819 } ccv_cnnp_set_minimizer_for_parameter_t;
820
821 static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
822 {
823 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
824 assert(compiled_data);
825 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
826 // We update all parameters, at this point, we have one minimizer.
827 const int parameter_size = compiled_data->parameters->rnum;
828 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
829 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
830 assert(symbolic_graph);
831 const int parallel_count = ccv_max(model->parallel_count, 1);
832 ccv_array_t* const parameters = compiled_data->minimize.parameters;
833 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
834 int i, j, flag = 0;
835 for (i = 0; i < parameters->rnum; i++)
836 {
837 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(i)))
;
838 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
839 {
840 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
841 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 841, __extension__ __PRETTY_FUNCTION__)
; }))
;
842 const int old_rnum = parameter_indices->rnum;
843 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
844 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
845 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 845, __extension__ __PRETTY_FUNCTION__)
; }))
;
846 if (param_ref >= 0)
847 {
848 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 848, __extension__ __PRETTY_FUNCTION__)
; }))
;
849 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
850 parameter_indices->rnum = old_rnum + 1;
851 }
852 }
853 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
 854 // We may have duplicated indices, but that is OK; we will just set it twice.
855 for (j = 0; j < parameter_indices->rnum; j++)
856 {
857 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
858 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 858, __extension__ __PRETTY_FUNCTION__)
; }))
;
859 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
860 flag = 1;
861 }
862 ccv_array_clear(parameter_indices);
863 }
864 ccv_array_free(parameter_indices);
865 return flag;
866}
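/* A short illustrative note (not part of the original source): param_sel and param_ref above use
 * a 1-based encoding so that 0 is never a valid value (hence the asserts at lines 841 and 845).
 * A positive value v selects index v - 1; a negative value is passed through unchanged and is
 * assumed here to mean "all"/"unspecified". For example, param_ref == 3 narrows the freshly
 * added indices down to the single entry at offset 2 (lines 846..851). */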
867
868static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
869{
870 if (new_saved_aux_size == old_saved_aux_size)
871 return;
872 assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ?
1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size
) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size"
, "ccv_cnnp_model.c", 872, __extension__ __PRETTY_FUNCTION__)
; }))
;
873 int i, j;
874 for (i = parameter_size - 1; i >= 0; i--)
875 {
876 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
877 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
878 for (j = old_saved_aux_size - 1; j >= 0; j--)
879 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
880 }
881}
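/* A standalone sketch (not part of ccv_cnnp_model.c; plain ints stand in for
 * ccv_nnc_tensor_symbol_map_t and the helper name is hypothetical) of the in-place re-striding
 * done by _ccv_cnnp_scatter_saved_aux above: entries laid out with stride `old` are spread to
 * stride `new_`, highest parameter first and highest slot first, so nothing is overwritten
 * before it has been moved. */
static void scatter_sketch(int* const buf, const int parameter_size, const int old, const int new_)
{
	int i, j;
	for (i = parameter_size - 1; i >= 0; i--)
	{
		for (j = new_ - 1; j >= old; j--)
			buf[i * new_ + j] = -1; /* stands in for NO_TENSOR_SYMBOL */
		for (j = old - 1; j >= 0; j--)
			buf[i * new_ + j] = buf[i * old + j];
	}
}
/* With parameter_size = 2, old = 1, new_ = 2 and buf = {a, b, ?, ?} (buf must already have room
 * for parameter_size * new_ entries, as the allocation at line 912 provides via
 * max_saved_aux_size), the result is {a, -1, b, -1}. */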
882
883static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
884{
885 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
886 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 886, __extension__ __PRETTY_FUNCTION__); }))
;
887 if (!compiled_data->rewindables)
888 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
889 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables);
890 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables);
891 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables);
892}
893
894static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
895{
896 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
897 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail
("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 897, __extension__ __PRETTY_FUNCTION__)
; }))
;
1
Assuming field 'gradient_mode' is equal to CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
2
Taking true branch
898 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE"
, "ccv_cnnp_model.c", 898, __extension__ __PRETTY_FUNCTION__)
; }))
;
3
Assuming 'gradient_mode' is not equal to CCV_CNNP_COMPILED_DATA_GRADIENT_NONE
4
Taking true branch
899 const int evaluate_to_size = compiled_data->evaluate.to_size;
900 assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__
({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0"
, "ccv_cnnp_model.c", 900, __extension__ __PRETTY_FUNCTION__)
; }))
;
5
Assuming 'evaluate_to_size' is > 0
6
Taking true branch
901 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
7
Assuming '_a' is <= '_b'
8
'?' condition is false
902 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
903 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
904 int i, j;
905 const int output_size = model->output_size;
906 assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size
* parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count"
, "ccv_cnnp_model.c", 906, __extension__ __PRETTY_FUNCTION__)
; }))
;
9
Assuming 'fits' is null
907 if (fits
9.1
'fits' is null
)
10
Taking false branch
908 for (i = 0; i < output_size; i++)
909 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
910 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
911 const int parameter_size = compiled_data->parameters->rnum;
912 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
913 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
914 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
915 int parameter_size_maybe_more = parameter_size;
916 compiled_data->disable_outgrad = disable_outgrad;
917 int outgrad_size;
918 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
11
Assuming 'gradient_mode' is not equal to CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
12
Assuming field 'input_size' is not equal to 0
13
Taking false branch
919 outgrad_size = 0;
920 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
14
Assuming 'disable_outgrad' is equal to CCV_CNNP_DISABLE_OUTGRAD_NONE
15
Taking true branch
921 outgrad_size = model->input_size;
922 else {
923 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 923, __extension__ __PRETTY_FUNCTION__)
; }))
; // If it were disable-all, the gradient mode wouldn't be this one.
924 outgrad_size = 0;
925 for (i = 0; i < model->input_size; i++)
926 if (!(disable_outgrad & ((uint64_t)1 << i)))
927 ++outgrad_size;
928 }
929 compiled_data->outgrad_size = outgrad_size;
930 parameter_size_maybe_more += outgrad_size;
931 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
932 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
16
Assuming 'parameter_size_maybe_more' is <= 'parameter_size'
17
'?' condition is false
18
Null pointer value stored to field 'outgrads'
933 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
934 compiled_data->backward.to_size = parameter_size_maybe_more;
935 if (gradient_mode
18.1
'gradient_mode' is not equal to CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size
18.2
Field 'input_size' is not equal to 0
== 0)
19
Taking false branch
936 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
937 else if (disable_outgrad
19.1
'disable_outgrad' is equal to CCV_CNNP_DISABLE_OUTGRAD_NONE
== CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
20
Taking true branch
938 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
939 else { // Compute minimize with gradients including selected inputs.
940 assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__
({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0"
, "ccv_cnnp_model.c", 940, __extension__ __PRETTY_FUNCTION__)
; }))
;
941 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL
) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL"
, "ccv_cnnp_model.c", 941, __extension__ __PRETTY_FUNCTION__)
; }))
; // If it were disable-all, the gradient mode wouldn't be this one.
942 assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__
({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0"
, "ccv_cnnp_model.c", 942, __extension__ __PRETTY_FUNCTION__)
; }))
;
943 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
944 j = 0;
945 for (i = 0; i < model->input_size; i++)
946 if (!(disable_outgrad & ((uint64_t)1 << i)))
947 outgrads[j++] = model->inputs[i];
948 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
949 }
950 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
951 if (compiled_data->minimize.parameters)
21
Assuming field 'parameters' is null
22
Taking false branch
952 _ccv_cnnp_apply_parameters_with_minimizer(model);
953 for (i = 0; i < output_size; i++)
23
Assuming 'i' is >= 'output_size'
24
Loop condition is false. Execution continues on line 959
954 {
955 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
956 // Init this to 1 so we can backprop.
957 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
958 }
959 for (i = 0; i < parameter_size_maybe_more; i++)
25
Assuming 'i' is >= 'parameter_size_maybe_more'
26
Loop condition is false. Execution continues on line 961
960 compiled_data->backward.tos[i] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
961 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
962 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
963 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
27
Assuming the condition is true
28
Loop condition is true. Entering loop body
964 {
965 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
29
Array access (via field 'outgrads') results in a null pointer dereference
966 const int* tos;
967 int to_size;
968 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
 969 if (to_size == 0) // If this is the end (no minimizer afterwards), we need to attach this as a destination. Otherwise it is covered by update_nodes.
970 {
971 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
972 int flag = 0;
973 for (j = i - 1; !flag && j >= 0; j--)
974 flag = (destinations[j + parameter_size].d == outgrad.d);
975 if (!flag) // Only if we cannot find it, we add it.
976 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
977 }
978 }
979 if (parallel_count > 1)
980 {
981 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
982 0, 0,
983 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
984 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
985 0, 0, 0,
986 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
987 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
988 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
989 for (i = 0; i < evaluate_to_size; i++)
990 for (j = 1; j < parallel_count; j++)
991 {
992 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
993 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
994 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
995 }
996 for (i = 0; i < parameter_size_maybe_more; i++)
997 for (j = 1; j < parallel_count; j++)
998 {
999 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
1000 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1001 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
1002 }
1003 }
1004 // Only use memory compression if we are in gradient parameter mode.
1005 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES && model->memory_compression)
1006 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1007 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1008 compiled_data->gradient_mode = gradient_mode;
1009}
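/* A purely editorial note on the warning reported above (line 965): on that path `outgrads` is
 * NULL only when parameter_size_maybe_more == parameter_size (line 932), in which case the loop
 * bound at line 963 is zero and line 965 is never reached, so the report looks like a false
 * positive of the path-sensitive analysis. A minimal sketch of a guard that would make the
 * invariant explicit to the checker (an assumption about how one might silence it, not part of
 * the original source):
 *
 *     assert(parameter_size_maybe_more == parameter_size || compiled_data->outgrads);
 *     for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
 *         { ... same body as lines 964..978 ... }
 */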
1010
1011void ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1012{
1013 assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 :
0), __extension__ ({ if (!compiled_data->tensors.parameters
) ; else __assert_fail ("!compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1013, __extension__ __PRETTY_FUNCTION__
); }))
;
1014 const int parameter_size = compiled_data->parameters->rnum;
1015 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1016 const int internal_size = compiled_data->internals->rnum;
1017 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1018 compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1019 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)ccmallocmalloc((sizeof(ccv_nnc_tensor_t*) * parameter_size + sizeof(ccv_nnc_tensor_t*) * internal_size) * parallel_count);
1020 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1021 int i, j;
1022 for (i = 0; i < parameter_size; i++)
1023 {
1024 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1025 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1026 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1027 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1028 for (j = 1; j < parallel_count; j++)
1029 {
1030 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1031 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1032 }
1033 }
1034 for (i = 0; i < internal_size; i++)
1035 {
1036 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(i))
)
;
1037 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1038 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1039 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1040 for (j = 1; j < parallel_count; j++)
1041 {
1042 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1043 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1044 }
1045 }
1046}
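/* A small worked example (not part of the original source; numbers are hypothetical) of the
 * device-major layout built above: entry tensors.parameters[i + j * parameter_size] is the copy
 * of parameter i on device j. With parameter_size = 3 and parallel_count = 2 the array holds
 *
 *   [ p0@dev0, p1@dev0, p2@dev0, p0@dev1, p1@dev1, p2@dev1 ]
 *
 * so parameter 1 on device 1 lives at index 1 + 1 * 3 = 4; internals use the same layout with
 * internal_size as the stride. */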
1047
1048static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1049{
1050 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1050, __extension__ __PRETTY_FUNCTION__
); }))
;
1051 int i, j;
1052 for (i = 0; i < tensor_size; i++)
1053 {
1054 if (!tensors[i])
1055 continue;
1056 const int d = tensor_symbols[i].d;
1057 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1058 continue;
1059 for (j = 1; j < parallel_count; j++)
1060 if (tensors[i + j * tensor_size])
1061 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, &tensors[i], 1, &tensors[i + j * tensor_size], 1, 0);
1062 }
1063}
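/* A minimal standalone sketch (helper name is hypothetical, not part of ccv_cnnp_model.c) of the
 * bitset test used at line 1057: tensors_init.v is a plain uint32_t bitset over tensor symbol
 * indices, with bit d set once symbol d has been initialized. */
static inline int is_tensor_init_sketch(const uint32_t* const v, const int d)
{
	return !!(v[d >> 5] & (1u << (d & 0x1f))); /* word d / 32, bit d % 32 */
}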
1064
1065static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1066{
1067 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1067, __extension__ __PRETTY_FUNCTION__
); }))
;
1068 int i, j;
1069 for (i = 0; i < tensor_size; i++)
1070 {
1071 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1072 for (j = 1; j < parallel_count; j++)
1073 {
1074 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1075 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1076 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1077 { // We shouldn't allocate this, free it up.
1078 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1079 tensors[i + j * tensor_size] = 0;
1080 }
1081 }
1082 }
1083}
1084
1085static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1086{
1087 assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__
({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0"
, "ccv_cnnp_model.c", 1087, __extension__ __PRETTY_FUNCTION__
); }))
;
1088 int i, j;
1089 for (i = 0; i < tensor_size; i++)
1090 {
1091 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1092 if (graph)
1093 {
1094 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1095 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1096 tensor_symbol = alias_to;
1097 }
1098 ccv_nnc_tensor_t* const tensor = tensors[i];
1099 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1100 {
1101 const ccv_nnc_tensor_bind_t retained_bind = {
1102 .symbol = tensor_symbol,
1103 .tensor = tensor
1104 };
1105 ccv_array_push(tensor_binds, &retained_bind);
1106 }
1107 for (j = 1; j < parallel_count; j++)
1108 {
1109 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1110 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1111 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1112 {
1113 const ccv_nnc_tensor_bind_t bind = {
1114 .symbol = copy,
1115 .tensor = tensors[i + j * tensor_size]
1116 };
1117 ccv_array_push(tensor_binds, &bind);
1118 }
1119 }
1120 }
1121}
1122
1123static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1124{
1125 if (compiled_data->graph)
1126 ccv_nnc_graph_free(compiled_data->graph);
1127 compiled_data->graph = 0;
1128 compiled_data->is_test = 0;
1129 if (compiled_data->tensor_arena)
1130 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1131 compiled_data->tensor_arena = 0;
1132 if (compiled_data->graph_exec_arena)
1133 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1134 compiled_data->graph_exec_arena = 0;
1135 if (compiled_data->backward.from_ops)
1136 ccfreefree(compiled_data->backward.from_ops);
1137 compiled_data->backward.from_ops = 0;
1138 if (compiled_data->evaluate.schedule)
1139 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1140 compiled_data->evaluate.schedule = 0;
1141 if (compiled_data->backward.schedule)
1142 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1143 compiled_data->backward.schedule = 0;
1144}
1145
1146static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1147{
1148 if (compiled_data->gradients)
1149 ccfreefree(compiled_data->gradients);
1150 compiled_data->gradients = 0;
1151 if (compiled_data->updated_parameters)
1152 ccfreefree(compiled_data->updated_parameters);
1153 compiled_data->updated_parameters = 0;
1154 compiled_data->update_nodes = 0;
1155 compiled_data->saved_aux = 0;
1156}
1157
1158static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1159{
1160 if (compiled_data->backward.gradients)
1161 ccfreefree(compiled_data->backward.gradients);
1162 compiled_data->backward.gradients = 0;
1163 if (compiled_data->backward.accum)
1164 ccv_nnc_graph_free(compiled_data->backward.accum);
1165 compiled_data->backward.accum = 0;
1166 if (compiled_data->backward.tensor_arena)
1167 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1168 compiled_data->backward.tensor_arena = 0;
1169 if (compiled_data->backward.graph_exec_arena)
1170 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1171 compiled_data->backward.graph_exec_arena = 0;
1172}
1173
1174static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1175{
1176 if (compiled_data->apply_gradients.graph)
1177 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1178 compiled_data->apply_gradients.graph = 0;
1179 if (compiled_data->apply_gradients.tensor_arena)
1180 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1181 compiled_data->apply_gradients.tensor_arena = 0;
1182 if (compiled_data->apply_gradients.graph_exec_arena)
1183 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1184 compiled_data->apply_gradients.graph_exec_arena = 0;
1185}
1186
1187// Compile the graph to run ccv_cnnp_model_fit
1188static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1189{
1190 int i, j;
1191 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1192 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE"
, "ccv_cnnp_model.c", 1192, __extension__ __PRETTY_FUNCTION__
); }))
;
1193 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1194 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1195 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1195, __extension__ __PRETTY_FUNCTION__
); }))
;
1196 assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__
({ if (!fits || output_size == fit_size) ; else __assert_fail
("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1196
, __extension__ __PRETTY_FUNCTION__); }))
;
1197 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1197, __extension__ __PRETTY_FUNCTION__
); }))
;
1198 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1199 {
1200 _ccv_cnnp_model_set_rewindables(model);
1201 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1202 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1203 _ccv_cnnp_model_rewind_graph(model);
1204 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1205 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1206 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1207 }
1208 const int tensors_init = !!compiled_data->tensors_init.v;
1209 if (!tensors_init)
1210 ccv_cnnp_model_tensors_init(model, compiled_data);
1211 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1212 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1212, __extension__ __PRETTY_FUNCTION__); }))
;
1213 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1213, __extension__ __PRETTY_FUNCTION__); }))
;
1214 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1214
, __extension__ __PRETTY_FUNCTION__); }))
;
1215 const int input_size_per_p = input_size / parallel_count;
1216 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1217 const int output_size_per_p = output_size / parallel_count;
1218 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1219 const int fit_size_per_p = fit_size / parallel_count;
1220 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1221 const int parameter_size = compiled_data->parameters->rnum;
1222 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1223 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1224 const int internal_size = compiled_data->internals->rnum;
1225 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1226 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1227 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1228 ccv_array_free(tensor_binds);
1229 if (tensors_init && parallel_count > 1)
1230 _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1231 // If tensor is not init'ed, we need to init states first.
1232 if (_ccv_cnnp_any_to_init(compiled_data))
1233 {
1234 ccv_nnc_tensor_init_states_t tensor_init_states = {
1235 .parallel_count = parallel_count,
1236 .graph = model->graph,
1237 .compiled_data = compiled_data,
1238 .tensor_arena = compiled_data->tensor_arena
1239 };
1240 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1241 }
1242 compiled_data->is_test = 0;
1243 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1244 // No need to set because it defaults to training mode.
1245 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1246 for (i = 0; i < saved_aux_size * parameter_size; i++)
1247 {
1248 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1249 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1250 for (j = 1; j < parallel_count; j++)
1251 {
1252 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1253 if (copy)
1254 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1255 }
1256 }
1257 const int evaluate_to_size = compiled_data->evaluate.to_size;
1258 compiled_data->evaluate.to_op_size = 0;
1259 for (i = 0; i < evaluate_to_size; i++)
1260 {
1261 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1262 if (to.graph)
1263 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1264 }
1265 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type);
1266 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1267}
1268
1269ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1270{
1271 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1272 if (!compiled_data || !compiled_data->graph)
1273 return 0;
1274 return ccv_nnc_graph_default_stream(compiled_data->graph);
1275}
1276
1277uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1278{
1279 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1280 if (!compiled_data || !compiled_data->tensor_arena)
1281 return 0;
1282 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1283}
1284
1285static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1286{
1287 int i, j;
1288 for (i = 0; i < tensor_size; i++)
1289 {
1290 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1291 if (graph)
1292 {
1293 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1294 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1295 tensor_symbol = alias_to;
1296 }
1297 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1298 for (j = 1; j < parallel_count; j++)
1299 {
1300 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1301 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1302 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1303 }
1304 }
1305}
1306
1307void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1308{
1309 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1310 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1310, __extension__ __PRETTY_FUNCTION__); }))
;
1311 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1312 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1312, __extension__ __PRETTY_FUNCTION__
); }))
;
1313 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1313, __extension__ __PRETTY_FUNCTION__
); }))
;
1314 assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__
({ if (!fits || fit_size == output_size) ; else __assert_fail
("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1314
, __extension__ __PRETTY_FUNCTION__); }))
;
1315 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1315, __extension__ __PRETTY_FUNCTION__); }))
;
1316 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1317 {
1318 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1319 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1320 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1321 // Compile the symbolic graph down only when needed.
1322 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1323 } else {
1324 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1324, __extension__ __PRETTY_FUNCTION__); }))
;
1325 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1325, __extension__ __PRETTY_FUNCTION__); }))
;
1326 assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__
({ if ((fit_size % parallel_count) == 0) ; else __assert_fail
("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1326
, __extension__ __PRETTY_FUNCTION__); }))
;
1327 const int input_size_per_p = input_size / parallel_count;
1328 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1329 const int output_size_per_p = output_size / parallel_count;
1330 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1331 const int fit_size_per_p = fit_size / parallel_count;
1332 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1333 }
1334 if (compiled_data->is_test)
1335 {
1336 compiled_data->is_test = 0;
1337 ccv_nnc_graph_exec_update_t update = {
1338 .parallel_count = parallel_count,
1339 .graph = model->graph,
1340 .graph_exec_arena = compiled_data->graph_exec_arena,
1341 };
1342 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1343 }
1344 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1345}
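/* A minimal usage sketch of ccv_cnnp_model_fit (not from the original source; it assumes a model
 * `m` already compiled with one input, one fit and one output, parallel_count == 1, and tensors
 * x, f, y created elsewhere with ccv_nnc_tensor_new -- all names are hypothetical):
 *
 *     ccv_nnc_tensor_t* const inputs[] = { x };
 *     ccv_nnc_tensor_t* const fits[] = { f };
 *     ccv_nnc_tensor_t* const outputs[] = { y };
 *     ccv_cnnp_model_fit(m, inputs, 1, fits, 1, outputs, 1, 0, 0);
 *
 * The first call compiles the fit graph via _ccv_cnnp_model_fit_jit; later calls only rebind the
 * tensors (lines 1327..1332) and rerun the schedule. */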
1346
1347 // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = false (MULTISTAGE_MODE_NO_GRAD).
1348static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1349{
1350 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1351 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1352 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1353 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1353, __extension__ __PRETTY_FUNCTION__
); }))
;
1354 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1354, __extension__ __PRETTY_FUNCTION__
); }))
;
1355 // If the gradient is not initialized, continue to set up the parallel process. We don't init the gradient here; rather,
1356 // we set up proper rewindables so the graph can be rewound to its previous state before we run data parallel.
1357 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1358 {
1359 const int evaluate_to_size = compiled_data->evaluate.to_size;
1360 compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1361 _ccv_cnnp_model_set_rewindables(model);
1362 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1363 0, 0,
1364 0, 0, 0,
1365 0, 0, 0,
1366 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1367 SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size
(model->graph)
);
1368 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1369 int i, j;
1370 for (i = 0; i < evaluate_to_size; i++)
1371 for (j = 1; j < parallel_count; j++)
1372 {
1373 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1374 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1375 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1376 }
1377 }
1378 const int tensors_init = !!compiled_data->tensors_init.v;
1379 if (!tensors_init)
1380 ccv_cnnp_model_tensors_init(model, compiled_data);
1381 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1382 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1382, __extension__ __PRETTY_FUNCTION__); }))
;
1383 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1383, __extension__ __PRETTY_FUNCTION__); }))
;
1384 const int input_size_per_p = input_size / parallel_count;
1385 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1386 const int output_size_per_p = output_size / parallel_count;
1387 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1388 const int parameter_size = compiled_data->parameters->rnum;
1389 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1390 const int internal_size = compiled_data->internals->rnum;
1391 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count);
1392 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + (
size_t)(compiled_data->internals)->rsize * (size_t)(0))
)
, compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1393 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1394 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size
(model->graph)
, compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1395 ccv_array_free(tensor_binds);
1396 // If tensor is not init'ed, we need to init states first.
1397 if (tensors_init && parallel_count > 1)
1398 _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1399 if (_ccv_cnnp_any_to_init(compiled_data))
1400 {
1401 ccv_nnc_tensor_init_states_t tensor_init_states = {
1402 .parallel_count = parallel_count,
1403 .graph = model->graph,
1404 .compiled_data = compiled_data,
1405 .tensor_arena = compiled_data->tensor_arena
1406 };
1407 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1408 }
1409 compiled_data->is_test = 1;
1410 ccv_nnc_graph_exec_update_t update = {
1411 .parallel_count = parallel_count,
1412 .graph = model->graph,
1413 .graph_exec_arena = compiled_data->graph_exec_arena,
1414 };
1415 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1416 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type);
1417 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1418}
1419
1420static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1421{
1422 assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0
), __extension__ ({ if (!compiled_data->tensors.gradients)
; else __assert_fail ("!compiled_data->tensors.gradients"
, "ccv_cnnp_model.c", 1422, __extension__ __PRETTY_FUNCTION__
); }))
;
1423 const int parameter_size = compiled_data->parameters->rnum;
1424 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1425 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1426 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1427 int i, j;
1428 for (i = 0; i < parameter_size; i++)
1429 {
1430 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
1431 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1432 CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff
) << 8))
;
1433 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1434 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until we need it.
1435 for (j = 1; j < parallel_count; j++)
1436 {
1437 CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff
) << 8))
;
1438 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1439 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1440 }
1441 }
1442}
1443
1444static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1445{
1446 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1447 return 1;
1448 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1449 return 0;
1450 int i;
1451 for (i = 0; i < input_size; i++)
1452 if (!(disable_outgrad & ((uint64_t)1 << i)))
1453 return 0;
1454 return 1;
1455}
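/* A small worked example (values are hypothetical, not from the original source): disable_outgrad
 * is a per-input bitmask where bit i set means "do not compute the gradient w.r.t. input i", with
 * CCV_CNNP_DISABLE_OUTGRAD_NONE and CCV_CNNP_DISABLE_OUTGRAD_ALL as sentinels. With
 * input_size == 3 and disable_outgrad == 0x5 (inputs 0 and 2 disabled), the helper above returns
 * 0 and the outgrad_size counted at lines 925..927 would be 1 (only input 1). */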
1456
1457// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1458// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1459static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1460{
1461 int i, j;
1462 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1463 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1464 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data->
graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data
->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__
({ if (!compiled_data->graph || compiled_data->graph_mode
!= CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->
gradient_mode != target_gradient_mode) ; else __assert_fail (
"!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode"
, "ccv_cnnp_model.c", 1464, __extension__ __PRETTY_FUNCTION__
); }))
;
1465 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1466 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1467 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1467, __extension__ __PRETTY_FUNCTION__
); }))
;
1468 assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ (
{ if (output_size > 0) ; else __assert_fail ("output_size > 0"
, "ccv_cnnp_model.c", 1468, __extension__ __PRETTY_FUNCTION__
); }))
;
1469 // There shouldn't be a loss function if we evaluate with multistage jit.
1470 assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ?
1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP"
, "ccv_cnnp_model.c", 1470, __extension__ __PRETTY_FUNCTION__
); }))
;
1471 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1472 {
1473 _ccv_cnnp_model_set_rewindables(model);
1474 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1475 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1476 _ccv_cnnp_model_rewind_graph(model);
1477 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1478 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1479 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1480 }
1481 const int tensors_init = !!compiled_data->tensors_init.v;
1482 if (!tensors_init)
1483 ccv_cnnp_model_tensors_init(model, compiled_data);
1484 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1485 assert((input_size % parallel_count) == 0);
1486 assert((output_size % parallel_count) == 0);
1487 const int input_size_per_p = input_size / parallel_count;
1488 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1489 const int output_size_per_p = output_size / parallel_count;
1490 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1491 const int parameter_size = compiled_data->parameters->rnum;
1492 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1493 const int internal_size = compiled_data->internals->rnum;
1494 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1495 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1496 if (!compiled_data->tensors.gradients)
1497 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1498 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1499 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1500 ccv_array_free(tensor_binds);
1501 if (tensors_init && parallel_count > 1)
1502 _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1503 // If tensor is not init'ed, we need to init states first.
1504 if (_ccv_cnnp_any_to_init(compiled_data))
1505 {
1506 ccv_nnc_tensor_init_states_t tensor_init_states = {
1507 .parallel_count = parallel_count,
1508 .graph = model->graph,
1509 .compiled_data = compiled_data,
1510 .tensor_arena = compiled_data->tensor_arena
1511 };
1512 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1513 }
1514 compiled_data->is_test = is_test;
1515 ccv_nnc_graph_exec_update_t update = {
1516 .parallel_count = parallel_count,
1517 .graph = model->graph,
1518 .graph_exec_arena = compiled_data->graph_exec_arena,
1519 };
1520 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1521 const int evaluate_to_size = compiled_data->evaluate.to_size;
1522 compiled_data->evaluate.to_op_size = 0;
1523 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1524 for (i = 0; i < evaluate_to_size; i++)
1525 {
1526 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1527 if (to_op.graph)
1528 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1529 const int* tos;
1530 int to_size;
1531 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1532 for (j = 0; j < to_size; j++)
1533 {
1534 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1535 .d = tos[j],
1536 .graph = model->graph
1537 });
1538 if (to_op.graph)
1539 ccv_array_add_unique_int(backward_from, to_op.d);
1540 }
1541 }
1542 assert(backward_from->rnum > 0);
1543 compiled_data->backward.from_op_size = backward_from->rnum;
1544 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1545 for (i = 0; i < backward_from->rnum; i++)
1546 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1547 .d = *(int*)ccv_array_get(backward_from, i),
1548 .graph = compiled_data->graph,
1549 };
1550 ccv_array_free(backward_from);
1551 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type);
1552 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1553}
1554
1555void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1556{
1557 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1558 assert(compiled_data);
1559 const int parallel_count = ccv_max(model->parallel_count, 1);
1560 assert(output_size == model->output_size * parallel_count);
1561 assert(input_size == model->input_size * parallel_count);
1562 assert(model->graph);
1563 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1564 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1565 if (!compiled_data->graph || mode_mismatch)
1566 {
1567 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1568 if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad).
1569 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1570 if (params.requires_grad)
1571 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1572 else
1573 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1574 } else {
1575 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1576 assert((input_size % parallel_count) == 0);
1577 const int input_size_per_p = input_size / parallel_count;
1578 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1579 assert((output_size % parallel_count) == 0);
1580 const int output_size_per_p = output_size / parallel_count;
1581 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1582 }
1583 if (compiled_data->is_test != params.is_test)
1584 {
1585 compiled_data->is_test = params.is_test;
1586 ccv_nnc_graph_exec_update_t update = {
1587 .parallel_count = parallel_count,
1588 .graph = model->graph,
1589 .graph_exec_arena = compiled_data->graph_exec_arena,
1590 };
1591 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1592 }
1593 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1594 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1595 else {
1596 if (!compiled_data->evaluate.schedule)
1597 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1598 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1599 }
1600}
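/*
 * Editor's sketch (not part of ccv_cnnp_model.c): one plausible way to call
 * ccv_cnnp_model_evaluate above, assuming a model that was already compiled with a
 * single input and a single output and parallel_count == 1. With requires_grad set
 * the MULTISTAGE graph is jitted so a later ccv_cnnp_model_backward can run; with it
 * cleared the no-grad path is taken instead.
 */
static void example_forward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const input, ccv_nnc_tensor_t* const output, const int training)
{
	const ccv_cnnp_evaluate_param_t params = {
		.requires_grad = training, // keep the backward-capable graph only when training
		.is_test = !training, // toggles layers that behave differently at inference time
		.disable_outgrad = 0, // keep gradients w.r.t. the inputs available
	};
	ccv_cnnp_model_evaluate(model, params, &input, 1, &output, 1, 0, 0);
}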
1601
1602// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1603// Particularly, this method compiles the accumulator graph.
1604static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1605{
1606 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1607 assert(compiled_data);
1608 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1609 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1610 const int parallel_count = ccv_max(model->parallel_count, 1);
1611 const int parameter_size = compiled_data->parameters->rnum;
1612 int i, j;
1613 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1614 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1615 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1616 for (i = 0; i < parameter_size; i++)
1617 for (j = 0; j < parallel_count; j++)
1618 {
1619 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1620 // Now, the old gradient is the accumulated gradient; get a new gradient tensor set up so we can collect them.
1621 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1622 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1623 ccv_nnc_tensor_symbol_t inputs[2];
1624 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1625 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1626 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1627 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD(), inputs, 2, &output, 1, 0);
1628 }
1629 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1630 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1631 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1632 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1633 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1634 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum), SYMBOLIC_GRAPH_DESTINATIONS(accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1635 ccv_nnc_symbolic_graph_free(accum);
1636 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type);
1637 ccv_array_free(tensor_binds);
1638}
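/*
 * Editor's note (conceptual sketch, not part of ccv_cnnp_model.c): per parameter and
 * per device, the accumulator graph built above contains one CMD_EWSUM_FORWARD node
 * that sums the previously accumulated gradient with the freshly computed one, and the
 * result is bound back onto the accumulated-gradient tensor. Element-wise that is
 * nothing more than:
 */
static void example_what_the_accum_graph_computes(float* const accum, const float* const fresh, const int count)
{
	int i;
	for (i = 0; i < count; i++)
		accum[i] += fresh[i]; // updated_accum_gradients = accum_gradients + gradients
}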
1639
1640void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1641{
1642 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1643 assert(compiled_data);
1644 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1645 const int parallel_count = ccv_max(model->parallel_count, 1);
1646 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count);
1647 if (outgrad_size > 0)
1648 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count); }
1649 assert(model->graph);
1650 assert(compiled_data->graph);
1651 const int parameter_size = compiled_data->parameters->rnum;
1652 // If we need to accumulate the gradients now, do jit on accumulator.
1653 if (compiled_data->backward.count > 0)
1654 {
1655 if (!compiled_data->backward.accum)
1656 _ccv_cnnp_model_multistage_jit_1(model);
1657 else if (compiled_data->backward.count == 1) {
1658 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1659 int i;
1660 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1661 for (i = 0; i < parameter_size * parallel_count; i++)
1662 {
1663 ccv_nnc_tensor_t* tensor;
1664 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor);
1665 }
1666 // Do rebind in case we messed up the binding (we switch accum_gradients and gradients).
1667 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1668 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1669 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1670 }
1671 }
1672 const int ingrad_size_per_p = model->output_size;
1673 const int outgrad_size_per_p = compiled_data->outgrad_size;
1674 int i, j;
1675 for (i = 0; i < ingrad_size_per_p; i++)
1676 {
1677 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1678 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1679 {
1680 // Set it to 1 if it is not specified.
1681 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1682 if (ingrad_tensor)
1683 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
1684 for (j = 1; j < parallel_count; j++)
1685 {
1686 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
1687 if (ingrad_tensor)
1688 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor), stream_context);
1689 }
1690 } else {
1691 // Make sure the length matches, in case it is an alias.
1692 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)));
1693 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
1694 for (j = 1; j < parallel_count; j++)
1695 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
1696 }
1697 }
1698 if (outgrad_size > 0)
1699 {
1700 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad");
1701 for (i = 0; i < outgrad_size_per_p; i++)
1702 if (outgrads[i])
1703 {
1704 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
1705 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
1706 for (j = 1; j < parallel_count; j++)
1707 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
1708 }
1709 } else {
1710 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||
1711 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS);
1712 }
1713 // We need to rebind here because in ccv_cnnp_model_evaluate, we clear bindings, which resets all bindings for the gradients.
1714 // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these
1715 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
1716 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
1717 if (!compiled_data->backward.schedule)
1718 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
1719 // Run the backward pass.
1720 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
1721 // If we need to run accumulation round, do that now.
1722 if (compiled_data->backward.count > 0)
1723 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
1724 // Update the count; this determines whether we need to accumulate or not.
1725 ++compiled_data->backward.count;
1726}
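/*
 * Editor's sketch (not part of ccv_cnnp_model.c): gradient accumulation with
 * ccv_cnnp_model_backward above. Passing 0 for ingrads seeds the gradients of the
 * outputs with 1; every backward call after the first one routes the new gradients
 * through the accumulator graph compiled by _ccv_cnnp_model_multistage_jit_1, so the
 * sums survive until ccv_cnnp_model_apply_gradients runs. Assumes a compiled
 * single-input / single-output model and parallel_count == 1.
 */
static void example_accumulate_over_micro_batches(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const micro_batches, const int micro_batch_count, ccv_nnc_tensor_t* const output)
{
	const ccv_cnnp_evaluate_param_t params = { .requires_grad = 1 };
	int i;
	for (i = 0; i < micro_batch_count; i++)
	{
		ccv_cnnp_model_evaluate(model, params, micro_batches + i, 1, &output, 1, 0, 0);
		ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0); // default ingrads, no outgrads requested
	}
	// backward.count is now micro_batch_count; apply_gradients (below) consumes the summed gradients.
}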
1727
1728// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
1729// Particularly, this method compiles the parameter update graph.
1730static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
1731{
1732 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1733 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1734 const int parallel_count = ccv_max(model->parallel_count, 1);
1735 const int parameter_size = compiled_data->parameters->rnum;
1736 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1737 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1738 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1739 // Bind accumulated gradients.
1740 if (compiled_data->backward.count > 1)
1741 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
1742 else
1743 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1744 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
1745 int i, j;
1746 for (i = 0; i < compiled_data->backward.to_size; i++)
1747 {
1748 const int* tos;
1749 int to_size;
1750 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
1751 for (j = 0; j < to_size; j++)
1752 {
1753 // Check if this already shows up in the backward graph; if that is the case, it won't be in the apply
1754 // gradients graph.
1755 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1756 .d = tos[j],
1757 .graph = model->graph,
1758 });
1759 if (!exec.graph)
1760 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
1761 }
1762 }
1763 const int from_size = apply_gradients_from->rnum;
1764 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
1765 for (i = 0; i < from_size; i++)
1766 froms[i] = (ccv_nnc_graph_exec_symbol_t){
1767 .d = *(int*)ccv_array_get(apply_gradients_from, i),
1768 .graph = model->graph
1769 };
1770 ccv_array_free(apply_gradients_from);
1771 // It can only end with updates on the parameters.
1772 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
1773 for (i = 0; i < parameter_size; i++)
1774 {
1775 ccv_array_push(tos, &compiled_data->update_nodes[i]);
1776 for (j = 1; j < parallel_count; j++)
1777 {
1778 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
1779 ccv_array_push(tos, &copy);
1780 }
1781 }
1782 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
1783 ccv_array_free(tos);
1784 ccv_array_free(tensor_binds);
1785 ccfree(froms);
1786 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1787 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
1788 {
1789 // Skip on no tensor.
1790 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1791 continue;
1792 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
1793 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1794 for (j = 1; j < parallel_count; j++)
1795 {
1796 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1797 if (copy)
1798 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1799 }
1800 }
1801 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type);
1802}
1803
1804void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
1805{
1806 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1807 assert(compiled_data);
1808 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE);
1809 const int parallel_count = ccv_max(model->parallel_count, 1);
1810 assert(model->graph);
1811 assert(compiled_data->graph);
1812 // Skip if there is no backward pass.
1813 if (compiled_data->backward.count <= 0)
1814 return;
1815 // Skip if there are no parameters.
1816 if (compiled_data->parameters->rnum == 0)
1817 {
1818 compiled_data->backward.count = 0;
1819 return;
1820 }
1821 if (!compiled_data->apply_gradients.graph)
1822 _ccv_cnnp_model_multistage_jit_2(model);
1823 else {
1824 const int parameter_size = compiled_data->parameters->rnum;
1825 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
1826 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
1827 if (compiled_data->backward.count > 1)
1828 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
1829 else
1830 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
1831 }
1832 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
1833 // Reset backward count to 0.
1834 compiled_data->backward.count = 0;
1835}
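/*
 * Editor's sketch (not part of ccv_cnnp_model.c): a complete training step ties the
 * three public entry points together. ccv_cnnp_model_apply_gradients above is a no-op
 * until at least one backward pass has run, and it resets backward.count so the next
 * step starts a fresh accumulation. Assumes a compiled single-input / single-output
 * model, parallel_count == 1, and a caller-provided gradient w.r.t. the output.
 */
static void example_train_step(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const input, ccv_nnc_tensor_t* const output, ccv_nnc_tensor_t* const output_grad)
{
	const ccv_cnnp_evaluate_param_t params = { .requires_grad = 1, .is_test = 0 };
	ccv_cnnp_model_evaluate(model, params, &input, 1, &output, 1, 0, 0); // forward
	ccv_cnnp_model_backward(model, &output_grad, 1, 0, 0, 0, 0); // backward with an explicit ingrad
	ccv_cnnp_model_apply_gradients(model, 0); // parameter update with the configured minimizer
}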
1836
1837void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
1838{
1839 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1840 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
1841 assert(parameter->param_sel != 0);
1842 const int tensors_init = !!compiled_data->tensors_init.v;
1843 if (!tensors_init)
1844 ccv_cnnp_model_tensors_init(model, compiled_data);
1845 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1846 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
1847 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
1848 if (param_ref < 0)
1849 { assert(parameter_indices->rnum == 1); }
1850 else
1851 { assert(param_ref < parameter_indices->rnum); }
1852 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
1853 ccv_array_free(parameter_indices);
1854 const int parameter_size = compiled_data->parameters->rnum;
1855 assert(d >= 0);
1856 assert(d < parameter_size);
1857 const int parallel_count = ccv_max(model->parallel_count, 1);
1858 ccv_nnc_tensor_t* const dest = compiled_data->tensors.parameters[d];
1859 assert(dest);
1860 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor), TENSOR_LIST(dest), 0);
1861 int i;
1862 for (i = 1; i < parallel_count; i++)
1863 {
1864 ccv_nnc_tensor_t* const copy_tensor = compiled_data->tensors.parameters[d + i * parameter_size];
1865 if (copy_tensor)
1866 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dest), TENSOR_LIST(copy_tensor), 0);
1867 }
1868 // Mark this symbol as init'ed.
1869 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d))->d;
1870 compiled_data->tensors_init.v[s >> 5] |= (1u << (s & 0x1f));
1871}
1872
1873void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
1874{
1875 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1876 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
1877 assert(parameter->param_sel != 0);
1878 assert(compiled_data->tensors.parameters);
1879 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1880 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
1881 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
1882 if (param_ref < 0)
1883 { assert(parameter_indices->rnum == 1); }
1884 else
1885 { assert(param_ref < parameter_indices->rnum); }
1886 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
1887 ccv_array_free(parameter_indices);
1888 const int parameter_size = compiled_data->parameters->rnum;
1889 assert(d >= 0);
1890 assert(d < parameter_size);
1891 // We don't need to consider parallel_count, every parameter on each device is identical.
1892 ccv_nnc_tensor_t* const src = compiled_data->tensors.parameters[d];
1893 assert(src);
1894 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(tensor), 0);
1895}
1896
1897ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter)
1898{
1899 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1900 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
1901 assert(parameter->param_sel != 0);
1902 assert(compiled_data->tensors.parameters);
1903 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1904 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
1905 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
1906 if (param_ref < 0)
1907 { assert(parameter_indices->rnum == 1); }
1908 else
1909 { assert(param_ref < parameter_indices->rnum); }
1910 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0);
1911 ccv_array_free(parameter_indices);
1912 const int parameter_size = compiled_data->parameters->rnum;
1913 assert(d >= 0);
1914 assert(d < parameter_size);
1915 // We don't need to consider parallel_count, every parameter on each device is identical.
1916 ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[d];
1917 assert(tensor);
1918 return tensor->info;
1919}
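/*
 * Editor's sketch (not part of ccv_cnnp_model.c): reading a parameter out of a model
 * and writing it back with the three accessors above. `param_io` is assumed to be a
 * ccv_cnnp_model_io_t parameter selector obtained elsewhere (how it is built is not
 * shown in this file); note the staging tensor inherits the parameter's tensor
 * parameters, including its device type.
 */
static void example_parameter_round_trip(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t param_io)
{
	const ccv_nnc_tensor_param_t info = ccv_cnnp_model_parameter_tensor_params(model, param_io);
	ccv_nnc_tensor_t* const staging = ccv_nnc_tensor_new(0, info, 0);
	ccv_cnnp_model_parameter_copy(model, param_io, staging); // parameter -> staging
	/* ... inspect or modify the staging tensor here ... */
	ccv_cnnp_model_set_parameter(model, param_io, staging); // staging -> parameter on every device
	ccv_nnc_tensor_free(staging);
}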
1920
1921static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
1922{
1923 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
1924 assert(parameters->param_sel != 0);
1925 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1926 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
1927 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
1928 return to_parameter_indices;
1929}
1930
1931static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref)
1932{
1933 // If the model is not compiled yet, compile it now.
1934 if (!model->graph)
1935 {
1936 model->graph = ccv_nnc_symbolic_graph_new();
1937 assert(from_model->compiled_data);
1938 const int input_size = from_model->input_size;
1939 ccv_nnc_tensor_param_t input_params[input_size];
1940 int i;
1941 for (i = 0; i < input_size; i++)
1942 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
1943 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
1944 model->parallel_count = from_model->parallel_count;
1945 model->memory_compression = from_model->memory_compression;
1946 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
1947 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
1948 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
1949 }
1950 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
1951 assert(to_compiled_data);
1952 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
1953 if (!to_tensors_init)
1954 ccv_cnnp_model_tensors_init(model, to_compiled_data);
1955 assert(to_compiled_data->tensors.parameters);
1956 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
1957 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
1958 if (*from_param_ref < 0 && *param_ref >= 0)
1959 { assert((*from_parameter_indices)->rnum == 1); }
1960 else if (*from_param_ref >= 0)
1961 { assert(*from_param_ref < (*from_parameter_indices)->rnum); }
1962 if (*param_ref < 0 && *from_param_ref >= 0)
1963 { assert((*parameter_indices)->rnum == 1); }
1964 else if (*param_ref >= 0)
1965 { assert(*param_ref < (*parameter_indices)->rnum); }
1966 // Should be exactly the same tensor.
1967 if (*param_ref < 0 && *from_param_ref < 0)
1968 { assert((*from_parameter_indices)->rnum == (*parameter_indices)->rnum); }
1969}
1970
1971void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
1972{
1973 ccv_array_t* to_parameter_indices;
1974 int to_param_ref;
1975 ccv_array_t* from_parameter_indices;
1976 int from_param_ref;
1977 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref);
1978 // To models.
1979 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
1980 assert(to_compiled_data);
1981 // From models.
1982 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
1983 const int parallel_count = ccv_max(model->parallel_count, 1);
1984 const int to_parameter_size = to_compiled_data->parameters->rnum;
1985 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
1986 int i, j;
1987 for (i = 0; i < rnum; i++)
1988 {
1989 const int src_d = *(int*)ccv_array_get(from_parameter_indices, from_param_ref >= 0 ? from_param_ref : i);
1990 assert(src_d >= 0);
1991 assert(src_d < from_compiled_data->parameters->rnum);
1992 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d))->d;
1993 // If the original is not init'ed, we cannot copy from it.
1994 if (!(from_compiled_data->tensors_init.v[s >> 5] & (1u << (s & 0x1f))))
1995 continue;
1996 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
1997 assert(dest_d >= 0);
1998 assert(dest_d < to_compiled_data->parameters->rnum);
1999 ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d];
2000 assert(src);
2001 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d];
2002 assert(dest);
2003 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(src), TENSOR_LIST(dest), 0);
2004 for (j = 1; j < parallel_count; j++)
2005 {
2006 ccv_nnc_tensor_t* const copy_tensor = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2007 if (copy_tensor)
2008 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dest), TENSOR_LIST(copy_tensor), 0);
2009 }
2010 // Mark this symbol as init'ed.
2011 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d))->d;
2012 to_compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
2013 }
2014 ccv_array_free(to_parameter_indices);
2015 ccv_array_free(from_parameter_indices);
2016}
2017
2018ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
2019{
2020 if (!compiled_data->stream_map)
2021 compiled_data->stream_map = kh_init(stream_map);
2022 int ret = 0;
2023 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret);
2024 assert(ret >= 0);
2025 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k);
2026 // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
2027 if (ret != 0)
2028 {
2029 stream = ccv_nnc_stream_context_new(type);
2030 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
2031 }
2032 return stream;
2033}
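/* Editor's note: a minimal sketch of how a caller builds the stream "type" key consumed
 * by ccv_cnnp_compiled_data_get_stream, mirroring the pattern used in the map functions
 * below (GPU memory selects a GPU stream context, and the device id is packed into the
 * type). The tensor variable here is hypothetical:
 *
 *   const int stream_type = CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY
 *       ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
 *   int type = stream_type;
 *   CCV_STREAM_SET_DEVICE_ID(type, CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
 *   ccv_nnc_stream_context_t* const stream = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
 *   // The stream is cached in compiled_data->stream_map and freed in _ccv_cnnp_compiled_data_free.
 */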
2034
2035void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2036{
2037 ccv_array_t* to_parameter_indices;
2038 int to_param_ref;
2039 ccv_array_t* from_parameter_indices;
2040 int from_param_ref;
2041 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref);
2042 // To models.
2043 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2044 assert(to_compiled_data);
2045 // From models.
2046 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2047 const int parallel_count = ccv_max(model->parallel_count, 1);
2048 const int to_parameter_size = to_compiled_data->parameters->rnum;
2049 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2050 assert(aux_in_size >= 0);
2051 assert(aux_out_size >= 0);
2052 int i, j;
2053 ccv_nnc_tensor_t* inputs[aux_in_size + 2];
2054 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2055 for (i = 0; i < aux_in_size; i++)
2056  inputs[i + 2] = aux_ins[i];
2057 for (i = 0; i < aux_out_size; i++)
2058  outputs[i + 1] = aux_outs[i];
2059 for (i = 0; i < rnum; i++)
2060 {
2061  const int src_d = *(int*)ccv_array_get(from_parameter_indices, from_param_ref >= 0 ? from_param_ref : i);
2062  assert(src_d >= 0);
2063  assert(src_d < from_compiled_data->parameters->rnum);
2064  const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d))->d;
2065  // If the original is not init'ed, we cannot copy from it.
2066  if (!(from_compiled_data->tensors_init.v[s >> 5] & (1u << (s & 0x1f))))
2067   continue;
2068  const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2069  assert(dest_d >= 0);
2070  assert(dest_d < to_compiled_data->parameters->rnum);
2071  if (parallel_count > 1)
2072  {
2073   ccv_nnc_stream_context_t* streams[parallel_count];
2074   ccv_nnc_stream_signal_t* signal;
2075   if (stream_context)
2076    signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2077   for (j = 0; j < parallel_count; j++)
2078   {
2079    ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d + j * to_parameter_size];
2080    ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2081    if (!dest || !src)
2082    {
2083     streams[j] = 0;
2084     continue;
2085    }
2086    // At the moment, we can only handle them on the same device.
2087    assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type));
2088    assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type));
2089    const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2090    const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type);
2091    int type = stream_type;
2092    CCV_STREAM_SET_DEVICE_ID(type, device_id);
2093    ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2094    // Wait for the signal to finish.
2095    if (stream_context)
2096     ccv_nnc_stream_context_wait_signal(stream_0, signal);
2097    inputs[0] = outputs[0] = dest;
2098    inputs[1] = src;
2099    ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0);
2100    if (stream_context)
2101    {
2102     ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2103     ccv_nnc_stream_context_wait_signal(stream_context, signal);
2104    }
2105    streams[j] = stream_0;
2106   }
2107   // If this should be blocking, block on it.
2108   if (!stream_context)
2109    for (j = 0; j < parallel_count; j++)
2110     if (streams[j])
2111      ccv_nnc_stream_context_wait(streams[j]);
2112  } else {
2113   ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d];
2114   assert(src);
2115   ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d];
2116   assert(dest);
2117   inputs[0] = outputs[0] = dest;
2118   inputs[1] = src;
2119   ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context);
2120  }
2121  // Mark this symbol as init'ed.
2122  const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d))->d;
2123  to_compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
2124 }
2125 ccv_array_free(to_parameter_indices);
2126 ccv_array_free(from_parameter_indices);
2127}
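/* Editor's note: ccv_cnnp_model_parameters_zip_map pairs each destination parameter
 * (bound as inputs[0] and outputs[0]) with the corresponding source parameter (inputs[1])
 * and runs the supplied command on every pair. A minimal sketch of an EMA-style blend
 * driven through it; the use of CMD_ADD_FORWARD with two scale factors is an assumption
 * about the command set, and ema_parameters / model_parameters are assumed to be
 * ccv_cnnp_model_io_t handles selecting all parameters of each model:
 *
 *   // ema = 0.9 * ema + 0.1 * model parameters, for every matching parameter pair.
 *   ccv_cnnp_model_parameters_zip_map(ema_model, ema_parameters, CMD_ADD_FORWARD(0.9, 0.1),
 *       ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0, model, model_parameters);
 */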
2128
2129void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2130{
2131 int to_param_ref;
2132 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2133 // To models.
2134 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2135 assert(to_compiled_data);
2136 // Tensors have to be init'ed already.
2137 assert(!!to_compiled_data->tensors_init.v);
2138 assert(to_compiled_data->tensors.parameters);
2139 // From models.
2140 const int parallel_count = ccv_max(model->parallel_count, 1);
2141 const int to_parameter_size = to_compiled_data->parameters->rnum;
2142 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2143 assert(aux_in_size >= 0);
2144 assert(aux_out_size >= 0);
2145 int i, j;
2146 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2147 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2148 for (i = 0; i < aux_in_size; i++)
2149  inputs[i + 1] = aux_ins[i];
2150 for (i = 0; i < aux_out_size; i++)
2151  outputs[i + 1] = aux_outs[i];
2152 for (i = 0; i < rnum; i++)
2153 {
2154  const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2155  assert(dest_d >= 0);
2156  assert(dest_d < to_compiled_data->parameters->rnum);
2157  if (parallel_count > 1)
2158  {
2159   ccv_nnc_stream_context_t* streams[parallel_count];
2160   ccv_nnc_stream_signal_t* signal;
2161   if (stream_context)
2162    signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2163   for (j = 0; j < parallel_count; j++)
2164   {
2165    ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2166    if (!dest)
2167    {
2168     streams[j] = 0;
2169     continue;
2170    }
2171    const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2172    const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type);
2173    int type = stream_type;
2174    CCV_STREAM_SET_DEVICE_ID(type, device_id);
2175    ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2176    // Wait for the signal to finish.
2177    if (stream_context)
2178     ccv_nnc_stream_context_wait_signal(stream_0, signal);
2179    inputs[0] = outputs[0] = dest;
2180    ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2181    if (stream_context)
2182    {
2183     ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2184     ccv_nnc_stream_context_wait_signal(stream_context, signal);
2185    }
2186    streams[j] = stream_0;
2187   }
2188   // If this should be blocking, block on it.
2189   if (!stream_context)
2190    for (j = 0; j < parallel_count; j++)
2191     if (streams[j])
2192      ccv_nnc_stream_context_wait(streams[j]);
2193  } else {
2194   ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d];
2195   assert(dest);
2196   inputs[0] = outputs[0] = dest;
2197   ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2198  }
2199  // No need to mark this symbol as init'ed, it is already.
2200 }
2201 ccv_array_free(to_parameter_indices);
2202}
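/* Editor's note: ccv_cnnp_model_parameters_map runs one command over each selected
 * parameter, with the parameter tensor bound as both inputs[0] and outputs[0]. A minimal
 * in-place sketch, assuming CMD_SET_FORWARD(0) is available to fill a tensor with a
 * constant (an assumption about the command set) and that model_parameters is a
 * ccv_cnnp_model_io_t selecting the parameters to touch:
 *
 *   // Zero the selected parameters in place, blocking (no stream context).
 *   ccv_cnnp_model_parameters_map(model, model_parameters, CMD_SET_FORWARD(0),
 *       ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
 */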
2203
2204void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context)
2205{
2206 int to_param_ref;
2207 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2208 // To models.
2209 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2210 assert(to_compiled_data);
2211 // Tensors have to be init'ed already.
2212 assert(!!to_compiled_data->tensors_init.v);
2213 ccv_nnc_tensor_t** tensor_gradients;
2214 if (to_compiled_data->backward.count > 1)
2215  tensor_gradients = to_compiled_data->tensors.accum_gradients;
2216 else
2217  tensor_gradients = to_compiled_data->tensors.gradients;
2218 assert(tensor_gradients);
2219 // From models.
2220 const int parallel_count = ccv_max(model->parallel_count, 1);
2221 const int to_parameter_size = to_compiled_data->parameters->rnum;
2222 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
2223 assert(aux_in_size >= 0);
2224 assert(aux_out_size >= 0);
2225 int i, j;
2226 ccv_nnc_tensor_t* inputs[aux_in_size + 1];
2227 ccv_nnc_tensor_t* outputs[aux_out_size + 1];
2228 for (i = 0; i < aux_in_size; i++)
2229  inputs[i + 1] = aux_ins[i];
2230 for (i = 0; i < aux_out_size; i++)
2231  outputs[i + 1] = aux_outs[i];
2232 for (i = 0; i < rnum; i++)
2233 {
2234  const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i);
2235  assert(dest_d >= 0);
2236  assert(dest_d < to_compiled_data->parameters->rnum);
2237  if (parallel_count > 1)
2238  {
2239   ccv_nnc_stream_context_t* streams[parallel_count];
2240   ccv_nnc_stream_signal_t* signal;
2241   if (stream_context)
2242    signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2243   for (j = 0; j < parallel_count; j++)
2244   {
2245    ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size];
2246    if (!dest)
2247    {
2248     streams[j] = 0;
2249     continue;
2250    }
2251    const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2252    const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type);
2253    int type = stream_type;
2254    CCV_STREAM_SET_DEVICE_ID(type, device_id);
2255    ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2256    // Wait for the signal to finish.
2257    if (stream_context)
2258     ccv_nnc_stream_context_wait_signal(stream_0, signal);
2259    inputs[0] = outputs[0] = dest;
2260    ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0);
2261    if (stream_context)
2262    {
2263     ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2264     ccv_nnc_stream_context_wait_signal(stream_context, signal);
2265    }
2266    streams[j] = stream_0;
2267   }
2268   // If this should be blocking, block on it.
2269   if (!stream_context)
2270    for (j = 0; j < parallel_count; j++)
2271     if (streams[j])
2272      ccv_nnc_stream_context_wait(streams[j]);
2273  } else {
2274   ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d];
2275   assert(dest);
2276   inputs[0] = outputs[0] = dest;
2277   ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context);
2278  }
2279  // No need to mark this symbol as init'ed, it is already.
2280 }
2281 ccv_array_free(to_parameter_indices);
2282}
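/* Editor's note: ccv_cnnp_model_parameter_gradients_map applies the same pattern to the
 * gradient (or accumulated-gradient) tensors instead of the parameters themselves. A
 * minimal sketch that clears the selected gradients, under the same CMD_SET_FORWARD(0)
 * and model_parameters assumptions as the sketch above:
 *
 *   ccv_cnnp_model_parameter_gradients_map(model, model_parameters, CMD_SET_FORWARD(0),
 *       ccv_nnc_no_hint, 0, 0, 0, 0, 0, 0);
 */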
2283
2284ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2285{
2286 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2287 assert(compiled_data);
2288 return compiled_data->minimize.minimizer;
2289}
2290
2291void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2292{
2293 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2294 assert(compiled_data);
2295 const int parameter_size = compiled_data->parameters->rnum;
2296 if (parameter_size == 0)
2297  return;
2298 if (reset)
2299  { assert(set_parameters == 0 && set_parameter_size == 0); }
2300 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2301 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2302 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2303  compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2304 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2305 // We update all parameters; at this point, we have one minimizer.
2306 if (set_parameters == 0 || set_parameter_size == 0)
2307  compiled_data->minimize.minimizer = minimizer;
2308 int i;
2309 if (set_parameters && set_parameter_size)
2310 {
2311  // We need to save the minimizer along with these parameters.
2312  if (!compiled_data->minimize.parameters)
2313   compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2314  ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2315  set_minimizer_for_parameter->minimizer = minimizer;
2316  set_minimizer_for_parameter->parameter_size = set_parameter_size;
2317  memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2318  ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2319 }
2320 // If reset is true, clear the parameters array.
2321 if (reset && compiled_data->minimize.parameters)
2322 {
2323  for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2324   ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
2325  ccv_array_clear(compiled_data->minimize.parameters);
2326 }
2327 if (!compiled_data->update_nodes)
2328  return;
2329 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2330 assert(symbolic_graph);
2331 if (saved_aux_size > old_max_saved_aux_size)
2332 {
2333  assert(compiled_data->updated_parameters);
2334  // Reallocate first, move them around later.
2335  compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2336  compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2337  compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2338  // We need to do this from back to front because saved_aux_size > old_saved_aux_size, so the regions could overlap.
2339  _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2340 }
2341 int flag = 0;
2342 const int parallel_count = ccv_max(model->parallel_count, 1);
2343 if (set_parameters && set_parameter_size)
2344 {
2345  ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2346  for (i = 0; i < set_parameter_size; i++)
2347  {
2348   const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2349   assert(set_parameters[i]->param_sel != 0);
2350   const int old_rnum = parameter_indices->rnum;
2351   ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2352   const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2353   assert(set_parameters[i]->param_ref != 0);
2354   if (param_ref >= 0)
2355   {
2356    assert(param_ref + old_rnum < parameter_indices->rnum);
2357    *(int*)ccv_array_get(parameter_indices, old_rnum) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum);
2358    parameter_indices->rnum = old_rnum + 1;
2359   }
2360  }
2361  // We may have duplicated indices, but that is OK; we will simply set them twice.
2362  for (i = 0; i < parameter_indices->rnum; i++)
2363  {
2364   const int d = *(int*)ccv_array_get(parameter_indices, i);
2365   if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2366    flag = 1;
2367  }
2368  ccv_array_free(parameter_indices);
2369 } else {
2370  for (i = 0; i < parameter_size; i++)
2371   if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2372    flag = 1;
2373  if (compiled_data->minimize.parameters)
2374   if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2375    flag = 1;
2376 }
2377 if (flag)
2378 {
2379  // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up the apply-gradients graph.
2380  if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2381   _ccv_cnnp_compiled_data_graph_free(compiled_data);
2382  _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2383 }
2384}
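/* Editor's note: a minimal usage sketch for the two minimizer accessors above. It reads
 * the current minimizer, lowers its learning rate through the command's info field (the
 * exact .info.sgd.rate layout is an assumption, so treat it as illustrative), then
 * re-applies it to every parameter with reset = 1, which per the assert above requires
 * set_parameters == 0 and set_parameter_size == 0:
 *
 *   ccv_nnc_cmd_t minimizer = ccv_cnnp_model_minimizer(model);
 *   minimizer.info.sgd.rate *= 0.1f; // assumed field; adjust to the actual cmd params
 *   ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
 */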
2385
2386void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2387{
2388 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2389 assert(compiled_data);
2390 compiled_data->compile_params = compile_params;
2391}
2392
2393void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2394{
2395 if (model->graph && out_size > 0)
2396 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2397 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2398 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2399 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2400 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2401 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2402 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2403}
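/* Editor's note: a minimal sketch of dumping the graphs for inspection. As the function
 * above shows, the out slots map to the symbolic graph, the compiled graph, the gradient
 * accumulation graph, and the apply-gradients graph, in that order; CCV_NNC_LONG_DOT_GRAPH
 * is assumed to be the verbose dot flag used elsewhere in the library:
 *
 *   FILE* outs[4];
 *   outs[0] = fopen("symbolic.dot", "w+");
 *   outs[1] = fopen("graph.dot", "w+");
 *   outs[2] = fopen("accum.dot", "w+");
 *   outs[3] = fopen("apply_gradients.dot", "w+");
 *   ccv_cnnp_model_dot(model, CCV_NNC_LONG_DOT_GRAPH, outs, 4);
 *   for (int i = 0; i < 4; i++)
 *       fclose(outs[i]);
 */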
2404
2405static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2406{
2407 int i;
2408 const int parameter_size = compiled_data->parameters->rnum;
2409 ccv_array_free(compiled_data->parameters);
2410 const int internal_size = compiled_data->internals->rnum;
2411 ccv_array_free(compiled_data->internals);
2412 assert(compiled_data->ids.parameters->rnum == parameter_size);
2413 assert(compiled_data->ids.internals->rnum == internal_size);
2414 for (i = 0; i < parameter_size; i++)
2415  ccfree(*(char**)ccv_array_get(compiled_data->ids.parameters, i));
2416 ccv_array_free(compiled_data->ids.parameters);
2417 for (i = 0; i < internal_size; i++)
2418  ccfree(*(char**)ccv_array_get(compiled_data->ids.internals, i));
2419 ccv_array_free(compiled_data->ids.internals);
2420 const int parallel_count = ccv_max(model->parallel_count, 1);
2421 if (compiled_data->tensors.parameters)
2422 {
2423  for (i = 0; i < parameter_size * parallel_count; i++)
2424   ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2425  for (i = 0; i < internal_size * parallel_count; i++)
2426   if (compiled_data->tensors.internals[i])
2427    ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2428  ccfree(compiled_data->tensors.parameters);
2429 }
2430 if (compiled_data->tensors.gradients)
2431 {
2432  for (i = 0; i < parameter_size * parallel_count; i++)
2433  {
2434   ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2435   if (compiled_data->tensors.accum_gradients[i])
2436    ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2437  }
2438  ccfree(compiled_data->tensors.gradients);
2439 }
2440 if (compiled_data->minimize.parameters)
2441 {
2442  for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2443   ccfree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i));
2444  ccv_array_free(compiled_data->minimize.parameters);
2445 }
2446 if (compiled_data->rewindables)
2447  ccv_array_free(compiled_data->rewindables);
2448 if (compiled_data->tensors_init.v)
2449  ccfree(compiled_data->tensors_init.v);
2450 if (compiled_data->evaluate.tos)
2451  ccfree(compiled_data->evaluate.tos);
2452 compiled_data->evaluate.tos = 0;
2453 if (compiled_data->stream_map)
2454 {
2455  khiter_t k;
2456  for (k = kh_begin(compiled_data->stream_map); k != kh_end(compiled_data->stream_map); ++k)
2457  {
2458   if (!kh_exist(compiled_data->stream_map, k))
2459    continue;
2460   ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k);
2461   ccv_nnc_stream_context_free(stream);
2462  }
2463  kh_destroy(stream_map, compiled_data->stream_map);
2464 }
2465 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2466 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
2467 _ccv_cnnp_compiled_data_backward_free(compiled_data);
2468 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2469 ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc);
2470 ccfree(compiled_data);
2471}
2472
2473void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
2474{
2475 if (model->isa->deinit)
2476  model->isa->deinit(model);
2477 if (model->io)
2478 {
2479  int i;
2480  for (i = 0; i < model->io->rnum; i++)
2481  {
2482   ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i);
2483   if (model_io->outgoings)
2484    ccv_array_free(model_io->outgoings);
2485   if (model_io->incomings)
2486    ccv_array_free(model_io->incomings);
2487   ccfree(model_io);
2488  }
2489  ccv_array_free(model->io);
2490 }
2491 if (model->parameter_indices)
2492  ccv_array_free(model->parameter_indices);
2493 if (model->inputs)
2494  ccfree(model->inputs);
2495 if (model->graph)
2496  ccv_nnc_symbolic_graph_free(model->graph);
2497 if (model->compiled_data)
2498  _ccv_cnnp_compiled_data_free(model, model->compiled_data);
2499 if (model->name)
2500  ccfree(model->name);
2501 ccfree(model);
2502}