Bug Summary

File: nnc/./_ccv_nnc_stream.h
Warning: line 57, column 1
Array access (via field 'flags') results in a null pointer dereference
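
The flagged line lives in _ccv_nnc_stream.h, which this translation unit (ccv_cnnp_model.c) reaches through its headers. As a minimal, self-contained sketch of the class of defect this checker reports (hypothetical names, not the actual contents of _ccv_nnc_stream.h): indexing an array field through a pointer that can be NULL on some path.

#include <stdlib.h>

/* Hypothetical stand-in for the stream state; only the shape matters here. */
typedef struct {
	int flags[4];
} stream_state_t;

/* Without the NULL check, a NULL 'state' makes state->flags[i] exactly the
 * "array access results in a null pointer dereference" the analyzer reports. */
static int read_flag(const stream_state_t* const state, const int i)
{
	if (!state)
		return 0;
	return state->flags[i];
}

int main(void)
{
	stream_state_t* const state = (stream_state_t*)calloc(1, sizeof(stream_state_t));
	const int flag = read_flag(state, 0); /* safe even if calloc returned NULL */
	free(state);
	return flag;
}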

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/13.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/buildslave/public_html/analyze/2021-11-16-194228-199598-1 -x c ccv_cnnp_model.c

ccv_cnnp_model.c

1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6
7// MARK - Level-5 API
8
9ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size)
10{
11 assert(input_size > 0);
12 if (!model->io)
13 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
14 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size);
15 model_io->param_ref = 0;
16 model_io->param_sel = 0;
17 model_io->visit = 0;
18 model_io->model = model;
19 model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
20 model_io->outgoings = 0;
21 model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1);
22 ccv_array_push(model->io, &model_io);
23 int i;
24 ccv_array_resize(model_io->incomings, input_size);
25 memcpy(ccv_array_get(model_io->incomings, 0), inputs, sizeof(ccv_cnnp_model_io_t) * input_size);
26 for (i = 0; i < input_size; i++)
27 {
28 if (!inputs[i]->outgoings)
29 inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
30 ccv_array_push(inputs[i]->outgoings, &model_io);
31 }
32 return model_io;
33}
34
35int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model)
36{
37 return model->output_size;
38}
39
40ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index)
41{
42 if (!model->io)
43 model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0);
44 ccv_cnnp_model_io_t model_io = ccmalloc(sizeof(struct ccv_cnnp_model_io_s));
45 model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS;
46 model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS;
47 model_io->visit = 0;
48 model_io->model = model;
49 model_io->outputs = 0;
50 model_io->incomings = 0;
51 model_io->outgoings = 0;
52 ccv_array_push(model->io, &model_io);
53 return model_io;
54}
55
56void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context)
57{
58 model->notify_hook.func = func;
59 model->notify_hook.context = context;
60}
61
62void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
63{
64 if (model->notify_hook.func)
65 model->notify_hook.func(model, tag, payload, model->notify_hook.context);
66 if (model->isa->notify)
67 model->isa->notify(model, tag, payload);
68}
69
70static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size)
71{
72 int i, j;
73 for (i = 0; i < graph_exec_symbol_size; i++)
74 {
75 ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i;
76 // Check whether this exec symbol has any duplicate.
77 for (j = i + 1; j < graph_exec_symbol_size;)
78 {
79 ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j;
80 // If there is an identical exec symbol, remove it.
81 if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph)
82 {
83 if (j + 1 < graph_exec_symbol_size)
84 *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1];
85 --graph_exec_symbol_size;
86 continue;
87 }
88 ++j;
89 }
90 }
91 return graph_exec_symbol_size;
92}
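
_ccv_nnc_array_dedup_graph_exec_symbols above removes duplicates in place with the swap-with-last idiom: a duplicate at position j is overwritten by the current last element, the live size shrinks by one, and index j is re-checked. A standalone sketch of the same idiom on plain ints, for illustration only (not part of this file):

#include <stdio.h>

/* Same swap-with-last dedup idiom on a plain int array (illustrative only). */
static int dedup_ints(int* const values, int size)
{
	int i, j;
	for (i = 0; i < size; i++)
		for (j = i + 1; j < size;)
		{
			if (values[j] == values[i])
			{
				values[j] = values[size - 1]; /* overwrite duplicate with the tail */
				--size;                       /* shrink the live region */
				continue;                     /* re-check the element just moved in */
			}
			++j;
		}
	return size; /* new size; order is not preserved */
}

int main(void)
{
	int values[] = { 3, 1, 3, 2, 1 };
	const int n = dedup_ints(values, 5);
	printf("%d unique\n", n); /* prints "3 unique" */
	return 0;
}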
93
94typedef struct {
95 ccv_cnnp_model_sequence_t* sequence;
96 char prefix;
97 ccv_array_t* symbols;
98 ccv_array_t* ids;
99} ccv_cnnp_model_add_to_array_context_t;
100
101static void _ccv_cnnp_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol)
102{
103 ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context;
104 ccv_cnnp_model_t* const model = add_to_array_context->sequence->model;
105 int i;
106 if (!model->parameter_indices)
107 model->parameter_indices = ccv_array_new(sizeof(int), 0, 0);
108 for (i = 0; i < add_to_array_context->symbols->rnum; i++)
109 {
110 const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i);
111 if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph)
112 {
113 // Only add to parameter_indices if it is trainable.
114 if (add_to_array_context->prefix == 't')
115 ccv_array_add_unique_int(model->parameter_indices, i);
116 // Found it, return, don't add it.
117 return;
118 }
119 }
120 // Only add to parameter_indices if it is trainable.
121 if (add_to_array_context->prefix == 't')
122 ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum);
123 // This is a new one, no need to add_unique_int, it is unique.
124 ccv_array_push(add_to_array_context->symbols, &symbol);
125 char id[2048];
126 id[0] = add_to_array_context->prefix;
127 id[1] = '-';
128 int total_len = 2;
129 for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++)
130 {
131 const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i);
132 int len;
133 if (name->name && name->name[0] != '\0')
134 len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence);
135 else
136 len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence);
137 total_len += len;
138 if (total_len >= 2047)
139 break;
140 }
141 if (total_len < 2047)
142 total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it);
143 assert(total_len < 2048);
144 char *heap_id = (char*)ccmalloc(total_len + 1);
145 memcpy(heap_id, id, total_len + 1);
146 ccv_array_push(add_to_array_context->ids, &heap_id);
147 ++add_to_array_context->sequence->it;
148}
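
For reference, the id assembled above is the prefix character, then one "name-sequence-" (or bare "sequence-") segment per enclosing model, then the iterator value. A minimal sketch of the same snprintf composition with hypothetical model names (illustration only):

#include <stdio.h>

int main(void)
{
	/* Hypothetical two-level nesting: outer model "mlp" (sequence 0),
	 * inner unnamed model (sequence 1), iterator 2, trainable prefix 't'. */
	char id[2048];
	int total_len = snprintf(id, sizeof(id), "%c-", 't');
	total_len += snprintf(id + total_len, sizeof(id) - total_len, "%s-%d-", "mlp", 0);
	total_len += snprintf(id + total_len, sizeof(id) - total_len, "%d-", 1);
	total_len += snprintf(id + total_len, sizeof(id) - total_len, "%d", 2);
	printf("%s\n", id); /* prints "t-mlp-0-1-2" */
	return 0;
}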
149
150static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
151{
152 assert(model->graph);
153 model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
154 int i;
155 for (i = 0; i < input_size; i++)
156 model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
157 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
158 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
159 ccv_cnnp_model_sequence_t model_sequence = {
160 .bank = kh_init(ccv_cnnp_model_name_bank)
161 };
162 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
163 .sequence = &model_sequence,
164 .prefix = 't',
165 .symbols = parameters,
166 .ids = parameter_ids,
167 };
168 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
169 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
170 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
171 .sequence = &model_sequence,
172 .prefix = 'r',
173 .symbols = internals,
174 .ids = internal_ids,
175 };
176 ccv_cnnp_model_build_data_t build_data = {
177 .model_sequence = &model_sequence,
178 .add_to_array = _ccv_cnnp_add_to_array,
179 .parameters = parameters,
180 .context = {
181 .add_to_parameter = &add_to_parameter_context,
182 .add_to_output = &add_to_output_context,
183 },
184 };
185 model->data = &build_data;
186 ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
187 model->data = 0;
188 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
189 ccv_array_free(model_sequence.sequences);
190 // Assert no parameter is alias.
191 for (i = 0; i < parameters->rnum; i++)
192 {
193 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
194 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
195 assert(alias_to.graph == 0); // Cannot find the one alias to.
196 }
197 // Assert no internal is alias.
198 for (i = 0; i < internals->rnum; i++)
199 {
200 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
201 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(retained.graph, retained);
202 assert(alias_to.graph == 0); // Cannot find the one alias to.
203 }
204 const int output_size = model->output_size;
205 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
206 ccv_nnc_symbolic_graph_simplify(model->graph,
207 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
208 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
209 CCV_NNC_SIMPLIFY_OPS_FUSION,
210 CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
211 model->inputs, input_size,
212 model->outputs, output_size,
213 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
214 ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
215 compiled_data->f = compiled_data->fits + output_size;
216 const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
217 assert(evaluate_to_size > 0);
218 compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
219 memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
220 compiled_data->loss = loss;
221 if (loss.cmd == CCV_NNC_NOOP)
222 {
223 // If no loss function provided, there is no fits.
224 for (i = 0; i < output_size; i++)
225 {
226 compiled_data->fits[i] = NO_TENSOR_SYMBOL;
227 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
228 if (alias_to.d < 0)
229 compiled_data->f[i] = model->outputs[i];
230 else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
231 int ofs[CCV_NNC_MAX_DIM_ALLOC];
232 int inc[CCV_NNC_MAX_DIM_ALLOC];
233 ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
234 int j;
235 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
236 { assert(ofs[j] == 0); } // There is no ofs.
237 compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
238 }
239 }
240 } else {
241 for (i = 0; i < output_size; i++)
242 {
243 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
244 const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
245 compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
246 ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
247 }
248 }
249 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
250 ccv_nnc_symbolic_graph_simplify(model->graph,
251 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
252 0, 0, // No need to provide binds at this point.
253 compiled_data->f, model->output_size,
254 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
255 // If inputs are from GPU, stream type is GPU.
256 compiled_data->parameters = parameters;
257 compiled_data->internals = internals;
258 compiled_data->ids.parameters = parameter_ids;
259 compiled_data->ids.internals = internal_ids;
260}
261
262static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
263{
264 ccv_array_t* const stack = (ccv_array_t*)context;
265 ccv_array_push(stack, &symbol.d);
266}
267
268static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
269{
270 const ccv_nnc_tensor_symbol_t src_symbol = {
271 .d = src_index,
272 .graph = src_graph
273 };
274 const ccv_nnc_tensor_symbol_t dest_symbol = {
275 .d = dest_index,
276 .graph = dest_graph
277 };
278 const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
279 ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params);
280 int ofs[CCV_NNC_MAX_DIM_ALLOC];
281 int inc[CCV_NNC_MAX_DIM_ALLOC];
282 if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc))
283 ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc);
284}
285
286static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index)
287{
288 const ccv_nnc_tensor_symbol_t src_symbol = {
289 .d = src_index,
290 .graph = src_graph
291 };
292 const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol);
293 const ccv_nnc_tensor_symbol_t dest_symbol = {
294 .d = dest_index,
295 .graph = dest_graph
296 };
297 const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol);
298 return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0;
299}
300
301static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size);
302static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data);
303
304typedef struct {
305 int parallel_count;
306 ccv_nnc_symbolic_graph_t* graph;
307 ccv_nnc_graph_exec_arena_t* graph_exec_arena;
308} ccv_nnc_graph_exec_update_t;
309
310static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint)
311{
312 ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context;
313 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena;
314 ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol);
315 ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd);
316 ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint);
317 const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph;
318 const int parallel_count = graph_exec_update->parallel_count;
319 int i;
320 for (i = 1; i < parallel_count; i++)
321 {
322 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i));
323 if (!CCV_NO_GRAPH_EXEC(copy))
324 {
325 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
326 ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint);
327 }
328 }
329}
330
331void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size)
332{
333 assert(model->graph);
334 assert(model->compiled_data);
335 assert(!init->graph);
336 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
337 init->graph = ccv_nnc_symbolic_graph_new();
338 ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0);
339 ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack);
340 _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss);
341 init->parallel_count = model->parallel_count;
342 init->memory_compression = model->memory_compression;
343 init->compiled_data->stream_type = model->compiled_data->stream_type;
344 init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer;
345 init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size;
346 if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
347 _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0);
348 ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0);
349 ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL);
350 int i, j;
351 // Verify parameters, internals and saved_aux in both graph has the same dimensionality.
352 for (i = 0; i < compiled_data->parameters->rnum; i++)
353 {
354 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
355 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
356 }
357 for (i = 0; i < compiled_data->internals->rnum; i++)
358 {
359 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
360 assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d));
361 }
362 // Update inputs.
363 assert(model->input_size == init->input_size);
364 for (i = 0; i < model->input_size; i++)
365 if (model->inputs[i].d >= 0)
366 {
367 assert(init->inputs[i].d >= 0);
368 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d);
369 }
370 // Update outputs.
371 assert(model->output_size == init->output_size);
372 for (i = 0; i < model->output_size; i++)
373 {
374 if (model->outputs[i].d >= 0)
375 {
376 assert(init->outputs[i].d >= 0);
377 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d);
378 }
379 if (model->outputs[i].d != model->compiled_data->f[i].d)
380 {
381 assert(init->outputs[i].d != init->compiled_data->f[i].d);
382 if (model->compiled_data->f[i].d >= 0)
383 {
384 assert(init->compiled_data->f[i].d >= 0);
385 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d);
386 }
387 }
388 }
389 // Go through the graph to set tensor on matching symbols
390 for (i = 0; i < stack->rnum; i++)
391 {
392 const int d = *(int*)ccv_array_get(stack, i);
393 // If exceed range, skip.
394 if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) ||
395 d >= ccv_nnc_graph_exec_symbol_count(model->graph))
396 continue;
397 const ccv_nnc_graph_exec_symbol_t src_symbol = {
398 .d = d,
399 .graph = init->graph
400 };
401 const ccv_nnc_graph_exec_symbol_t dest_symbol = {
402 .d = d,
403 .graph = model->graph
404 };
405 const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol);
406 const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol);
407 // If the name doesn't match, skip.
408 if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP)
409 continue;
410 // Now get all the inputs and outputs, if matches, set them.
411 const int* src_inputs;
412 int src_input_size;
413 const int* src_outputs;
414 int src_output_size;
415 ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size);
416 const int* dest_inputs;
417 int dest_input_size;
418 const int* dest_outputs;
419 int dest_output_size;
420 ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size);
421 // We may have unmatched input / output size because this is the minimizer and it has
422 // different saved_aux (for example, when we shrunk with CMD_NOOP).
423 if (src_input_size != dest_input_size)
424 continue;
425 if (src_output_size != dest_output_size)
426 continue;
427 ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd);
428 // There may be mismatches between the source tensor symbols and the destination tensor symbols. The reason is
429 // that the minimizer may be passed in later, so tensors for the minimizer may be allocated later in the original
430 // graph, whereas the newly created graph is streamlined (the minimizer exists from the beginning). That makes
431 // the order of tensor symbol creation different, and therefore which tensor corresponds to which can be wrong as
432 // well. However, setting a new minimizer won't change the exec symbol ordering, because we never create new exec
433 // symbols after the gradient init step. Setting a new minimizer just updates that exec symbol's settings; it is not
434 // a new exec symbol.
435 for (j = 0; j < src_input_size; j++)
436 if (src_inputs[j] >= 0)
437 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]);
438 for (j = 0; j < src_output_size; j++)
439 if (src_outputs[j] >= 0)
440 _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]);
441 }
442 ccv_array_free(stack);
443 // After this, we get all tensors in the model graph resolved through tensor_auto.
444 ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL);
445 // Verify symbols we get matches.
446 const int parameter_size = compiled_data->parameters->rnum;
447 for (i = 0; i < parameter_size; i++)
448 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d); }
449 const int internal_size = compiled_data->internals->rnum;
450 for (i = 0; i < internal_size; i++)
451 { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d); }
452 // Go through compiled data.
453 if (compiled_data->tensor_arena)
454 {
455 const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph);
456 if (flag == 0 && compiled_data->graph_exec_arena)
457 {
458 ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph);
459 // Since we will reinit, if we previously set is_test, we need to set it again.
460 if (compiled_data->is_test)
461 {
462 const int parallel_count = ccv_max(model->parallel_count, 1);
463 ccv_nnc_graph_exec_update_t update = {
464 .parallel_count = parallel_count,
465 .graph = model->graph,
466 .graph_exec_arena = compiled_data->graph_exec_arena,
467 };
468 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
469 }
470 } else
471 // Free-up tensor arena & graph exec arena.
472 _ccv_cnnp_compiled_data_graph_free(compiled_data);
473 }
474 // There are other compiled graphs, for accum and apply gradients.
475 // However, the main conclusion is that these absorb operations shouldn't impact parameters.
476 // Thus, they won't impact the shape of gradients (only outgrad). Since we don't allocate outgrad
477 // ourselves, it is not a concern. For normal gradients, the shape cannot change, otherwise the
478 // parameters' shapes would be meaningless. The same goes for internals.
479 // That is why we don't update those compiled graphs at all at this point.
480 // Free the model, we've already "absorbed" it.
481 ccv_cnnp_model_free(init);
482}
483
484void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss)
485{
486 assert(input_size == model->input_size || model->input_size == 0);
487 if (model->input_size == 0)
488 model->input_size = input_size;
489 if (!model->graph) // The graph is not compiled yet.
490 {
491 model->graph = ccv_nnc_symbolic_graph_new();
492 _ccv_cnnp_model_compile(model, inputs, input_size, loss);
493 assert(model->compiled_data);
494 int i, flag = 0;
495 for (i = 0; !flag && i < input_size; i++)
496 flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type) == CCV_TENSOR_GPU_MEMORY);
497 // If inputs are from GPU, stream type is GPU.
498 model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
499 model->compiled_data->minimize.minimizer = minimizer;
500 model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
501 } else {
502 // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model.
503 // And then absorb the "new model" to the old one.
504 ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model);
505 ccv_cnnp_model_absorb(model, init, inputs, input_size);
506 // Reset minimizer.
507 ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0);
508 }
509}
510
511ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model)
512{
513 return _ccv_cnnp_model_copy(model, 0);
514}
515
516void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size)
517{
518 assert(model->graph);
519 assert(output_size == model->output_size);
520 ccv_nnc_symbolic_graph_t* const graph = model->graph;
521 ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL);
522 int i;
523 for (i = 0; i < output_size; i++)
524 {
525 assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL);
526 outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]);
527 }
528}
529
530void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size)
531{
532 if (workspace_size == model->workspace_size)
533 return;
534 model->workspace_size = workspace_size;
535 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
536 if (compiled_data && compiled_data->graph)
537 ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL);
538}
539
540void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel)
541{
542 if (parallel == 0)
543 model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
544 else
545 model->parallel_count = parallel;
546 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
547 if (compiled_data)
548 { assert(!compiled_data->graph); }
549}
550
551void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression)
552{
553 model->memory_compression = memory_compression;
554 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
555 if (compiled_data)
556 { assert(!compiled_data->graph); }
557}
558
559typedef struct {
560 int parallel_count;
561 ccv_nnc_symbolic_graph_t* graph;
562 ccv_cnnp_compiled_data_t* compiled_data;
563 ccv_nnc_tensor_arena_t* tensor_arena;
564} ccv_nnc_tensor_init_states_t;
565
566static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data)
567{
568 int i;
569 for (i = 0; i < compiled_data->parameters->rnum; i++)
570 {
571 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
572 if (!(compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f))))
573 return 1;
574 }
575 for (i = 0; i < compiled_data->internals->rnum; i++)
576 {
577 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
578 if (!(compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f))))
579 return 1;
580 }
581 return 0;
582}
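
The tensors_init bookkeeping used above is a packed bit vector: the bit for symbol d lives in 32-bit word d >> 5 at bit position d & 0x1f. A self-contained sketch of the same test/set operations (illustration only, not the ccv API):

#include <stdint.h>
#include <stdio.h>

/* Packed bit vector: word index is d >> 5, bit position is d & 0x1f. */
static int bit_test(const uint32_t* const v, const int d)
{
	return (v[d >> 5] & (1u << (d & 0x1f))) != 0;
}

static void bit_set(uint32_t* const v, const int d)
{
	v[d >> 5] |= (1u << (d & 0x1f));
}

int main(void)
{
	uint32_t v[2] = { 0, 0 }; /* tracks 64 symbols */
	bit_set(v, 37);
	printf("%d %d\n", bit_test(v, 37), bit_test(v, 36)); /* prints "1 0" */
	return 0;
}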
583
584static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol)
585{
586 ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context;
587 ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena;
588 ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol);
589 if (!output_tensor)
590 return;
591 const int d = output_symbol.d;
592 assert(d < tensor_init_states->compiled_data->tensors_init.size);
593 if (tensor_init_states->compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f)))
594 return;
595 tensor_init_states->compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
596 ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0);
597 const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph;
598 const int parallel_count = tensor_init_states->parallel_count;
599 int i;
600 for (i = 1; i < parallel_count; i++)
601 {
602 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i));
603 if (copy)
604 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &output_tensor, 1, &copy, 1, 0);
605 }
606}
607
608// This method can only handle cases where we added new tensors and execs, never deleted any. This invariant holds because
609// we set up everything (including calling the simplify method) in the ccv_cnnp_model_compile method, before this rewind setup.
610static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model)
611{
612 assert(model->graph);
613 assert(model->compiled_data);
614 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
615 assert(compiled_data->rewindables);
616 int i;
617 for (i = 0; i < compiled_data->rewindables->rnum; i++)
618 {
619 const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i);
620 if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC)
621 ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec);
622 else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR)
623 ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor);
624 }
625 ccv_array_clear(compiled_data->rewindables);
626 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
627}
628
629
630static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
631{
632 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
633 .type = CCV_CNNP_REWIND_TENSOR,
634 .tensor = symbol
635 };
636 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
637 ccv_array_push(rewind_symbols, &rewind_symbol);
638}
639
640static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
641{
642 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
643 .type = CCV_CNNP_REWIND_TENSOR,
644 .tensor = symbol
645 };
646 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
647 ccv_array_push(rewind_symbols, &rewind_symbol);
648}
649
650static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
651{
652 const ccv_cnnp_rewind_symbol_t rewind_symbol = {
653 .type = CCV_CNNP_REWIND_GRAPH_EXEC,
654 .graph_exec = symbol
655 };
656 ccv_array_t* const rewind_symbols = (ccv_array_t*)context;
657 ccv_array_push(rewind_symbols, &rewind_symbol);
658}
659
660static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph)
661{
662 ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol);
663 if (!CCV_NO_GRAPH_EXEC(update_exec))
664 ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd);
665 int i;
666 for (i = 1; i < parallel_count; i++)
667 {
668 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
669 const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol);
670 if (!CCV_NO_GRAPH_EXEC(copy))
671 ccv_nnc_graph_exec_set(copy.graph, copy, cmd);
672 }
673}
674
675static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd)
676{
677 assert(compiled_data);
678 assert(symbolic_graph);
679 ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd);
680 int i;
681 for (i = 1; i < parallel_count; i++)
682 {
683 ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i);
684 if (copy_symbol.graph)
685 ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd);
686 }
687 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena;
688 if (graph_exec_arena)
689 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
690 // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph)
691 ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena;
692 if (gradient_graph_exec_arena)
693 _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph);
694}
695
696static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice)
697{
698 int this_parameter_flag = 0;
699 const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]);
700 int j, k;
701 // For no-op, we can preserve previous saved_aux_size.
702 if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP)
703 {
704 // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous
705 // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between
706 // noop and a minimizer. We don't want that because we do that in high-level frameworks to
707 // make sure some model parameters don't update if we don't want them to.
708 int old_saved_aux_size;
709 if (old_minimizer.cmd == CCV_NNC_NOOP)
710 {
711 int input_size;
712 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0);
713 if (input_size < 2) // This is not legit.
714 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
715 else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters.
716 old_saved_aux_size = input_size - 2;
717 } else
718 old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer);
719 if (old_saved_aux_size != saved_aux_size)
720 {
721 this_parameter_flag = 1;
722 if (saved_aux_size > old_saved_aux_size)
723 {
724 // Allocate new tensor symbols.
725 const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]);
726 for (j = old_saved_aux_size; j < saved_aux_size; j++)
727 {
728 saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0);
729 saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0);
730 for (k = 1; k < parallel_count; k++)
731 {
732 ccv_nnc_tensor_param_t dev_info = info;
733 CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k);
734 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
735 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0);
736 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy);
737 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy);
738 }
739 }
740 } else {
741 for (j = saved_aux_size; j < old_saved_aux_size; j++)
742 {
743 for (k = 1; k < parallel_count; k++)
744 {
745 const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
746 if (src_copy.d >= 0)
747 {
748 ccv_nnc_tensor_symbol_free(graph, src_copy);
749 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL);
750 }
751 const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
752 if (dest_copy.d >= 0)
753 {
754 ccv_nnc_tensor_symbol_free(graph, dest_copy);
755 ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL);
756 }
757 }
758 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source);
759 ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination);
760 saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
761 }
762 }
763 }
764 }
765 _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer);
766 if (this_parameter_flag)
767 {
768 ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2];
769 ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1];
770 const int* inputs = 0;
771 int input_size = 0;
772 ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0);
773 assert(input_size >= 1);
774 update_inputs[0].d = inputs[0];
775 update_inputs[0].graph = graph;
776 update_inputs[1].d = inputs[1];
777 update_inputs[1].graph = graph;
778 update_outputs[0] = updated_parameters[parameter_indice];
779 for (j = 0; j < saved_aux_size; j++)
780 {
781 update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source;
782 update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination;
783 }
784 ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
785 for (k = 1; k < parallel_count; k++)
786 {
787 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k);
788 assert(copy.d >= 0);
789 ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0);
790 assert(input_size >= 1);
791 update_inputs[0].d = inputs[0];
792 update_inputs[0].graph = graph;
793 update_inputs[1].d = inputs[1];
794 update_inputs[1].graph = graph;
795 update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k);
796 for (j = 0; j < saved_aux_size; j++)
797 {
798 update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k);
799 update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k);
800 }
801 ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1);
802 }
803 }
804 return this_parameter_flag;
805}
806
807typedef struct {
808 int parameter_size;
809 ccv_nnc_cmd_t minimizer;
810 ccv_cnnp_model_io_t parameters[1];
811} ccv_cnnp_set_minimizer_for_parameter_t;
812
813static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model)
814{
815 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
816 assert(compiled_data);
817 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
818 // We update all parameters, at this point, we have one minimizer.
819 const int parameter_size = compiled_data->parameters->rnum;
820 ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes;
821 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
822 assert(symbolic_graph);
823 const int parallel_count = ccv_max(model->parallel_count, 1);
824 ccv_array_t* const parameters = compiled_data->minimize.parameters;
825 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
826 int i, j, flag = 0;
827 for (i = 0; i < parameters->rnum; i++)
828 {
829 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i);
830 for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++)
831 {
832 const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel;
833 assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_sel != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_sel != 0"
, "ccv_cnnp_model.c", 833, __extension__ __PRETTY_FUNCTION__)
; }))
;
834 const int old_rnum = parameter_indices->rnum;
835 ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices);
836 const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref;
837 assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j
]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter
->parameters[j]->param_ref != 0) ; else __assert_fail (
"set_minimizer_for_parameter->parameters[j]->param_ref != 0"
, "ccv_cnnp_model.c", 837, __extension__ __PRETTY_FUNCTION__)
; }))
;
838 if (param_ref >= 0)
839 {
840 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 840, __extension__ __PRETTY_FUNCTION__)
; }))
;
841 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
842 parameter_indices->rnum = old_rnum + 1;
843 }
844 }
845 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer);
846 // We may have duplicated indices, but that is OK, we will set it twice.
847 for (j = 0; j < parameter_indices->rnum; j++)
848 {
849 const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(j)))
;
850 assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__
({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size"
, "ccv_cnnp_model.c", 850, __extension__ __PRETTY_FUNCTION__)
; }))
;
851 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d))
852 flag = 1;
853 }
854 ccv_array_clear(parameter_indices);
855 }
856 ccv_array_free(parameter_indices);
857 return flag;
858}
859
860static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
861{
862 if (new_saved_aux_size == old_saved_aux_size)
863 return;
864 assert(new_saved_aux_size > old_saved_aux_size);
865 int i, j;
866 for (i = parameter_size - 1; i >= 0; i--)
867 {
868 for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
869 saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
870 for (j = old_saved_aux_size - 1; j >= 0; j--)
871 saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
872 }
873}
874
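_ccv_cnnp_scatter_saved_aux above widens the per-parameter stride of the saved_aux array in place, walking backwards so no entry is overwritten before it has been moved and padding the new tail slots with NO_TENSOR_SYMBOL. A self-contained sketch of the same repacking on plain ints (hypothetical data, same index arithmetic):

#include <stdio.h>

static void scatter(int* const a, const int n, const int old_size, const int new_size)
{
	int i, j;
	if (new_size == old_size)
		return;
	for (i = n - 1; i >= 0; i--)
	{
		for (j = new_size - 1; j >= old_size; j--)
			a[i * new_size + j] = -1; // stands in for NO_TENSOR_SYMBOL
		for (j = old_size - 1; j >= 0; j--)
			a[i * new_size + j] = a[i * old_size + j];
	}
}

int main(void)
{
	int a[2 * 3] = { 10, 11, 20, 21, 0, 0 }; // 2 parameters, old stride 2, room for stride 3
	int i;
	scatter(a, 2, 2, 3);
	for (i = 0; i < 6; i++)
		printf("%d ", a[i]); // prints: 10 11 -1 20 21 -1
	printf("\n");
	return 0;
}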
875static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model)
876{
877 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
878 assert(compiled_data);
879 if (!compiled_data->rewindables)
880 compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0);
881 ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables);
882 ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables);
883 ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables);
884}
885
886static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size)
887{
888 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
889 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
890 assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE);
891 const int evaluate_to_size = compiled_data->evaluate.to_size;
892 assert(evaluate_to_size > 0);
893 const int parallel_count = ccv_max(model->parallel_count, 1);
894 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
895 compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count);
896 int i, j;
897 const int output_size = model->output_size;
898 assert(!fits || fit_size == output_size * parallel_count);
899 if (fits)
900 for (i = 0; i < output_size; i++)
901 ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info);
902 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
903 const int parameter_size = compiled_data->parameters->rnum;
904 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size);
905 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
906 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
907 int parameter_size_maybe_more = parameter_size;
908 compiled_data->disable_outgrad = disable_outgrad;
909 int outgrad_size;
910 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
911 outgrad_size = 0;
912 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
913 outgrad_size = model->input_size;
914 else {
915 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
916 outgrad_size = 0;
917 for (i = 0; i < model->input_size; i++)
918 if (!(disable_outgrad & ((uint64_t)1 << i)))
919 ++outgrad_size;
920 }
921 compiled_data->outgrad_size = outgrad_size;
922 parameter_size_maybe_more += outgrad_size;
923 compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count);
924 compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0;
925 compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more);
926 compiled_data->backward.to_size = parameter_size_maybe_more;
927 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0)
928 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
929 else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs.
930 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
931 else { // Compute minimize with gradients including selected inputs.
932 assert(model->input_size > 0);
933 assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL); // If it is disable all, gradient mode won't be this.
934 assert(outgrad_size > 0);
935 ccv_nnc_tensor_symbol_t outgrads[outgrad_size];
936 j = 0;
937 for (i = 0; i < model->input_size; i++)
938 if (!(disable_outgrad & ((uint64_t)1 << i)))
939 outgrads[j++] = model->inputs[i];
940 ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes);
941 }
942 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size);
943 if (compiled_data->minimize.parameters)
944 _ccv_cnnp_apply_parameters_with_minimizer(model);
945 for (i = 0; i < output_size; i++)
946 {
947 const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
948 // Init this to 1 so we can backprop.
949 ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES);
950 }
951 for (i = 0; i < parameter_size_maybe_more; i++)
952 compiled_data->backward.tos[i] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]);
953 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS);
954 ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size);
955 for (i = 0; i < parameter_size_maybe_more - parameter_size; i++)
956 {
957 const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]);
958 const int* tos;
959 int to_size;
960 ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size);
961 if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes.
962 {
963 const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph);
964 int flag = 0;
965 for (j = i - 1; !flag && j >= 0; j--)
966 flag = (destinations[j + parameter_size].d == outgrad.d);
967 if (!flag) // Only if we cannot find it, we add it.
968 ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad);
969 }
970 }
971 if (parallel_count > 1)
972 {
973 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
974 0, 0,
975 compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */,
976 compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */,
977 0, 0, 0,
978 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
979 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
980 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
981 for (i = 0; i < evaluate_to_size; i++)
982 for (j = 1; j < parallel_count; j++)
983 {
984 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
985 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
986 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
987 }
988 for (i = 0; i < parameter_size_maybe_more; i++)
989 for (j = 1; j < parallel_count; j++)
990 {
991 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j);
992 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
993 compiled_data->backward.tos[compiled_data->backward.to_size++] = copy;
994 }
995 }
996 // Only use memory compression if we are in gradient parameter mode.
997 if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES && model->memory_compression)
998 ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
999 compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size);
1000 compiled_data->gradient_mode = gradient_mode;
1001}
1002
1003void ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1004{
1005 assert(!compiled_data->tensors.parameters);
1006 const int parameter_size = compiled_data->parameters->rnum;
1007 const int parallel_count = ccv_max(model->parallel_count, 1);
1008 const int internal_size = compiled_data->internals->rnum;
1009 compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
1010 compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
1011 compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)ccmalloc((sizeof(ccv_nnc_tensor_t*) * parameter_size + sizeof(ccv_nnc_tensor_t*) * internal_size) * parallel_count);
1012 compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
1013 int i, j;
1014 for (i = 0; i < parameter_size; i++)
1015 {
1016 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1017 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1018 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1019 compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
1020 for (j = 1; j < parallel_count; j++)
1021 {
1022 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1023 compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1024 }
1025 }
1026 for (i = 0; i < internal_size; i++)
1027 {
1028 const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
1029 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
1030 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1031 compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
1032 for (j = 1; j < parallel_count; j++)
1033 {
1034 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1035 compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
1036 }
1037 }
1038}
1039
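ccv_cnnp_model_tensors_init above lays parameter replicas out device-major: replica j of parameter i sits at index i + j * parameter_size, and the internals block starts right after all parameter replicas. A tiny sketch of that addressing (hypothetical counts, not part of ccv_cnnp_model.c):

#include <stdio.h>

int main(void)
{
	// Hypothetical: 4 parameters replicated across 2 devices.
	const int parameter_size = 4, parallel_count = 2;
	int i, j;
	for (j = 0; j < parallel_count; j++)
		for (i = 0; i < parameter_size; i++)
			printf("parameter %d on device %d -> slot %d\n", i, j, i + j * parameter_size);
	return 0;
}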
1040static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1041{
1042 assert(parallel_count > 0);
1043 int i, j;
1044 for (i = 0; i < tensor_size; i++)
1045 {
1046 if (!tensors[i])
1047 continue;
1048 const int d = tensor_symbols[i].d;
1049 if (!(tensors_init[d >> 5] & (1u << (d & 0x1f))))
1050 continue;
1051 for (j = 1; j < parallel_count; j++)
1052 if (tensors[i + j * tensor_size])
1053 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, &tensors[i], 1, &tensors[i + j * tensor_size], 1, 0);
1054 }
1055}
1056
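_ccv_cnnp_model_copy_tensors above broadcasts a tensor to the other devices only when its init bit is set in the tensors_init bitmap; symbol d maps to word d >> 5 and bit d & 0x1f. A minimal sketch of that bitmap bookkeeping (hypothetical symbol index, not part of ccv_cnnp_model.c):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t v[2] = { 0, 0 }; // enough bits for 64 tensor symbols
	const int d = 37; // hypothetical symbol index
	v[d >> 5] |= (1u << (d & 0x1f)); // mark symbol 37 as initialized
	const int inited = !!(v[d >> 5] & (1u << (d & 0x1f)));
	printf("symbol %d initialized: %d\n", d, inited); // prints 1
	return 0;
}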
1057static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count)
1058{
1059 assert(parallel_count > 0);
1060 int i, j;
1061 for (i = 0; i < tensor_size; i++)
1062 {
1063 const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1064 for (j = 1; j < parallel_count; j++)
1065 {
1066 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1067 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1068 if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL)
1069 { // We shouldn't allocate this, free it up.
1070 ccv_nnc_tensor_free(tensors[i + j * tensor_size]);
1071 tensors[i + j * tensor_size] = 0;
1072 }
1073 }
1074 }
1075}
1076
1077static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds)
1078{
1079 assert(parallel_count > 0);
1080 int i, j;
1081 for (i = 0; i < tensor_size; i++)
1082 {
1083 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1084 if (graph)
1085 {
1086 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1087 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1088 tensor_symbol = alias_to;
1089 }
1090 ccv_nnc_tensor_t* const tensor = tensors[i];
1091 if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1092 {
1093 const ccv_nnc_tensor_bind_t retained_bind = {
1094 .symbol = tensor_symbol,
1095 .tensor = tensor
1096 };
1097 ccv_array_push(tensor_binds, &retained_bind);
1098 }
1099 for (j = 1; j < parallel_count; j++)
1100 {
1101 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1102 ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size];
1103 if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1104 {
1105 const ccv_nnc_tensor_bind_t bind = {
1106 .symbol = copy,
1107 .tensor = tensors[i + j * tensor_size]
1108 };
1109 ccv_array_push(tensor_binds, &bind);
1110 }
1111 }
1112 }
1113}
1114
1115static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data)
1116{
1117 if (compiled_data->graph)
1118 ccv_nnc_graph_free(compiled_data->graph);
1119 compiled_data->graph = 0;
1120 compiled_data->is_test = 0;
1121 if (compiled_data->tensor_arena)
1122 ccv_nnc_tensor_arena_free(compiled_data->tensor_arena);
1123 compiled_data->tensor_arena = 0;
1124 if (compiled_data->graph_exec_arena)
1125 ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena);
1126 compiled_data->graph_exec_arena = 0;
1127 if (compiled_data->backward.from_ops)
1128 ccfree(compiled_data->backward.from_ops);
1129 compiled_data->backward.from_ops = 0;
1130 if (compiled_data->evaluate.schedule)
1131 ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule);
1132 compiled_data->evaluate.schedule = 0;
1133 if (compiled_data->backward.schedule)
1134 ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule);
1135 compiled_data->backward.schedule = 0;
1136}
1137
1138static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data)
1139{
1140 if (compiled_data->gradients)
1141 ccfree(compiled_data->gradients);
1142 compiled_data->gradients = 0;
1143 if (compiled_data->updated_parameters)
1144 ccfree(compiled_data->updated_parameters);
1145 compiled_data->updated_parameters = 0;
1146 compiled_data->update_nodes = 0;
1147 compiled_data->saved_aux = 0;
1148}
1149
1150static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data)
1151{
1152 if (compiled_data->backward.gradients)
1153 ccfree(compiled_data->backward.gradients);
1154 compiled_data->backward.gradients = 0;
1155 if (compiled_data->backward.accum)
1156 ccv_nnc_graph_free(compiled_data->backward.accum);
1157 compiled_data->backward.accum = 0;
1158 if (compiled_data->backward.tensor_arena)
1159 ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena);
1160 compiled_data->backward.tensor_arena = 0;
1161 if (compiled_data->backward.graph_exec_arena)
1162 ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena);
1163 compiled_data->backward.graph_exec_arena = 0;
1164}
1165
1166static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data)
1167{
1168 if (compiled_data->apply_gradients.graph)
1169 ccv_nnc_graph_free(compiled_data->apply_gradients.graph);
1170 compiled_data->apply_gradients.graph = 0;
1171 if (compiled_data->apply_gradients.tensor_arena)
1172 ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena);
1173 compiled_data->apply_gradients.tensor_arena = 0;
1174 if (compiled_data->apply_gradients.graph_exec_arena)
1175 ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena);
1176 compiled_data->apply_gradients.graph_exec_arena = 0;
1177}
1178
1179// Compile the graph to run ccv_cnnp_model_fit
1180static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1181{
1182 int i, j;
1183 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1184 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE);
1185 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE;
1186 const int parallel_count = ccv_max(model->parallel_count, 1);
1187 assert(output_size == model->output_size * parallel_count);
1188 assert(!fits || output_size == fit_size);
1189 assert(output_size > 0);
1190 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1191 {
1192 _ccv_cnnp_model_set_rewindables(model);
1193 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1194 } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) {
1195 _ccv_cnnp_model_rewind_graph(model);
1196 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1197 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1198 _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size);
1199 }
1200 const int tensors_init = !!compiled_data->tensors_init.v;
1201 if (!tensors_init)
1202 ccv_cnnp_model_tensors_init(model, compiled_data);
1203 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1204 assert((input_size % parallel_count) == 0);
1205 assert((output_size % parallel_count) == 0);
1206 assert((fit_size % parallel_count) == 0);
1207 const int input_size_per_p = input_size / parallel_count;
1208 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1209 const int output_size_per_p = output_size / parallel_count;
1210 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1211 const int fit_size_per_p = fit_size / parallel_count;
1212 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds);
1213 const int parameter_size = compiled_data->parameters->rnum;
1214 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1215 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1216 const int internal_size = compiled_data->internals->rnum;
1217 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1218 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1219 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1220 ccv_array_free(tensor_binds);
1221 if (tensors_init && parallel_count > 1)
1222 _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1223 // If tensor is not init'ed, we need to init states first.
1224 if (_ccv_cnnp_any_to_init(compiled_data))
1225 {
1226 ccv_nnc_tensor_init_states_t tensor_init_states = {
1227 .parallel_count = parallel_count,
1228 .graph = model->graph,
1229 .compiled_data = compiled_data,
1230 .tensor_arena = compiled_data->tensor_arena
1231 };
1232 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1233 }
1234 compiled_data->is_test = 0;
1235 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer);
1236 // No need to set because it is default to training mode.
1237 // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1238 for (i = 0; i < saved_aux_size * parameter_size; i++)
1239 {
1240 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source);
1241 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1242 for (j = 1; j < parallel_count; j++)
1243 {
1244 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1245 if (copy)
1246 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1247 }
1248 }
1249 const int evaluate_to_size = compiled_data->evaluate.to_size;
1250 compiled_data->evaluate.to_op_size = 0;
1251 for (i = 0; i < evaluate_to_size; i++)
1252 {
1253 ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1254 if (to.graph)
1255 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to;
1256 }
1257 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type);
1258 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1259}
1260
1261ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model)
1262{
1263 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1264 if (!compiled_data || !compiled_data->graph)
1265 return 0;
1266 return ccv_nnc_graph_default_stream(compiled_data->graph);
1267}
1268
1269uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model)
1270{
1271 const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1272 if (!compiled_data || !compiled_data->tensor_arena)
1273 return 0;
1274 return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena);
1275}
1276
1277static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
1278{
1279 int i, j;
1280 for (i = 0; i < tensor_size; i++)
1281 {
1282 ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i];
1283 if (graph)
1284 {
1285 const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol);
1286 if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL)
1287 tensor_symbol = alias_to;
1288 }
1289 ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]);
1290 for (j = 1; j < parallel_count; j++)
1291 {
1292 const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j);
1293 if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL)
1294 ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]);
1295 }
1296 }
1297}
1298
1299void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1300{
1301 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1302 assert(compiled_data);
1303 const int parallel_count = ccv_max(model->parallel_count, 1);
1304 assert(output_size == model->output_size * parallel_count);
1305 assert(input_size == model->input_size * parallel_count);
1306 assert(!fits || fit_size == output_size);
1307 assert(model->graph);
1308 if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)
1309 {
1310 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1311 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1312 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
1313 // Compile the symbolic graph down only when needed.
1314 _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size);
1315 } else {
1316 assert((input_size % parallel_count) == 0);
1317 assert((output_size % parallel_count) == 0);
1318 assert((fit_size % parallel_count) == 0);
1319 const int input_size_per_p = input_size / parallel_count;
1320 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1321 const int output_size_per_p = output_size / parallel_count;
1322 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1323 const int fit_size_per_p = fit_size / parallel_count;
1324 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count);
1325 }
1326 if (compiled_data->is_test)
1327 {
1328 compiled_data->is_test = 0;
1329 ccv_nnc_graph_exec_update_t update = {
1330 .parallel_count = parallel_count,
1331 .graph = model->graph,
1332 .graph_exec_arena = compiled_data->graph_exec_arena,
1333 };
1334 ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update);
1335 }
1336 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1337}
1338
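ccv_cnnp_model_fit above expects the flat inputs, fits and outputs arrays to hold the per-model counts multiplied by parallel_count, and the binding helpers then slice them per device as tensors[i + j * tensor_size]. A small pointer-arithmetic sketch of that per-device slicing (hypothetical counts, plain ints standing in for ccv_nnc_tensor_t* handles):

#include <stdio.h>

int main(void)
{
	// Hypothetical: a model with 2 inputs replicated across 3 devices -> 6 tensors passed in.
	const int model_input_size = 2, parallel_count = 3;
	const int input_size = model_input_size * parallel_count;
	const int input_size_per_p = input_size / parallel_count;
	int inputs[6] = { 100, 101, 200, 201, 300, 301 }; // device-major: device 0's tensors first
	int i, j;
	for (j = 0; j < parallel_count; j++)
		for (i = 0; i < input_size_per_p; i++)
			printf("device %d, input %d -> %d\n", j, i, inputs[i + j * input_size_per_p]);
	return 0;
}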
1339// Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD).
1340static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1341{
1342 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1343 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD;
1344 const int parallel_count = ccv_max(model->parallel_count, 1);
1345 assert(output_size == model->output_size * parallel_count);
1346 assert(output_size > 0);
1347 // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather,
1348 // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel.
1349 if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1350 {
1351 const int evaluate_to_size = compiled_data->evaluate.to_size;
1352 compiled_data->evaluate.tos = ccrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count);
1353 _ccv_cnnp_model_set_rewindables(model);
1354 ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count,
1355 0, 0,
1356 0, 0, 0,
1357 0, 0, 0,
1358 CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1359 SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
1360 ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1361 int i, j;
1362 for (i = 0; i < evaluate_to_size; i++)
1363 for (j = 1; j < parallel_count; j++)
1364 {
1365 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j);
1366 if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL)
1367 compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy;
1368 }
1369 }
1370 const int tensors_init = !!compiled_data->tensors_init.v;
1371 if (!tensors_init)
1372 ccv_cnnp_model_tensors_init(model, compiled_data);
1373 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1374 assert((input_size % parallel_count) == 0);
1375 assert((output_size % parallel_count) == 0);
1376 const int input_size_per_p = input_size / parallel_count;
1377 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1378 const int output_size_per_p = output_size / parallel_count;
1379 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1380 const int parameter_size = compiled_data->parameters->rnum;
1381 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1382 const int internal_size = compiled_data->internals->rnum;
1383 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1384 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1385 // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation.
1386 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1387 ccv_array_free(tensor_binds);
1388 // If tensor is not init'ed, we need to init states first.
1389 if (tensors_init && parallel_count > 1)
1390 _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1391 if (_ccv_cnnp_any_to_init(compiled_data))
1392 {
1393 ccv_nnc_tensor_init_states_t tensor_init_states = {
1394 .parallel_count = parallel_count,
1395 .graph = model->graph,
1396 .compiled_data = compiled_data,
1397 .tensor_arena = compiled_data->tensor_arena
1398 };
1399 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1400 }
1401 compiled_data->is_test = 1;
1402 ccv_nnc_graph_exec_update_t update = {
1403 .parallel_count = parallel_count,
1404 .graph = model->graph,
1405 .graph_exec_arena = compiled_data->graph_exec_arena,
1406 };
1407 ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update);
1408 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type);
1409 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
1410}
1411
1412static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
1413{
1414 assert(!compiled_data->tensors.gradients);
1415 const int parameter_size = compiled_data->parameters->rnum;
1416 const int parallel_count = ccv_max(model->parallel_count, 1);
1417 compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count);
1418 compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count;
1419 int i, j;
1420 for (i = 0; i < parameter_size; i++)
1421 {
1422 const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
1423 ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
1424 CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
1425 compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0);
1426 compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it.
1427 for (j = 1; j < parallel_count; j++)
1428 {
1429 CCV_TENSOR_SET_DEVICE_ID(info.type, j);
1430 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1431 compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0;
1432 }
1433 }
1434}
1435
1436static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size)
1437{
1438 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
1439 return 1;
1440 if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
1441 return 0;
1442 int i;
1443 for (i = 0; i < input_size; i++)
1444 if (!(disable_outgrad & ((uint64_t)1 << i)))
1445 return 0;
1446 return 1;
1447}
1448
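_ccv_cnnp_is_disable_outgrad_all above treats disable_outgrad as a per-input bitmask (bit i set means input i gets no outgoing gradient), with CCV_CNNP_DISABLE_OUTGRAD_NONE and CCV_CNNP_DISABLE_OUTGRAD_ALL as shortcuts. A standalone sketch of the same mask check (hypothetical mask values; the defines below are local stand-ins for illustration only, the real values live in the ccv_cnnp headers):

#include <stdio.h>
#include <stdint.h>

#define OUTGRAD_NONE ((uint64_t)0)
#define OUTGRAD_ALL (~(uint64_t)0)

static int is_disable_all(const uint64_t disable_outgrad, const int input_size)
{
	if (disable_outgrad == OUTGRAD_ALL)
		return 1;
	if (disable_outgrad == OUTGRAD_NONE)
		return 0;
	int i;
	for (i = 0; i < input_size; i++)
		if (!(disable_outgrad & ((uint64_t)1 << i)))
			return 0; // at least one input still wants its gradient
	return 1;
}

int main(void)
{
	printf("%d\n", is_disable_all((uint64_t)0x3, 2)); // both of 2 inputs masked -> 1
	printf("%d\n", is_disable_all((uint64_t)0x1, 2)); // input 1 still gets an outgrad -> 0
	return 0;
}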
1449// Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1450// Particularly, this method compiles the evaluation and backprop graph (the main graph).
1451static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
1452{
1453 int i, j;
1454 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1455 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1456 assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode);
1457 compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE;
1458 const int parallel_count = ccv_max(model->parallel_count, 1);
1459 assert(output_size == model->output_size * parallel_count);
1460 assert(output_size > 0);
1461 // There shouldn't be a loss function if we evaluate with multistage jit.
1462 assert(compiled_data->loss.cmd == CCV_NNC_NOOP);
1463 if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)
1464 {
1465 _ccv_cnnp_model_set_rewindables(model);
1466 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1467 } else if (compiled_data->gradient_mode != target_gradient_mode) {
1468 _ccv_cnnp_model_rewind_graph(model);
1469 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
1470 compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE;
1471 _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here.
1472 }
1473 const int tensors_init = !!compiled_data->tensors_init.v;
1474 if (!tensors_init)
1475 ccv_cnnp_model_tensors_init(model, compiled_data);
1476 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1477 assert((input_size % parallel_count) == 0);
1478 assert((output_size % parallel_count) == 0);
1479 const int input_size_per_p = input_size / parallel_count;
1480 _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds);
1481 const int output_size_per_p = output_size / parallel_count;
1482 _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds);
1483 const int parameter_size = compiled_data->parameters->rnum;
1484 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1485 const int internal_size = compiled_data->internals->rnum;
1486 _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count);
1487 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds);
1488 if (!compiled_data->tensors.gradients)
1489 _ccv_cnnp_model_gradient_tensors_init(model, compiled_data);
1490 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1491 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena);
1492 ccv_array_free(tensor_binds);
1493 if (tensors_init && parallel_count > 1)
1494 _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count);
1495 // If any tensor is not init'ed, we need to init its states first.
1496 if (_ccv_cnnp_any_to_init(compiled_data))
1497 {
1498 ccv_nnc_tensor_init_states_t tensor_init_states = {
1499 .parallel_count = parallel_count,
1500 .graph = model->graph,
1501 .compiled_data = compiled_data,
1502 .tensor_arena = compiled_data->tensor_arena
1503 };
1504 ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states);
1505 }
1506 compiled_data->is_test = is_test;
1507 ccv_nnc_graph_exec_update_t update = {
1508 .parallel_count = parallel_count,
1509 .graph = model->graph,
1510 .graph_exec_arena = compiled_data->graph_exec_arena,
1511 };
1512 ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1513 const int evaluate_to_size = compiled_data->evaluate.to_size;
1514 compiled_data->evaluate.to_op_size = 0;
1515 ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0);
1516 for (i = 0; i < evaluate_to_size; i++)
1517 {
1518 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]);
1519 if (to_op.graph)
1520 compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op;
1521 const int* tos;
1522 int to_size;
1523 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size);
1524 for (j = 0; j < to_size; j++)
1525 {
1526 ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1527 .d = tos[j],
1528 .graph = model->graph
1529 });
1530 if (to_op.graph)
1531 ccv_array_add_unique_int(backward_from, to_op.d);
1532 }
1533 }
1534 assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__
({ if (backward_from->rnum > 0) ; else __assert_fail (
"backward_from->rnum > 0", "ccv_cnnp_model.c", 1534, __extension__
__PRETTY_FUNCTION__); }))
;
1535 compiled_data->backward.from_op_size = backward_from->rnum;
1536 compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum);
1537 for (i = 0; i < backward_from->rnum; i++)
1538 compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){
1539 .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from
)->rsize * (size_t)(i)))
,
1540 .graph = compiled_data->graph,
1541 };
1542 ccv_array_free(backward_from);
1543 ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type);
1544 ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0);
1545}
1546
1547void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1548{
1549 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1550 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1550, __extension__ __PRETTY_FUNCTION__); }))
;
1551 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1552 assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count
) ? 1 : 0), __extension__ ({ if (output_size == model->output_size
* parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1552, __extension__ __PRETTY_FUNCTION__
); }))
;
1553 assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count
) ? 1 : 0), __extension__ ({ if (input_size == model->input_size
* parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count"
, "ccv_cnnp_model.c", 1553, __extension__ __PRETTY_FUNCTION__
); }))
;
1554 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1554, __extension__ __PRETTY_FUNCTION__); }))
;
1555 const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS;
1556 const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad));
1557 if (!compiled_data->graph || mode_mismatch)
1558 {
1559 _ccv_cnnp_compiled_data_graph_free(compiled_data);
1560 if (mode_mismatch) // If the mode mismatches, we need to redo the backward as well (no need to redo apply_gradients; it doesn't depend on target_gradient_mode or disable_outgrad).
1561 _ccv_cnnp_compiled_data_backward_free(compiled_data);
1562 if (params.requires_grad)
1563 _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size);
1564 else
1565 _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size);
1566 } else {
1567 ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena);
1568 assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0)
, __extension__ ({ if ((input_size % parallel_count) == 0) ; else
__assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1568, __extension__ __PRETTY_FUNCTION__); }))
;
1569 const int input_size_per_p = input_size / parallel_count;
1570 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count);
1571 assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0
), __extension__ ({ if ((output_size % parallel_count) == 0) ;
else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c"
, 1571, __extension__ __PRETTY_FUNCTION__); }))
;
1572 const int output_size_per_p = output_size / parallel_count;
1573 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count);
1574 }
1575 if (compiled_data->is_test != params.is_test)
1576 {
1577 compiled_data->is_test = params.is_test;
1578 ccv_nnc_graph_exec_update_t update = {
1579 .parallel_count = parallel_count,
1580 .graph = model->graph,
1581 .graph_exec_arena = compiled_data->graph_exec_arena,
1582 };
1583 ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
1584 }
1585 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
1586 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
1587 else {
1588 if (!compiled_data->evaluate.schedule)
1589 compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size);
1590 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context);
1591 }
1592}
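An illustrative sketch of how this entry point is typically driven, assuming `model` is an already-compiled ccv_cnnp model with a single 32F CPU input and output of 64 elements (the shapes and variable names are placeholders, not taken from this file); CPU_TENSOR_NHWC and TENSOR_LIST are the helpers from ccv_nnc_easy.h.

ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 64), 0);
ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 64), 0);
/* requires_grad = 1 takes the multistage path above: the backward-capable graph is
 * JIT-compiled on first use or whenever the gradient mode changes. */
ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
	.requires_grad = 1,
	.is_test = 0,
	.disable_outgrad = 0,
}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);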
1593
1594// Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE).
1595// Particularly, this method compiles the accumulator graph.
1596static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model)
1597{
1598 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1599 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1599, __extension__ __PRETTY_FUNCTION__); }))
;
1600 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1600, __extension__ __PRETTY_FUNCTION__
); }))
;
1601 ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new();
1602 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1603 const int parameter_size = compiled_data->parameters->rnum;
1604 int i, j;
1605 compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3);
1606 compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count;
1607 compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count;
1608 for (i = 0; i < parameter_size; i++)
1609 for (j = 0; j < parallel_count; j++)
1610 {
1611 const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info;
1612 // Now the old gradient becomes the accumulated gradient; set up new gradient tensors so we can collect into them.
1613 compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size];
1614 compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
1615 ccv_nnc_tensor_symbol_t inputs[2];
1616 inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1617 inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1618 ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0);
1619 ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0);
1620 }
1621 ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1622 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1623 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1624 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds);
1625 _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds);
1626 ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size
(accum)
, SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size
(accum)
, &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena);
1627 ccv_nnc_symbolic_graph_free(accum);
1628 ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type);
1629 ccv_array_free(tensor_binds);
1630}
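In effect, each node added by the double loop above computes an element-wise sum of the accumulated gradient and the freshly computed gradient, and the updated symbol is bound back onto the same accumulated-gradient tensor. An eager-mode analogue of one such node, assuming `accum` and `grad` are same-shaped tensors allocated elsewhere:

/* In-place accumulation over the same buffer, mirroring how updated_accum_gradients
 * is bound to tensors.accum_gradients in the compiled accumulator graph. */
ccv_nnc_cmd_exec(CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0,
	TENSOR_LIST(accum, grad), TENSOR_LIST(accum), 0);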
1631
1632void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
1633{
1634 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1635 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1635, __extension__ __PRETTY_FUNCTION__); }))
;
1636 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1636, __extension__ __PRETTY_FUNCTION__
); }))
;
1637 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1638 assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model->
output_size * parallel_count) ? 1 : 0), __extension__ ({ if (
ingrad_size == 0 || ingrad_size == model->output_size * parallel_count
) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count"
, "ccv_cnnp_model.c", 1638, __extension__ __PRETTY_FUNCTION__
); }))
;
1639 if (outgrad_size > 0)
1640 { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size
* parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size
== compiled_data->outgrad_size * parallel_count) ; else __assert_fail
("outgrad_size == compiled_data->outgrad_size * parallel_count"
, "ccv_cnnp_model.c", 1640, __extension__ __PRETTY_FUNCTION__
); }))
; }
1641 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1641, __extension__ __PRETTY_FUNCTION__); }))
;
1642 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1642, __extension__ __PRETTY_FUNCTION__
); }))
;
1643 const int parameter_size = compiled_data->parameters->rnum;
1644 // If we need to accumulate the gradients now, do jit on accumulator.
1645 if (compiled_data->backward.count > 0)
1646 {
1647 if (!compiled_data->backward.accum)
1648 _ccv_cnnp_model_multistage_jit_1(model);
1649 else if (compiled_data->backward.count == 1) {
1650 // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly).
1651 int i;
1652 ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena);
1653 for (i = 0; i < parameter_size * parallel_count; i++)
1654 {
1655 ccv_nnc_tensor_t* tensor;
1656 CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), (
compiled_data->tensors.accum_gradients[i]) = (compiled_data
->tensors.gradients[i]), (compiled_data->tensors.gradients
[i]) = (tensor))
;
1657 }
1658 // Rebind in case we messed up the binding (we just switched accum_gradients and gradients).
1659 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1);
1660 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1661 _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1);
1662 }
1663 }
1664 const int ingrad_size_per_p = model->output_size;
1665 const int outgrad_size_per_p = compiled_data->outgrad_size;
1666 int i, j;
1667 for (i = 0; i < ingrad_size_per_p; i++)
1668 {
1669 const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]);
1670 if (!ingrad_size || !ingrads || ingrads[i] == 0)
1671 {
1672 // Set it to 1 if it is not specified.
1673 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad);
1674 if (ingrad_tensor)
1675 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1676 for (j = 1; j < parallel_count; j++)
1677 {
1678 ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j));
1679 if (ingrad_tensor)
1680 ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
1681 }
1682 } else {
1683 // Make sure the length matches, in case it is an alias.
1684 assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) ==
ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->
graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params
(model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))"
, "ccv_cnnp_model.c", 1684, __extension__ __PRETTY_FUNCTION__
); }))
;
1685 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]);
1686 for (j = 1; j < parallel_count; j++)
1687 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]);
1688 }
1689 }
1690 if (outgrad_size > 0)
1691 {
1692 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
&& "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS &&
"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad"
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\""
, "ccv_cnnp_model.c", 1692, __extension__ __PRETTY_FUNCTION__
); }))
;
1693 for (i = 0; i < outgrad_size_per_p; i++)
1694 if (outgrads[i])
1695 {
1696 const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i];
1697 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]);
1698 for (j = 1; j < parallel_count; j++)
1699 ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]);
1700 }
1701 } else {
1702 assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1703, __extension__ __PRETTY_FUNCTION__
); }))
1703 compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES
|| compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode
== CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data
->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS
) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS"
, "ccv_cnnp_model.c", 1703, __extension__ __PRETTY_FUNCTION__
); }))
;
1704 }
1705 // We need to rebind here because ccv_cnnp_model_evaluate clears bindings, which resets all bindings for the gradients.
1706 // For parameters and internals this is fine because clearing bindings restores the original bindings, which are these
1707 // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching.
1708 _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
1709 if (!compiled_data->backward.schedule)
1710 compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0);
1711 // Run the backward pass.
1712 ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context);
1713 // If we need to run an accumulation round, do that now.
1714 if (compiled_data->backward.count > 0)
1715 ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context);
1716 // Update the count; this determines whether we need to accumulate or not.
1717 ++compiled_data->backward.count;
1718}
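An illustrative sketch of calling this function right after an evaluate with requires_grad = 1, reusing the single-output `model`, `x` and `y` assumptions from the earlier sketch:

/* Default ingrads (each output gradient seeded with 1), no outgrads, blocking run. */
ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0);
/* Or supply an explicit gradient of the loss w.r.t. the output instead: */
ccv_nnc_tensor_t* const dy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 64), 0);
/* ... fill dy with the loss gradient ... */
ccv_cnnp_model_backward(model, TENSOR_LIST(dy), 0, 0, 0, 0);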
1719
1720// Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE).
1721// Particularly, this method compiles the parameter update graph.
1722static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model)
1723{
1724 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1725 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1725, __extension__ __PRETTY_FUNCTION__
); }))
;
1726 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1727 const int parameter_size = compiled_data->parameters->rnum;
1728 ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
1729 _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
0)))
, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1730 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds);
1731 // Bind accumulated gradients.
1732 if (compiled_data->backward.count > 1)
1733 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds);
1734 else
1735 _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds);
1736 ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0);
1737 int i, j;
1738 for (i = 0; i < compiled_data->backward.to_size; i++)
1739 {
1740 const int* tos;
1741 int to_size;
1742 ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size);
1743 for (j = 0; j < to_size; j++)
1744 {
1745 // Check whether this already shows up in the backward graph; if that is the case, it won't be in the apply
1746 // gradients graph.
1747 const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){
1748 .d = tos[j],
1749 .graph = model->graph,
1750 });
1751 if (!exec.graph)
1752 ccv_array_add_unique_int(apply_gradients_from, tos[j]);
1753 }
1754 }
1755 const int from_size = apply_gradients_from->rnum;
1756 ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size);
1757 for (i = 0; i < from_size; i++)
1758 froms[i] = (ccv_nnc_graph_exec_symbol_t){
1759 .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t
)(apply_gradients_from)->rsize * (size_t)(i)))
,
1760 .graph = model->graph
1761 };
1762 ccv_array_free(apply_gradients_from);
1763 // It can only end with updates on the parameters.
1764 ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0);
1765 for (i = 0; i < parameter_size; i++)
1766 {
1767 ccv_array_push(tos, &compiled_data->update_nodes[i]);
1768 for (j = 1; j < parallel_count; j++)
1769 {
1770 const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j);
1771 ccv_array_push(tos, &copy);
1772 }
1773 }
1774 ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds
)->rsize * (size_t)(0)))
, tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize *
(size_t)(0)))
, tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena);
1775 ccv_array_free(tos);
1776 ccv_array_free(tensor_binds);
1777 ccfreefree(froms);
1778 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
1779 for (i = 0; i < max_saved_aux_size * parameter_size; i++)
1780 {
1781 // Skip if there is no tensor.
1782 if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL)
1783 continue;
1784 ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source);
1785 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0);
1786 for (j = 1; j < parallel_count; j++)
1787 {
1788 ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j));
1789 if (copy)
1790 ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0);
1791 }
1792 }
1793 ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type);
1794}
1795
1796void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context)
1797{
1798 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1799 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 1799, __extension__ __PRETTY_FUNCTION__); }))
;
1800 assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE
) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode
== CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail
("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE"
, "ccv_cnnp_model.c", 1800, __extension__ __PRETTY_FUNCTION__
); }))
;
1801 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1802 assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if
(model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c"
, 1802, __extension__ __PRETTY_FUNCTION__); }))
;
1803 assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__
({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph"
, "ccv_cnnp_model.c", 1803, __extension__ __PRETTY_FUNCTION__
); }))
;
1804 // Skip if there is no backward pass.
1805 if (compiled_data->backward.count <= 0)
1806 return;
1807 // Skip if there are no parameters.
1808 if (compiled_data->parameters->rnum == 0)
1809 {
1810 compiled_data->backward.count = 0;
1811 return;
1812 }
1813 if (!compiled_data->apply_gradients.graph)
1814 _ccv_cnnp_model_multistage_jit_2(model);
1815 else {
1816 const int parameter_size = compiled_data->parameters->rnum;
1817 ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena);
1818 // Change to bind accum_gradients if we do gradient accumulation (run backward more than once).
1819 if (compiled_data->backward.count > 1)
1820 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count);
1821 else
1822 _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count);
1823 }
1824 ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context);
1825 // Reset backward count to 0.
1826 compiled_data->backward.count = 0;
1827}
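An illustrative sketch of gradient accumulation over several micro-batches, under the same single-input/single-output assumptions as above: every ccv_cnnp_model_backward call after the first runs the accumulator graph from _ccv_cnnp_model_multistage_jit_1, and the parameters are only updated (and backward.count reset) once this function runs.

int t;
for (t = 0; t < 4; t++) /* accumulate 4 micro-batches into one update */
{
	/* ... fill x with the t-th micro-batch ... */
	ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
		.requires_grad = 1,
	}, TENSOR_LIST(x), TENSOR_LIST(y), 0, 0);
	ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0, 0);
}
ccv_cnnp_model_apply_gradients(model, 0); /* one optimizer step over the summed gradients */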
1828
1829void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor)
1830{
1831 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1832 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
1833 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 1833, __extension__ __PRETTY_FUNCTION__
); }))
;
1834 const int tensors_init = !!compiled_data->tensors_init.v;
1835 if (!tensors_init)
1836 ccv_cnnp_model_tensors_init(model, compiled_data);
1837 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1838 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
1839 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
1840 if (param_ref < 0)
1841 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 1841
, __extension__ __PRETTY_FUNCTION__); }))
; }
1842 else
1843 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 1843, __extension__ __PRETTY_FUNCTION__
); }))
; }
1844 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
1845 ccv_array_free(parameter_indices);
1846 const int parameter_size = compiled_data->parameters->rnum;
1847 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 1847
, __extension__ __PRETTY_FUNCTION__); }))
;
1848 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__
); }))
;
1849 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1850 ccv_nnc_tensor_t* const dest = compiled_data->tensors.parameters[d];
1851 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 1851, __extension__
__PRETTY_FUNCTION__); }))
;
1852 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1
)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
1853 int i;
1854 for (i = 1; i < parallel_count; i++)
1855 {
1856 ccv_nnc_tensor_t* const copy_tensor = compiled_data->tensors.parameters[d + i * parameter_size];
1857 if (copy_tensor)
1858 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
1859 }
1860 // Mark this symbol as init'ed.
1861 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
d)))
)->d;
1862 compiled_data->tensors_init.v[s >> 5] |= (1u << (s & 0x1f));
1863}
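An illustrative sketch of overriding a single parameter from a host tensor; ccv_cnnp_model_parameters(model, selector, index) and the ALL_PARAMETERS constant are assumed from the public ccv_cnnp API, and the parameter index and shape below are placeholders.

/* Copies host values into the first parameter and, when parallel_count > 1, into all
 * of its per-device copies, then marks the symbol as init'ed. */
ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 64, 64), 0);
/* ... fill w ... */
ccv_cnnp_model_set_parameter(model, ccv_cnnp_model_parameters(model, ALL_PARAMETERS, 0), w);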
1864
1865void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor)
1866{
1867 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
1868 const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel;
1869 assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__
({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0"
, "ccv_cnnp_model.c", 1869, __extension__ __PRETTY_FUNCTION__
); }))
;
1870 assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->tensors.parameters)
; else __assert_fail ("compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1870, __extension__ __PRETTY_FUNCTION__
); }))
;
1871 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1872 ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
1873 const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
1874 if (param_ref < 0)
1875 { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__
({ if (parameter_indices->rnum == 1) ; else __assert_fail
("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 1875
, __extension__ __PRETTY_FUNCTION__); }))
; }
1876 else
1877 { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ?
1 : 0), __extension__ ({ if (param_ref < parameter_indices
->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum"
, "ccv_cnnp_model.c", 1877, __extension__ __PRETTY_FUNCTION__
); }))
; }
1878 const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref >= 0 ? param_ref : 0)))
;
1879 ccv_array_free(parameter_indices);
1880 const int parameter_size = compiled_data->parameters->rnum;
1881 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 1881
, __extension__ __PRETTY_FUNCTION__); }))
;
1882 assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__
({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size"
, "ccv_cnnp_model.c", 1882, __extension__ __PRETTY_FUNCTION__
); }))
;
1883 // We don't need to consider parallel_count; every parameter on each device is identical.
1884 ccv_nnc_tensor_t* const src = compiled_data->tensors.parameters[d];
1885 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 1885, __extension__
__PRETTY_FUNCTION__); }))
;
1886 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
1887}
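The inverse direction, as another sketch under the same ccv_cnnp_model_parameters / ALL_PARAMETERS assumptions: read a parameter back out into a pre-allocated host tensor of matching shape.

ccv_nnc_tensor_t* const w_out = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 64, 64), 0);
ccv_cnnp_model_parameter_copy(model, ccv_cnnp_model_parameters(model, ALL_PARAMETERS, 0), w_out);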
1888
1889static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref)
1890{
1891 const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel;
1892 assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__
({ if (parameters->param_sel != 0) ; else __assert_fail (
"parameters->param_sel != 0", "ccv_cnnp_model.c", 1892, __extension__
__PRETTY_FUNCTION__); }))
;
1893 ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0);
1894 ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices);
1895 *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref;
1896 return to_parameter_indices;
1897}
1898
1899static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref)
1900{
1901 // If the model is not compiled yet, compile it now.
1902 if (!model->graph)
1903 {
1904 model->graph = ccv_nnc_symbolic_graph_new();
1905 assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__
({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data"
, "ccv_cnnp_model.c", 1905, __extension__ __PRETTY_FUNCTION__
); }))
;
1906 const int input_size = from_model->input_size;
1907 ccv_nnc_tensor_param_t input_params[input_size];
1908 int i;
1909 for (i = 0; i < input_size; i++)
1910 input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]);
1911 _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss);
1912 model->parallel_count = from_model->parallel_count;
1913 model->memory_compression = from_model->memory_compression;
1914 model->compiled_data->stream_type = from_model->compiled_data->stream_type;
1915 model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer;
1916 model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size;
1917 }
1918 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
1919 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 1919, __extension__ __PRETTY_FUNCTION__
); }))
;
1920 const int to_tensors_init = !!to_compiled_data->tensors_init.v;
1921 if (!to_tensors_init)
1922 ccv_cnnp_model_tensors_init(model, to_compiled_data);
1923 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 1923, __extension__ __PRETTY_FUNCTION__
); }))
;
1924 *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
1925 *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
1926 if (*from_param_ref < 0 && *param_ref >= 0)
1927 { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1
: 0), __extension__ ({ if ((*from_parameter_indices)->rnum
== 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1"
, "ccv_cnnp_model.c", 1927, __extension__ __PRETTY_FUNCTION__
); }))
; }
1928 else if (*from_param_ref >= 0)
1929 { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref <
(*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum"
, "ccv_cnnp_model.c", 1929, __extension__ __PRETTY_FUNCTION__
); }))
; }
1930 if (*param_ref < 0 && *from_param_ref >= 0)
1931 { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0)
, __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else
__assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c"
, 1931, __extension__ __PRETTY_FUNCTION__); }))
; }
1932 else if (*param_ref >= 0)
1933 { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum
) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices
)->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 1933, __extension__ __PRETTY_FUNCTION__
); }))
; }
1934 // Should map to exactly the same number of parameters.
1935 if (*param_ref < 0 && *from_param_ref < 0)
1936 { assert((*from_parameter_indices)->rnum == (*parameter_indices)->rnum)((void) sizeof (((*from_parameter_indices)->rnum == (*parameter_indices
)->rnum) ? 1 : 0), __extension__ ({ if ((*from_parameter_indices
)->rnum == (*parameter_indices)->rnum) ; else __assert_fail
("(*from_parameter_indices)->rnum == (*parameter_indices)->rnum"
, "ccv_cnnp_model.c", 1936, __extension__ __PRETTY_FUNCTION__
); }))
; }
1937}
1938
1939void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
1940{
1941 ccv_array_t* to_parameter_indices;
1942 int to_param_ref;
1943 ccv_array_t* from_parameter_indices;
1944 int from_param_ref;
1945 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref);
1946 // To models.
1947 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
1948 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 1948, __extension__ __PRETTY_FUNCTION__
); }))
;
1949 // From models.
1950 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
1951 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
1952 const int to_parameter_size = to_compiled_data->parameters->rnum;
1953 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
1954 int i, j;
1955 for (i = 0; i < rnum; i++)
1956 {
1957 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
1958 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 1958, __extension__ __PRETTY_FUNCTION__); }))
;
1959 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 1959, __extension__ __PRETTY_FUNCTION__
); }))
;
1960 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
1961 // If the original is not init'ed, we cannot copy from it.
1962 if (!(from_compiled_data->tensors_init.v[s >> 5] & (1u << (s & 0x1f))))
1963 continue;
1964 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
1965 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 1965, __extension__ __PRETTY_FUNCTION__); }))
;
1966 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 1966, __extension__ __PRETTY_FUNCTION__
); }))
;
1967 ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d];
1968 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 1968, __extension__
__PRETTY_FUNCTION__); }))
;
1969 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d];
1970 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 1970, __extension__
__PRETTY_FUNCTION__); }))
;
1971 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
1972 for (j = 1; j < parallel_count; j++)
1973 {
1974 ccv_nnc_tensor_t* const copy_tensor = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
1975 if (copy_tensor)
1976 ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto
, 0)
, ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
1977 }
1978 // Mark this symbol as init'ed.
1979 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
1980 to_compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
1981 }
1982 ccv_array_free(to_parameter_indices);
1983 ccv_array_free(from_parameter_indices);
1984}
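An illustrative sketch of syncing all parameters from one model into another (for example, a target network tracking an online network); `online` and `target` are hypothetical compiled models with identical parameter layouts, and ccv_cnnp_model_parameters / ALL_PARAMETERS are assumed as before.

/* Copies every init'ed parameter of `online` into `target`, including per-device
 * copies when parallel_count > 1. */
ccv_cnnp_model_set_parameters(target,
	ccv_cnnp_model_parameters(target, ALL_PARAMETERS, ALL_PARAMETERS),
	online,
	ccv_cnnp_model_parameters(online, ALL_PARAMETERS, ALL_PARAMETERS));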
1985
1986static ccv_nnc_stream_context_t* _ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type)
1987{
1988 if (!compiled_data->stream_map)
27
Assuming field 'stream_map' is null
28
Taking true branch
1989 compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map();
29
Calling 'kh_init_stream_map'
31
Returning from 'kh_init_stream_map'
1990 int ret = 0;
1991 khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret
)
;
32
Calling 'kh_put_stream_map'
1992 assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if (
ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c"
, 1992, __extension__ __PRETTY_FUNCTION__); }))
;
1993 ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
1994 // If ret == 0, the key already exists and we can return directly; otherwise, create and return.
1995 if (ret != 0)
1996 {
1997 stream = ccv_nnc_stream_context_new(type);
1998 kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream;
1999 }
2000 return stream;
2001}
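A sketch of the khash get-or-create idiom the helper above relies on; the include path, map name, and stored value are assumptions for illustration, not taken from this file.

#include <assert.h>
#include "khash.h" /* include path is an assumption */
KHASH_MAP_INIT_INT(int_map, int) /* hypothetical int -> int map */

static int get_or_create(khash_t(int_map)* const map, const int key)
{
	int ret = 0;
	const khiter_t k = kh_put(int_map, map, key, &ret); /* ret < 0 means allocation failed */
	assert(ret >= 0);
	if (ret != 0) /* newly inserted: the value slot is uninitialized until we set it */
		kh_val(map, k) = 0;
	return kh_val(map, k);
}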
2002
2003void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
2004{
2005 ccv_array_t* to_parameter_indices;
2006 int to_param_ref;
2007 ccv_array_t* from_parameter_indices;
2008 int from_param_ref;
2009 _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref);
2010 // To models.
2011 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2012 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2012, __extension__ __PRETTY_FUNCTION__
); }))
;
2013 // From models.
2014 const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data;
2015 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2016 const int to_parameter_size = to_compiled_data->parameters->rnum;
2017 const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1;
2018 int i, j;
2019 for (i = 0; i < rnum; i++)
2020 {
2021 const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t
)(from_parameter_indices)->rsize * (size_t)(from_param_ref
>= 0 ? from_param_ref : i)))
;
2022 assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if
(src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c"
, 2022, __extension__ __PRETTY_FUNCTION__); }))
;
2023 assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters
->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data
->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2023, __extension__ __PRETTY_FUNCTION__
); }))
;
2024 const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data
)) + (size_t)(from_compiled_data->parameters)->rsize * (
size_t)(src_d)))
)->d;
2025 // If the original is not init'ed, we cannot copy from it.
2026 if (!(from_compiled_data->tensors_init.v[s >> 5] & (1u << (s & 0x1f))))
2027 continue;
2028 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
2029 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2029, __extension__ __PRETTY_FUNCTION__); }))
;
2030 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2030, __extension__ __PRETTY_FUNCTION__
); }))
;
2031 if (parallel_count > 1)
2032 {
2033 ccv_nnc_stream_context_t* streams[parallel_count];
2034 ccv_nnc_stream_signal_t* signal;
2035 if (stream_context)
2036 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2037 for (j = 0; j < parallel_count; j++)
2038 {
2039 ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d + j * to_parameter_size];
2040 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2041 if (!dest || !src)
2042 {
2043 streams[j] = 0;
2044 continue;
2045 }
2046 // At the moment, we can only handle them on the same device.
2047 assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest->
info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src->
info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else
__assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)"
, "ccv_cnnp_model.c", 2047, __extension__ __PRETTY_FUNCTION__
); }))
;
2048 assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >>
8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1
: 0), __extension__ ({ if ((((src->info.type) & 0xfff00
) >> 8) == (((dest->info.type) & 0xfff00) >>
8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)"
, "ccv_cnnp_model.c", 2048, __extension__ __PRETTY_FUNCTION__
); }))
;
2049 const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
2050 const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8);
2051 int type = stream_type;
2052 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2053 ccv_nnc_stream_context_t* const stream_0 = _ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
2054 // Wait on the signal so the copy runs after prior work on stream_context.
2055 if (stream_context)
2056 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2057 ccv_nnc_cmd_exec(cmd, hint, flags, TENSOR_LIST(dest, src)(ccv_nnc_tensor_t* []){dest, src}, (1 +1 +1 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_0);
2058 if (stream_context)
2059 {
2060 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2061 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2062 }
2063 streams[j] = stream_0;
2064 }
2065 // If this should be blocking, block on each stream.
2066 if (!stream_context)
2067 for (j = 0; j < parallel_count; j++)
2068 if (streams[j])
2069 ccv_nnc_stream_context_wait(streams[j]);
2070 } else {
2071 ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d];
2072 assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else
__assert_fail ("src", "ccv_cnnp_model.c", 2072, __extension__
__PRETTY_FUNCTION__); }))
;
2073 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d];
2074 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2074, __extension__
__PRETTY_FUNCTION__); }))
;
2075 ccv_nnc_cmd_exec(cmd, hint, flags, TENSOR_LIST(dest, src)(ccv_nnc_tensor_t* []){dest, src}, (1 +1 +1 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2076 }
2077 // Mark this symbol as init'ed.
2078 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data)
) + (size_t)(to_compiled_data->parameters)->rsize * (size_t
)(dest_d)))
)->d;
2079 to_compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
2080 }
2081 ccv_array_free(to_parameter_indices);
2082 ccv_array_free(from_parameter_indices);
2083}
2084
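The per-device branch above uses a signal hand-off: when a caller supplies stream_context, a signal is emitted on it, each per-device stream waits on that signal before running the copy, and each per-device stream then signals back so stream_context can in turn wait on the copy; with no stream_context, the per-device streams are simply blocked on after the loop. A condensed sketch of that pattern, using only the stream primitives that already appear in this listing, wrapped in a hypothetical helper that takes the caller stream and one per-device stream:

// Condensed sketch of the hand-off used above; not the full function.
static void example_hand_off(ccv_nnc_stream_context_t* const stream_context, ccv_nnc_stream_context_t* const stream_0)
{
	ccv_nnc_stream_signal_t* signal = 0;
	if (stream_context)
		signal = ccv_nnc_stream_context_emit_signal_new(stream_context); // order after caller's prior work
	if (stream_context)
		ccv_nnc_stream_context_wait_signal(stream_0, signal); // the per-device stream waits first
	/* ... ccv_nnc_cmd_exec(...) would run on stream_0 here ... */
	if (stream_context)
		ccv_nnc_stream_context_wait_signal(stream_context, ccv_nnc_stream_context_emit_signal_new(stream_0)); // hand the result back
	else
		ccv_nnc_stream_context_wait(stream_0); // blocking mode; the real code does this once per stream after the loop
}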
2085void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_stream_context_t* const stream_context)
2086{
2087 int to_param_ref;
2088 ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref);
2089 // To models.
2090 ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
2091 assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({
if (to_compiled_data) ; else __assert_fail ("to_compiled_data"
, "ccv_cnnp_model.c", 2091, __extension__ __PRETTY_FUNCTION__
); }))
;
1
Assuming 'to_compiled_data' is non-null
2
Taking true branch
2092 // Tensor has to be inited already.
2093 assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 :
0), __extension__ ({ if (!!to_compiled_data->tensors_init
.v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v"
, "ccv_cnnp_model.c", 2093, __extension__ __PRETTY_FUNCTION__
); }))
;
3
Assuming field 'v' is non-null
4
Taking true branch
2094 assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1
: 0), __extension__ ({ if (to_compiled_data->tensors.parameters
) ; else __assert_fail ("to_compiled_data->tensors.parameters"
, "ccv_cnnp_model.c", 2094, __extension__ __PRETTY_FUNCTION__
); }))
;
5
Assuming field 'parameters' is non-null
6
Taking true branch
2095 // From models.
2096 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
7
Assuming '_a' is > '_b'
8
'?' condition is true
2097 const int to_parameter_size = to_compiled_data->parameters->rnum;
2098 const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1;
9
Assuming 'to_param_ref' is < 0
10
'?' condition is true
2099 int i, j;
2100 for (i = 0; i < rnum; i++)
11
Assuming 'i' is < 'rnum'
12
Loop condition is true. Entering loop body
2101 {
2102 const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t
)(to_parameter_indices)->rsize * (size_t)(to_param_ref >=
0 ? to_param_ref : i)))
;
13
'?' condition is false
2103 assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if
(dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c"
, 2103, __extension__ __PRETTY_FUNCTION__); }))
;
14
Assuming 'dest_d' is >= 0
15
Taking true branch
2104 assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters->
rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data
->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum"
, "ccv_cnnp_model.c", 2104, __extension__ __PRETTY_FUNCTION__
); }))
;
16
Assuming 'dest_d' is < field 'rnum'
17
Taking true branch
2105 if (parallel_count
17.1
'parallel_count' is > 1
> 1)
18
Taking true branch
2106 {
2107 ccv_nnc_stream_context_t* streams[parallel_count];
2108 ccv_nnc_stream_signal_t* signal;
2109 if (stream_context)
19
Assuming 'stream_context' is null
20
Taking false branch
2110 signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
2111 for (j = 0; j
20.1
'j' is < 'parallel_count'
< parallel_count; j++)
21
Loop condition is true. Entering loop body
2112 {
2113 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
2114 if (!dest)
22
Assuming 'dest' is non-null
23
Taking false branch
2115 {
2116 streams[j] = 0;
2117 continue;
2118 }
2119 const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
24
Assuming the condition is false
25
'?' condition is false
2120 const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8);
2121 int type = stream_type;
2122 CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff
) << 8))
;
2123 ccv_nnc_stream_context_t* const stream_0 = _ccv_cnnp_compiled_data_get_stream(to_compiled_data, type);
26
Calling '_ccv_cnnp_compiled_data_get_stream'
2124 // Wait on the signal so the command runs after prior work on stream_context.
2125 if (stream_context)
2126 ccv_nnc_stream_context_wait_signal(stream_0, signal);
2127 ccv_nnc_cmd_exec(cmd, hint, flags, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, 0);
2128 if (stream_context)
2129 {
2130 ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
2131 ccv_nnc_stream_context_wait_signal(stream_context, signal);
2132 }
2133 streams[j] = stream_0;
2134 }
2135 // If this should be blocking, block on each stream.
2136 if (!stream_context)
2137 for (j = 0; j < parallel_count; j++)
2138 if (streams[j])
2139 ccv_nnc_stream_context_wait(streams[j]);
2140 } else {
2141 ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d];
2142 assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ;
else __assert_fail ("dest", "ccv_cnnp_model.c", 2142, __extension__
__PRETTY_FUNCTION__); }))
;
2143 ccv_nnc_cmd_exec(cmd, hint, flags, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1)
, stream_context);
2144 }
2145 // No need to mark this symbol as init'ed; it already is.
2146 }
2147 ccv_array_free(to_parameter_indices);
2148}
2149
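For orientation, ccv_cnnp_model_parameters_map above runs one command over every selected parameter tensor, once per device when parallel_count > 1. A minimal sketch of how it might be invoked, assuming the public helpers ccv_cnnp_model_parameters, ALL_PARAMETERS, CMD_SET_FORWARD and ccv_nnc_no_hint are available (their exact spellings are an assumption, not taken from this listing):

// Sketch only: zero every parameter of an already-compiled model.
static void example_zero_parameters(ccv_cnnp_model_t* const model)
{
	ccv_cnnp_model_parameters_map(model,
		ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS), // select all parameters (assumed helper)
		CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0 /* flags */, 0 /* stream_context: run blocking */);
}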
2150ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model)
2151{
2152 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2153 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2153, __extension__ __PRETTY_FUNCTION__); }))
;
2154 return compiled_data->minimize.minimizer;
2155}
2156
2157void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size)
2158{
2159 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2160 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2160, __extension__ __PRETTY_FUNCTION__); }))
;
2161 const int parameter_size = compiled_data->parameters->rnum;
2162 if (parameter_size == 0)
2163 return;
2164 if (reset)
2165 { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size
== 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 &&
set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0"
, "ccv_cnnp_model.c", 2165, __extension__ __PRETTY_FUNCTION__
); }))
; }
2166 const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2167 const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer);
2168 if (saved_aux_size > compiled_data->minimize.max_saved_aux_size)
2169 compiled_data->minimize.max_saved_aux_size = saved_aux_size;
2170 const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size;
2171 // We update all parameters; at this point, we have one minimizer.
2172 if (set_parameters == 0 || set_parameter_size == 0)
2173 compiled_data->minimize.minimizer = minimizer;
2174 int i;
2175 if (set_parameters && set_parameter_size)
2176 {
2177 // We need to record which minimizer goes with these parameters.
2178 if (!compiled_data->minimize.parameters)
2179 compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0);
2180 ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t));
2181 set_minimizer_for_parameter->minimizer = minimizer;
2182 set_minimizer_for_parameter->parameter_size = set_parameter_size;
2183 memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size);
2184 ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter);
2185 }
2186 // If reset is true, clear the parameters array.
2187 if (reset && compiled_data->minimize.parameters)
2188 {
2189 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2190 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2191 ccv_array_clear(compiled_data->minimize.parameters);
2192 }
2193 if (!compiled_data->update_nodes)
2194 return;
2195 ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph;
2196 assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if
(symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c"
, 2196, __extension__ __PRETTY_FUNCTION__); }))
;
2197 if (saved_aux_size > old_max_saved_aux_size)
2198 {
2199 assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0
), __extension__ ({ if (compiled_data->updated_parameters)
; else __assert_fail ("compiled_data->updated_parameters"
, "ccv_cnnp_model.c", 2199, __extension__ __PRETTY_FUNCTION__
); }))
;
2200 // Reallocate first, move them around later.
2201 compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size);
2202 compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size);
2203 compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size);
2204 // We need to do this from back to front because saved_aux_size > old_max_saved_aux_size, so the regions could overlap.
2205 _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size);
2206 }
2207 int flag = 0;
2208 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2209 if (set_parameters && set_parameter_size)
2210 {
2211 ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
2212 for (i = 0; i < set_parameter_size; i++)
2213 {
2214 const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel;
2215 assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_sel != 0)
; else __assert_fail ("set_parameters[i]->param_sel != 0"
, "ccv_cnnp_model.c", 2215, __extension__ __PRETTY_FUNCTION__
); }))
;
2216 const int old_rnum = parameter_indices->rnum;
2217 ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices);
2218 const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref;
2219 assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0
), __extension__ ({ if (set_parameters[i]->param_ref != 0)
; else __assert_fail ("set_parameters[i]->param_ref != 0"
, "ccv_cnnp_model.c", 2219, __extension__ __PRETTY_FUNCTION__
); }))
;
2220 if (param_ref >= 0)
2221 {
2222 assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices->
rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum <
parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum"
, "ccv_cnnp_model.c", 2222, __extension__ __PRETTY_FUNCTION__
); }))
;
2223 *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(old_rnum)))
= *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(param_ref + old_rnum)))
;
2224 parameter_indices->rnum = old_rnum + 1;
2225 }
2226 }
2227 // We may have duplicated indices, but that is OK; we will just set them twice.
2228 for (i = 0; i < parameter_indices->rnum; i++)
2229 {
2230 const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices
)->rsize * (size_t)(i)))
;
2231 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d))
2232 flag = 1;
2233 }
2234 ccv_array_free(parameter_indices);
2235 } else {
2236 for (i = 0; i < parameter_size; i++)
2237 if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i))
2238 flag = 1;
2239 if (compiled_data->minimize.parameters)
2240 if (_ccv_cnnp_apply_parameters_with_minimizer(model))
2241 flag = 1;
2242 }
2243 if (flag)
2244 {
2245 // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up the apply-gradients graph.
2246 if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE)
2247 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2248 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2249 }
2250}
2251
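ccv_cnnp_model_set_minimizer pairs naturally with ccv_cnnp_model_minimizer for schedules that adjust an existing optimizer rather than rebuild it. A hedged sketch follows; the .info.sgd.rate field layout is an assumption about the command parameter union and may differ:

// Sketch only: lower the learning rate of the current minimizer for all parameters.
static void example_set_learning_rate(ccv_cnnp_model_t* const model, const float rate)
{
	ccv_nnc_cmd_t minimizer = ccv_cnnp_model_minimizer(model);
	minimizer.info.sgd.rate = rate; // assumed parameter layout
	ccv_cnnp_model_set_minimizer(model, minimizer, 0 /* reset */, 0, 0 /* apply to all parameters */);
}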
2252void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params)
2253{
2254 ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
2255 assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if
(compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c"
, 2255, __extension__ __PRETTY_FUNCTION__); }))
;
2256 compiled_data->compile_params = compile_params;
2257}
2258
2259void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size)
2260{
2261 if (model->graph && out_size > 0)
2262 ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]);
2263 if (model->compiled_data && model->compiled_data->graph && out_size > 1)
2264 ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]);
2265 if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2)
2266 ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]);
2267 if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3)
2268 ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]);
2269}
2270
2271static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
2272{
2273 int i;
2274 const int parameter_size = compiled_data->parameters->rnum;
2275 ccv_array_free(compiled_data->parameters);
2276 const int internal_size = compiled_data->internals->rnum;
2277 ccv_array_free(compiled_data->internals);
2278 assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum ==
parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data
->ids.parameters->rnum == parameter_size) ; else __assert_fail
("compiled_data->ids.parameters->rnum == parameter_size"
, "ccv_cnnp_model.c", 2278, __extension__ __PRETTY_FUNCTION__
); }))
;
2279 assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size
) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals
->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size"
, "ccv_cnnp_model.c", 2279, __extension__ __PRETTY_FUNCTION__
); }))
;
2280 for (i = 0; i < parameter_size; i++)
2281 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data
)) + (size_t)(compiled_data->ids.parameters)->rsize * (
size_t)(i)))
);
2282 ccv_array_free(compiled_data->ids.parameters);
2283 for (i = 0; i < internal_size; i++)
2284 ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data)
) + (size_t)(compiled_data->ids.internals)->rsize * (size_t
)(i)))
);
2285 ccv_array_free(compiled_data->ids.internals);
2286 const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count
); typeof (1) _b = (1); (_a > _b) ? _a : _b; })
;
2287 if (compiled_data->tensors.parameters)
2288 {
2289 for (i = 0; i < parameter_size * parallel_count; i++)
2290 ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
2291 for (i = 0; i < internal_size * parallel_count; i++)
2292 if (compiled_data->tensors.internals[i])
2293 ccv_nnc_tensor_free(compiled_data->tensors.internals[i]);
2294 ccfreefree(compiled_data->tensors.parameters);
2295 }
2296 if (compiled_data->tensors.gradients)
2297 {
2298 for (i = 0; i < parameter_size * parallel_count; i++)
2299 {
2300 ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]);
2301 if (compiled_data->tensors.accum_gradients[i])
2302 ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]);
2303 }
2304 ccfreefree(compiled_data->tensors.gradients);
2305 }
2306 if (compiled_data->minimize.parameters)
2307 {
2308 for (i = 0; i < compiled_data->minimize.parameters->rnum; i++)
2309 ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)->
data)) + (size_t)(compiled_data->minimize.parameters)->
rsize * (size_t)(i)))
);
2310 ccv_array_free(compiled_data->minimize.parameters);
2311 }
2312 if (compiled_data->rewindables)
2313 ccv_array_free(compiled_data->rewindables);
2314 if (compiled_data->tensors_init.v)
2315 ccfreefree(compiled_data->tensors_init.v);
2316 if (compiled_data->evaluate.tos)
2317 ccfreefree(compiled_data->evaluate.tos);
2318 compiled_data->evaluate.tos = 0;
2319 if (compiled_data->stream_map)
2320 {
2321 khiter_t k;
2322 for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k)
2323 {
2324 if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>>
(((k)&0xfU)<<1))&3))
)
2325 continue;
2326 ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]);
2327 ccv_nnc_stream_context_free(stream);
2328 }
2329 kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map);
2330 }
2331 _ccv_cnnp_compiled_data_graph_free(compiled_data);
2332 _ccv_cnnp_compiled_data_gradient_free(compiled_data);
2333 _ccv_cnnp_compiled_data_backward_free(compiled_data);
2334 _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data);
2335 ccfreefree(compiled_data);
2336}
2337
2338void ccv_cnnp_model_free(ccv_cnnp_model_t* const model)
2339{
2340 if (model->isa->deinit)
2341 model->isa->deinit(model);
2342 if (model->io)
2343 {
2344 int i;
2345 for (i = 0; i < model->io->rnum; i++)
2346 {
2347 ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model->
io)->rsize * (size_t)(i)))
;
2348 if (model_io->outgoings)
2349 ccv_array_free(model_io->outgoings);
2350 if (model_io->incomings)
2351 ccv_array_free(model_io->incomings);
2352 ccfreefree(model_io);
2353 }
2354 ccv_array_free(model->io);
2355 }
2356 if (model->parameter_indices)
2357 ccv_array_free(model->parameter_indices);
2358 if (model->inputs)
2359 ccfreefree(model->inputs);
2360 if (model->graph)
2361 ccv_nnc_symbolic_graph_free(model->graph);
2362 if (model->compiled_data)
2363 _ccv_cnnp_compiled_data_free(model, model->compiled_data);
2364 if (model->name)
2365 ccfreefree(model->name);
2366 ccfreefree(model);
2367}

./_ccv_nnc_stream.h

1/**********************************************************
2 * C-based/Cached/Core Computer Vision Library
3 * Liu Liu, 2010-02-01
4 **********************************************************/
5
6/**********************************************************
7 * CCV - Neural Network Collection
8 **********************************************************/
9
10#ifndef GUARD_ccv_nnc_stream_internal_h
11#define GUARD_ccv_nnc_stream_internal_h
12
13#include "ccv_nnc.h"
14#include "co.h"
15#include "3rdparty/khash/khash.h"
16
17struct ccv_nnc_stream_signal_s {
18 int type;
19 ccv_nnc_stream_context_t* emit_context;
20};
21
22typedef struct {
23 // Empty, this will hold things such as NCCL communicator in subclass.
24} ccv_nnc_stream_resource_container_t;
25
26struct ccv_nnc_stream_context_s {
27 int type;
28 // For resource container
29 ccv_nnc_stream_resource_container_t* _inline_container[1];
30 ccv_nnc_stream_resource_container_t** resource_container;
31 // For scheduler
32 co_routine_t* main; // main task.
33 co_scheduler_t* scheduler;
34 // For neighbor discovery
35 ccv_nnc_stream_context_neighbor_discovery_f neighbor_discovery;
36 void* neighbor_discovery_context;
37 // For hooks
38 ccv_array_t* destructor_hooks;
39 int reuse_destructor_hook;
40 ccv_nnc_stream_signal_t* event;
41 ccv_nnc_stream_signal_t* checkpoint;
42};
43
44// Return the scheduler from a stream (if not created, create one).
45CCV_WARN_UNUSED(co_scheduler_t*)co_scheduler_t* __attribute__((warn_unused_result)) ccv_nnc_stream_context_get_scheduler(ccv_nnc_stream_context_t* const stream_context);
46
47#define co_stream_await(_stream)do { if (!_co_stream_await(_self_, _stream)) { return (co_state_t
){ 47, 0 }; } case 47: ; } while (0)
do { if (!_co_stream_await(_self_, _stream)) { return (co_state_t){ __LINE__47, 0 }; } case __LINE__47: ; } while (0)
48int _co_stream_await(co_routine_t* const self, ccv_nnc_stream_context_t* const stream);
49
50typedef struct {
51 ccv_nnc_callback_f fn;
52 void* callback_context;
53} ccv_nnc_async_callback_t;
54
55typedef void(*ccv_nnc_async_callback_f)(ccv_nnc_async_callback_t* const async);
56
57KHASH_MAP_INIT_INT(stream_map, ccv_nnc_stream_context_t*)typedef struct kh_stream_map_s { khint_t n_buckets, size, n_occupied
, upper_bound; khint32_t *flags; khint32_t *keys; ccv_nnc_stream_context_t
* *vals; } kh_stream_map_t; static inline __attribute__ ((__unused__
)) kh_stream_map_t *kh_init_stream_map(void) { return (kh_stream_map_t
*)calloc(1,sizeof(kh_stream_map_t)); } static inline __attribute__
((__unused__)) void kh_destroy_stream_map(kh_stream_map_t *h
) { if (h) { free((void *)h->keys); free(h->flags); free
((void *)h->vals); free(h); } } static inline __attribute__
((__unused__)) void kh_clear_stream_map(kh_stream_map_t *h) {
if (h && h->flags) { memset(h->flags, 0xaa, ((
h->n_buckets) < 16? 1 : (h->n_buckets)>>4) * sizeof
(khint32_t)); h->size = h->n_occupied = 0; } } static inline
__attribute__ ((__unused__)) khint_t kh_get_stream_map(const
kh_stream_map_t *h, khint32_t key) { if (h->n_buckets) { khint_t
k, i, last, mask, step = 0; mask = h->n_buckets - 1; k = (
khint32_t)(key); i = k & mask; last = i; while (!((h->
flags[i>>4]>>((i&0xfU)<<1))&2) &&
(((h->flags[i>>4]>>((i&0xfU)<<1))&
1) || !((h->keys[i]) == (key)))) { i = (i + (++step)) &
mask; if (i == last) return h->n_buckets; } return ((h->
flags[i>>4]>>((i&0xfU)<<1))&3)? h->
n_buckets : i; } else return 0; } static inline __attribute__
((__unused__)) int kh_resize_stream_map(kh_stream_map_t *h, khint_t
new_n_buckets) { khint32_t *new_flags = 0; khint_t j = 1; { (
--(new_n_buckets), (new_n_buckets)|=(new_n_buckets)>>1,
(new_n_buckets)|=(new_n_buckets)>>2, (new_n_buckets)|=
(new_n_buckets)>>4, (new_n_buckets)|=(new_n_buckets)>>
8, (new_n_buckets)|=(new_n_buckets)>>16, ++(new_n_buckets
)); if (new_n_buckets < 4) new_n_buckets = 4; if (h->size
>= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0
; else { new_flags = (khint32_t*)malloc(((new_n_buckets) <
16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t)); if (
!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets
) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t))
; if (h->n_buckets < new_n_buckets) { khint32_t *new_keys
= (khint32_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(khint32_t)); if (!new_keys) { free(new_flags); return -1; } h
->keys = new_keys; if (1) { ccv_nnc_stream_context_t* *new_vals
= (ccv_nnc_stream_context_t**)realloc((void *)h->vals,new_n_buckets
* sizeof(ccv_nnc_stream_context_t*)); if (!new_vals) { free(
new_flags); return -1; } h->vals = new_vals; } } } } if (j
) { for (j = 0; j != h->n_buckets; ++j) { if (((h->flags
[j>>4]>>((j&0xfU)<<1))&3) == 0) { khint32_t
key = h->keys[j]; ccv_nnc_stream_context_t* val; khint_t new_mask
; new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (
h->flags[j>>4]|=1ul<<((j&0xfU)<<1));
while (1) { khint_t k, i, step = 0; k = (khint32_t)(key); i =
k & new_mask; while (!((new_flags[i>>4]>>((i
&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { khint32_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { ccv_nnc_stream_context_t* tmp = h->vals[i]; h->vals
[i] = val; val = tmp; } (h->flags[i>>4]|=1ul<<
((i&0xfU)<<1)); } else { h->keys[i] = key; if (1
) h->vals[i] = val; break; } } } } if (h->n_buckets >
new_n_buckets) { h->keys = (khint32_t*)realloc((void *)h->
keys,new_n_buckets * sizeof(khint32_t)); if (1) h->vals = (
ccv_nnc_stream_context_t**)realloc((void *)h->vals,new_n_buckets
* sizeof(ccv_nnc_stream_context_t*)); } free(h->flags); h
->flags = new_flags; h->n_buckets = new_n_buckets; h->
n_occupied = h->size; h->upper_bound = (khint_t)(h->
n_buckets * __ac_HASH_UPPER + 0.5); } return 0; } static inline
__attribute__ ((__unused__)) khint_t kh_put_stream_map(kh_stream_map_t
*h, khint32_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_stream_map(h, h->n_buckets - 1
) < 0) { *ret = -1; return h->n_buckets; } } else if (kh_resize_stream_map
(h, h->n_buckets + 1) < 0) { *ret = -1; return h->n_buckets
; } } { khint_t k, i, site, last, mask = h->n_buckets - 1,
step = 0; x = site = h->n_buckets; k = (khint32_t)(key); i
= k & mask; if (((h->flags[i>>4]>>((i&
0xfU)<<1))&2)) x = i; else { last = i; while (!((h->
flags[i>>4]>>((i&0xfU)<<1))&2) &&
(((h->flags[i>>4]>>((i&0xfU)<<1))&
1) || !((h->keys[i]) == (key)))) { if (((h->flags[i>>
4]>>((i&0xfU)<<1))&1)) site = i; i = (i +
(++step)) & mask; if (i == last) { x = site; break; } } if
(x == h->n_buckets) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&2) && site != h->n_buckets
) x = site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_stream_map(kh_stream_map_t *h, khint_t x) { if (x != h
->n_buckets && !((h->flags[x>>4]>>(
(x&0xfU)<<1))&3)) { (h->flags[x>>4]|=1ul
<<((x&0xfU)<<1)); --h->size; } }
;
30
Null pointer value stored to field 'flags'
33
Taking true branch
34
Taking false branch
35
Calling 'kh_resize_stream_map'
36
Taking true branch
37
Assuming the condition is true
38
Taking true branch
39
Taking false branch
40
Returning without writing to 'h->flags'
41
Returning from 'kh_resize_stream_map'
42
Taking false branch
43
Array access (via field 'flags') results in a null pointer dereference
58
59#endif
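Reading the trace end to end: kh_init_stream_map calloc's the map, so 'flags' starts out null (step 30). On the reported path, kh_put_stream_map calls kh_resize_stream_map, the 'h->size >= new upper bound' branch is assumed true, so the resize returns 0 without ever allocating 'flags' (steps 35-41), and kh_put then indexes 'h->flags', which is the line 57 null pointer dereference (step 43). Whether that branch is actually reachable for a freshly initialized map depends on the khash invariant that size stays below the upper bound, which the analyzer does not track across these calls. A minimal sketch of the call sequence, with the body of _ccv_cnnp_compiled_data_get_stream assumed since it is not shown in this report:

// Sketch of the flagged sequence only; the real lookup/creation logic in
// _ccv_cnnp_compiled_data_get_stream is an assumption here.
kh_stream_map_t* const stream_map = kh_init_stream_map(); // calloc: flags == NULL
int ret;
const khiter_t k = kh_put_stream_map(stream_map, 0 /* stream type key */, &ret);
// Inside kh_put_stream_map, kh_resize_stream_map can return 0 without touching
// flags on the analyzed path, after which h->flags[i >> 4] is read: the
// warning at _ccv_nnc_stream.h line 57.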