File: | nnc/ccv_cnnp_model.c |
Warning: | line 2468, column 1 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv_nnc.h" |
2 | #include "ccv_nnc_easy.h" |
3 | #include "ccv_nnc_internal.h" |
4 | #include "ccv_internal.h" |
5 | #include "_ccv_cnnp_model.h" |
6 | #include "_ccv_nnc_graph.h" |
7 | |
8 | // MARK - Level-5 API |
9 | |
10 | ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size) |
11 | { |
12 | if (!model->io) |
13 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); |
14 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size); |
15 | model_io->param_ref = 0; |
16 | model_io->param_sel = 0; |
17 | model_io->visit = 0; |
18 | model_io->model = model; |
19 | model_io->dependencies = 0; |
20 | model_io->dependents = 0; |
21 | model_io->outgoings = 0; |
22 | model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1); |
23 | ccv_array_push(model->io, &model_io); |
24 | if (input_size > 0) |
25 | { |
26 | model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0); |
27 | ccv_array_resize(model_io->incomings, input_size); |
28 | int i; |
29 | memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t )(model_io->incomings)->rsize * (size_t)(0))), inputs, sizeof(ccv_cnnp_model_io_t) * input_size); |
30 | for (i = 0; i < input_size; i++) |
31 | { |
32 | if (!inputs[i]->outgoings) |
33 | inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); |
34 | ccv_array_push(inputs[i]->outgoings, &model_io); |
35 | } |
36 | } else { |
37 | model_io->incomings = 0; |
38 | } |
39 | return model_io; |
40 | } |
41 | |
42 | void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size) |
43 | { |
44 | assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__ ({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0" , "ccv_cnnp_model.c", 44, __extension__ __PRETTY_FUNCTION__); })); |
45 | if (!model_io->dependencies) |
46 | model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0); |
47 | int i, j; |
48 | for (i = 0; i < dependency_size; i++) |
49 | { |
50 | int flag = 0; |
51 | // Check if it is already exist or not. |
52 | for (j = 0; !flag && j < model_io->dependencies->rnum; j++) |
53 | if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t )(model_io->dependencies)->rsize * (size_t)(j))) == dependencies[i]) |
54 | flag = 1; |
55 | if (flag) |
56 | continue; |
57 | ccv_array_push(model_io->dependencies, dependencies + i); |
58 | ++dependencies[i]->dependents; |
59 | } |
60 | } |
61 | |
62 | int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model) |
63 | { |
64 | return model->output_size; |
65 | } |
66 | |
67 | int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model) |
68 | { |
69 | // If the model is compiled, it is default to 1 unless it is not. |
70 | if (model->compiled_data) |
71 | return model->is_trainable >= 0 ? model->is_trainable : 1; |
72 | return model->is_trainable; |
73 | } |
74 | |
75 | ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index) |
76 | { |
77 | if (!model->io) |
78 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); |
79 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s)); |
80 | model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1; |
81 | model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1; |
82 | model_io->visit = 0; |
83 | model_io->model = model; |
84 | model_io->outputs = 0; |
85 | model_io->dependencies = 0; |
86 | model_io->dependents = 0; |
87 | model_io->incomings = 0; |
88 | model_io->outgoings = 0; |
89 | ccv_array_push(model->io, &model_io); |
90 | return model_io; |
91 | } |
92 | |
93 | void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context) |
94 | { |
95 | model->notify_hook.func = func; |
96 | model->notify_hook.context = context; |
97 | } |
98 | |
99 | void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload) |
100 | { |
101 | if (model->notify_hook.func) |
102 | model->notify_hook.func(model, tag, payload, model->notify_hook.context); |
103 | if (model->isa->notify) |
104 | model->isa->notify(model, tag, payload); |
105 | } |
106 | |
107 | static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size) |
108 | { |
109 | int i, j; |
110 | for (i = 0; i < graph_exec_symbol_size; i++) |
111 | { |
112 | ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i; |
113 | // Check whether this tensor symbol has any duplicate. |
114 | for (j = i + 1; j < graph_exec_symbol_size;) |
115 | { |
116 | ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j; |
117 | // If there is a same tensor symbol, remove it. |
118 | if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph) |
119 | { |
120 | if (j + 1 < graph_exec_symbol_size) |
121 | *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1]; |
122 | --graph_exec_symbol_size; |
123 | continue; |
124 | } |
125 | ++j; |
126 | } |
127 | } |
128 | return graph_exec_symbol_size; |
129 | } |
130 | |
131 | void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable) |
132 | { |
133 | ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context; |
134 | ccv_cnnp_model_t* const model = add_to_array_context->sequence->model; |
135 | int i; |
136 | if (add_to_array_context->add_parameter_indices && !model->parameter_indices) |
137 | model->parameter_indices = ccv_array_new(sizeof(int), 0, 0); |
138 | for (i = 0; i < add_to_array_context->symbols->rnum; i++) |
139 | { |
140 | const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data )) + (size_t)(add_to_array_context->symbols)->rsize * ( size_t)(i))); |
141 | if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph) |
142 | { |
143 | // Only add to parameter_indices if it is trainable. |
144 | if (add_to_array_context->add_parameter_indices) |
145 | ccv_array_add_unique_int(model->parameter_indices, i); |
146 | // Found it, return, don't add it. |
147 | return; |
148 | } |
149 | } |
150 | // Only add to parameter_indices if it is trainable. |
151 | if (add_to_array_context->add_parameter_indices) |
152 | ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum); |
153 | // This is a new one, no need to add_unique_int, it is unique. |
154 | ccv_array_push(add_to_array_context->symbols, &symbol); |
155 | if (add_to_array_context->trainables) |
156 | ccv_array_push(add_to_array_context->trainables, &is_trainable); |
157 | char id[2048]; |
158 | id[0] = add_to_array_context->prefix; |
159 | id[1] = '-'; |
160 | int total_len = 2; |
161 | for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++) |
162 | { |
163 | const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences )->data)) + (size_t)(add_to_array_context->sequence-> sequences)->rsize * (size_t)(i))); |
164 | int len; |
165 | if (name->name && name->name[0] != '\0') |
166 | len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence); |
167 | else |
168 | len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence); |
169 | total_len += len; |
170 | if (total_len >= 2047) |
171 | break; |
172 | } |
173 | if (total_len < 2047) |
174 | total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it); |
175 | assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__ ({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048" , "ccv_cnnp_model.c", 175, __extension__ __PRETTY_FUNCTION__) ; })); |
176 | char *heap_id = (char*)ccmallocmalloc(total_len + 1); |
177 | memcpy(heap_id, id, total_len + 1); |
178 | ccv_array_push(add_to_array_context->ids, &heap_id); |
179 | ++add_to_array_context->sequence->it; |
180 | } |
181 | |
182 | static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints) |
183 | { |
184 | compiled_data->f = compiled_data->fits + output_size; |
185 | compiled_data->xpu_alloc.mp_hdr = -1; |
186 | compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str(); |
187 | compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc(); |
188 | compiled_data->gradient_checkpoints = gradient_checkpoints; |
189 | } |
190 | |
// Context handed to _ccv_cnnp_model_set_exec_flags while a model is being built.
// It carries what the hook needs to flag new exec symbols, plus the previously
// installed hook so that the hook can chain to it.
191 | typedef struct { |
// Context pointer belonging to the previously installed hook.
192 | void* old_graph_exec_symbol_new_hook_context; |
// Previously installed hook; called after flags are applied when non-null.
193 | ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook; |
// Graph on which exec symbol flags are set.
194 | ccv_nnc_symbolic_graph_t* graph; |
// Build data whose exec_flags field is applied to each new exec symbol.
195 | ccv_cnnp_model_build_data_t* build_data; |
196 | } ccv_cnnp_model_set_exec_flags_context_t; |
197 | |
198 | static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) |
199 | { |
200 | ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context; |
201 | if (flags_context->build_data->exec_flags) |
202 | ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags); |
203 | if (flags_context->old_graph_exec_symbol_new_hook) |
204 | flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name); |
205 | } |
206 | |
207 | static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss) |
208 | { |
209 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 209, __extension__ __PRETTY_FUNCTION__); })); |
210 | model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size); |
211 | int i; |
212 | for (i = 0; i < input_size; i++) |
213 | model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0); |
214 | ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); |
215 | ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0); |
216 | ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0); |
217 | ccv_cnnp_model_sequence_t model_sequence = { |
218 | .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank() |
219 | }; |
220 | ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = { |
221 | .add_parameter_indices = 1, |
222 | .prefix = 't', |
223 | .sequence = &model_sequence, |
224 | .symbols = parameters, |
225 | .ids = parameter_ids, |
226 | .trainables = parameter_trainables, |
227 | }; |
228 | ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); |
229 | ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0); |
230 | ccv_cnnp_model_add_to_array_context_t add_to_output_context = { |
231 | .add_parameter_indices = 0, |
232 | .prefix = 'r', |
233 | .sequence = &model_sequence, |
234 | .symbols = internals, |
235 | .ids = internal_ids, |
236 | .trainables = 0, |
237 | }; |
238 | ccv_cnnp_model_build_data_t build_data = { |
239 | .exec_flags = 0, |
240 | .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1, |
241 | .model_sequence = &model_sequence, |
242 | .add_to_array = ccv_cnnp_model_add_to_array, |
243 | .parameters = parameters, |
244 | .context = { |
245 | .add_to_parameter = &add_to_parameter_context, |
246 | .add_to_output = &add_to_output_context, |
247 | }, |
248 | .gradient_checkpoints = 0, |
249 | }; |
250 | model->data = &build_data; |
251 | ccv_cnnp_model_set_exec_flags_context_t flags_context = { |
252 | .graph = model->graph, |
253 | .build_data = &build_data, |
254 | .old_graph_exec_symbol_new_hook = 0, |
255 | .old_graph_exec_symbol_new_hook_context = 0 |
256 | }; |
257 | flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook); |
258 | ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0); |
259 | // Reset back to previous hook. |
260 | ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0); |
261 | for (i = 0; i < model->output_size; i++) |
262 | { |
263 | const ccv_nnc_tensor_symbol_t output = model->outputs[i]; |
264 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output); |
265 | if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL) |
266 | continue; |
267 | // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method |
268 | // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be |
269 | // honest, because we cannot handle cases of alias is part of the original tensor but bind differently). |
270 | const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output); |
271 | model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0); |
272 | ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto , 0), &output, 1, model->outputs + i, 1, "contiguous"); |
273 | ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT); |
274 | } |
275 | model->data = 0; |
276 | kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank); |
277 | if (model_sequence.sequences) |
278 | ccv_array_free(model_sequence.sequences); |
279 | // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that. |
280 | int not_trainables = 0; |
281 | // Assert no parameter is alias. |
282 | for (i = 0; i < parameters->rnum; i++) |
283 | { |
284 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(i))); |
285 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter); |
286 | assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__ ({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0" , "ccv_cnnp_model.c", 286, __extension__ __PRETTY_FUNCTION__) ; })); // Cannot find the one alias to. |
287 | if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t )(parameter_trainables)->rsize * (size_t)(i))) == 0) |
288 | not_trainables = 1; |
289 | } |
290 | assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables-> rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables ->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum" , "ccv_cnnp_model.c", 290, __extension__ __PRETTY_FUNCTION__) ; })); |
291 | uint64_t* parameter_flags = 0; |
292 | if (not_trainables) |
293 | { |
294 | parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t)); |
295 | for (i = 0; i < parameter_trainables->rnum; i++) |
296 | if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t )(parameter_trainables)->rsize * (size_t)(i)))) |
297 | parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63)); |
298 | } |
299 | ccv_array_free(parameter_trainables); |
300 | // Assert no internal is alias. |
301 | for (i = 0; i < internals->rnum; i++) |
302 | { |
303 | const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals )->rsize * (size_t)(i))); |
304 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal); |
305 | assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__ ({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0" , "ccv_cnnp_model.c", 305, __extension__ __PRETTY_FUNCTION__) ; })); // Cannot find the one alias to. |
306 | } |
307 | const int output_size = model->output_size; |
308 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
309 | const int parameters_rnum = parameters->rnum; |
310 | if (input_size > 0) |
311 | { |
312 | ccv_array_resize(parameters, parameters_rnum + input_size); |
313 | memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(parameters_rnum))), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t)); |
314 | } |
315 | ccv_nnc_symbolic_graph_simplify(model->graph, |
316 | SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) |
317 | CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) |
318 | CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) |
319 | CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), |
320 | ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(0))), parameters_rnum + input_size, |
321 | model->outputs, output_size, |
322 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); |
323 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
324 | // Size it down. |
325 | parameters->rnum = parameters_rnum; |
326 | ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1)); |
327 | _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints); |
328 | const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph); |
329 | assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__ ({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0" , "ccv_cnnp_model.c", 329, __extension__ __PRETTY_FUNCTION__) ; })); |
330 | compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size); |
331 | memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size); |
332 | compiled_data->loss = loss; |
333 | if (loss.cmd == CCV_NNC_NOOP) |
334 | { |
335 | // If no loss function provided, there is no fits. |
336 | for (i = 0; i < output_size; i++) |
337 | { |
338 | compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; |
339 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]); |
340 | if (alias_to.d < 0) |
341 | compiled_data->f[i] = model->outputs[i]; |
342 | else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original. |
343 | int ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; |
344 | int inc[CCV_NNC_MAX_DIM_ALLOC(12)]; |
345 | ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc); |
346 | int j; |
347 | for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++) |
348 | { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if ( ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c" , 348, __extension__ __PRETTY_FUNCTION__); })); } // There is no ofs. |
349 | compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet. |
350 | } |
351 | } |
352 | } else { |
353 | for (i = 0; i < output_size; i++) |
354 | { |
355 | const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]); |
356 | const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0); |
357 | compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0); |
358 | ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit} , (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, ( 1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); |
359 | } |
360 | } |
361 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
362 | ccv_nnc_symbolic_graph_simplify(model->graph, |
363 | SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), // Only do Ops fusion, in this way, we can fuse the loss function. |
364 | 0, 0, // No need to provide binds at this point. |
365 | compiled_data->f, model->output_size, |
366 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); |
367 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
368 | // If inputs are from GPU, stream type is GPU. |
369 | compiled_data->parameters = parameters; |
370 | compiled_data->parameter_flags = parameter_flags; |
371 | compiled_data->internals = internals; |
372 | compiled_data->ids.parameters = parameter_ids; |
373 | compiled_data->ids.internals = internal_ids; |
374 | ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph); |
375 | } |
376 | |
377 | static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) |
378 | { |
379 | ccv_array_t* const stack = (ccv_array_t*)context; |
380 | ccv_array_push(stack, &symbol.d); |
381 | } |
382 | |
383 | static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) |
384 | { |
385 | const ccv_nnc_tensor_symbol_t src_symbol = { |
386 | .d = src_index, |
387 | .graph = src_graph |
388 | }; |
389 | const ccv_nnc_tensor_symbol_t dest_symbol = { |
390 | .d = dest_index, |
391 | .graph = dest_graph |
392 | }; |
393 | const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); |
394 | ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params); |
395 | int ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; |
396 | int inc[CCV_NNC_MAX_DIM_ALLOC(12)]; |
397 | if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc)) |
398 | ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc); |
399 | } |
400 | |
401 | static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) |
402 | { |
403 | const ccv_nnc_tensor_symbol_t src_symbol = { |
404 | .d = src_index, |
405 | .graph = src_graph |
406 | }; |
407 | const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); |
408 | const ccv_nnc_tensor_symbol_t dest_symbol = { |
409 | .d = dest_index, |
410 | .graph = dest_graph |
411 | }; |
412 | const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol); |
413 | return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0; |
414 | } |
415 | |
// Forward declarations of file-local helpers so they can be referenced before
// their definitions (ccv_cnnp_model_absorb below calls _ccv_cnnp_model_gradient_init).
416 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size); |
417 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data); |
418 | |
// Context handed to _ccv_cnnp_cmd_update_for_execs.
419 | typedef struct { |
// Number of parallel copies; copies 1 .. parallel_count - 1 of a symbol are updated too.
420 | int parallel_count; |
// Symbolic graph used to look up the parallel copies of an exec symbol.
421 | ccv_nnc_symbolic_graph_t* graph; |
// Arena used to resolve an exec symbol to its concrete graph exec.
422 | ccv_nnc_graph_exec_arena_t* graph_exec_arena; |
423 | } ccv_nnc_graph_exec_update_t; |
424 | |
425 | static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint) |
426 | { |
427 | ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context; |
428 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena; |
429 | ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol); |
430 | ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd); |
431 | ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint); |
432 | const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph; |
433 | const int parallel_count = graph_exec_update->parallel_count; |
434 | int i; |
435 | for (i = 1; i < parallel_count; i++) |
436 | { |
437 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i)); |
438 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) |
439 | { |
440 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); |
441 | ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint); |
442 | } |
443 | } |
444 | } |
445 | |
446 | void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size) |
447 | { |
448 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 448, __extension__ __PRETTY_FUNCTION__); })); |
449 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 449, __extension__ __PRETTY_FUNCTION__) ; })); |
450 | assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if (!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c" , 450, __extension__ __PRETTY_FUNCTION__); })); |
451 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
452 | init->graph = ccv_nnc_symbolic_graph_new(); |
453 | ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0); |
454 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0); |
455 | _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss); |
456 | init->parallel_count = model->parallel_count; |
457 | init->memory_compression = model->memory_compression; |
458 | init->memory_reduction = model->memory_reduction; |
459 | init->gradient_checkpointing = model->gradient_checkpointing; |
460 | init->compiled_data->stream_type = model->compiled_data->stream_type; |
461 | init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer; |
462 | init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size; |
463 | if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) |
464 | _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0); |
465 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0); |
466 | ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0); |
467 | int i, j; |
468 | // Verify parameters, internals and saved_aux in both graph has the same dimensionality. |
469 | for (i = 0; i < compiled_data->parameters->rnum; i++) |
470 | { |
471 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; |
472 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 472, __extension__ __PRETTY_FUNCTION__) ; })); |
473 | } |
474 | for (i = 0; i < compiled_data->internals->rnum; i++) |
475 | { |
476 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; |
477 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 477, __extension__ __PRETTY_FUNCTION__) ; })); |
478 | } |
479 | // Update inputs. |
480 | assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size) ? 1 : 0), __extension__ ({ if (model->input_size == init-> input_size) ; else __assert_fail ("model->input_size == init->input_size" , "ccv_cnnp_model.c", 480, __extension__ __PRETTY_FUNCTION__) ; })); |
481 | for (i = 0; i < model->input_size; i++) |
482 | if (model->inputs[i].d >= 0) |
483 | { |
484 | assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0" , "ccv_cnnp_model.c", 484, __extension__ __PRETTY_FUNCTION__) ; })); |
485 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d); |
486 | } |
487 | // Update outputs. |
488 | assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size ) ? 1 : 0), __extension__ ({ if (model->output_size == init ->output_size) ; else __assert_fail ("model->output_size == init->output_size" , "ccv_cnnp_model.c", 488, __extension__ __PRETTY_FUNCTION__) ; })); |
489 | for (i = 0; i < model->output_size; i++) |
490 | { |
491 | if (model->outputs[i].d >= 0) |
492 | { |
493 | assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->outputs[i].d >= 0) ; else __assert_fail ( "init->outputs[i].d >= 0", "ccv_cnnp_model.c", 493, __extension__ __PRETTY_FUNCTION__); })); |
494 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d); |
495 | } |
496 | if (model->outputs[i].d != model->compiled_data->f[i].d) |
497 | { |
498 | assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data ->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[ i].d != init->compiled_data->f[i].d) ; else __assert_fail ("init->outputs[i].d != init->compiled_data->f[i].d" , "ccv_cnnp_model.c", 498, __extension__ __PRETTY_FUNCTION__) ; })); |
499 | if (model->compiled_data->f[i].d >= 0) |
500 | { |
501 | assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->compiled_data->f[i] .d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0" , "ccv_cnnp_model.c", 501, __extension__ __PRETTY_FUNCTION__) ; })); |
502 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d); |
503 | } |
504 | } |
505 | } |
506 | // Go through the graph to set tensor on matching symbols |
507 | for (i = 0; i < stack->rnum; i++) |
508 | { |
509 | const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize * (size_t)(i))); |
510 | // If exceed range, skip. |
511 | if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) || |
512 | d >= ccv_nnc_graph_exec_symbol_count(model->graph)) |
513 | continue; |
514 | const ccv_nnc_graph_exec_symbol_t src_symbol = { |
515 | .d = d, |
516 | .graph = init->graph |
517 | }; |
518 | const ccv_nnc_graph_exec_symbol_t dest_symbol = { |
519 | .d = d, |
520 | .graph = model->graph |
521 | }; |
522 | const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol); |
523 | const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol); |
524 | // If the name doesn't match, skip. |
525 | if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP) |
526 | continue; |
527 | // Now get all the inputs and outputs, if matches, set them. |
528 | const int* src_inputs; |
529 | int src_input_size; |
530 | const int* src_outputs; |
531 | int src_output_size; |
532 | ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size); |
533 | const int* dest_inputs; |
534 | int dest_input_size; |
535 | const int* dest_outputs; |
536 | int dest_output_size; |
537 | ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size); |
538 | // We may have unmatched input / output size because this is the minimizer and it has |
539 | // different saved_aux (for example, when we shrunk with CMD_NOOP). |
540 | if (src_input_size != dest_input_size) |
541 | continue; |
542 | if (src_output_size != dest_output_size) |
543 | continue; |
544 | ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd); |
545 | // The source and destination tensor symbols may not line up one-to-one. The minimizer may be passed in |
546 | // later, in which case its tensors are allocated late in the original graph, whereas in the newly created |
547 | // graph everything is streamlined (the minimizer exists from the beginning). That makes the order of tensor |
548 | // symbol creation differ between the two graphs, so assuming the same index means the same tensor would be |
549 | // wrong. Setting a new minimizer, however, does not change the exec symbol ordering, because we never create |
550 | // new exec symbols after the gradient init step: changing the minimizer only updates the setting of an |
551 | // existing exec symbol, it does not create a new one. |
552 | for (j = 0; j < src_input_size; j++) |
553 | if (src_inputs[j] >= 0) |
554 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]); |
555 | for (j = 0; j < src_output_size; j++) |
556 | if (src_outputs[j] >= 0) |
557 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]); |
558 | } |
559 | ccv_array_free(stack); |
560 | // After this, we get all tensors in the model graph resolved through tensor_auto. |
561 | ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0); |
562 | // Verify symbols we get matches. |
563 | const int parameter_size = compiled_data->parameters->rnum; |
564 | for (i = 0; i < parameter_size; i++) |
565 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->parameters)->data)) + (size_t)(compiled_data ->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if ( ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data-> parameters)->data)) + (size_t)(compiled_data->parameters )->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d" , "ccv_cnnp_model.c", 565, __extension__ __PRETTY_FUNCTION__) ; })); } |
566 | const int internal_size = compiled_data->internals->rnum; |
567 | for (i = 0; i < internal_size; i++) |
568 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->internals)->data)) + (size_t)(compiled_data ->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->internals)-> data)) + (size_t)(init->compiled_data->internals)->rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((compiled_data->internals)->data)) + (size_t)(compiled_data->internals)->rsize * (size_t)(i ))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init ->compiled_data->internals)->data)) + (size_t)(init-> compiled_data->internals)->rsize * (size_t)(i))))->d ) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d" , "ccv_cnnp_model.c", 568, __extension__ __PRETTY_FUNCTION__) ; })); } |
569 | // Go through compiled data. |
570 | if (compiled_data->tensor_arena) |
571 | { |
572 | const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph); |
573 | if (flag == 0 && compiled_data->graph_exec_arena) |
574 | { |
575 | ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph); |
576 | // Since we will reinit, if we previously set is_test, we need to set it again. |
577 | if (compiled_data->is_test) |
578 | { |
579 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
580 | ccv_nnc_graph_exec_update_t update = { |
581 | .parallel_count = parallel_count, |
582 | .graph = model->graph, |
583 | .graph_exec_arena = compiled_data->graph_exec_arena, |
584 | }; |
585 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); |
586 | } |
587 | } else |
588 | // Free-up tensor arena & graph exec arena. |
589 | _ccv_cnnp_compiled_data_graph_free(compiled_data); |
590 | } |
591 | // There are other compiled graphs, for accum and apply gradients. |
592 | // However, the main conclusion is, these absorb operations shouldn't impact parameters. |
593 | // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we |
594 | // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot |
595 | // be changed otherwise parameters' shape will be meaningless. The same goes to internals. |
596 | // That is why we don't update these compiled graphs at all this point. |
597 | // Free the model, we've already "absorbed" it. |
598 | ccv_cnnp_model_free(init); |
599 | } |
600 | |
601 | void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss) |
602 | { |
603 | assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model-> input_size == 0) ? 1 : 0), __extension__ ({ if (input_size == model->input_size || model->input_size == 0) ; else __assert_fail ("input_size == model->input_size || model->input_size == 0" , "ccv_cnnp_model.c", 603, __extension__ __PRETTY_FUNCTION__) ; })); |
604 | if (model->input_size == 0) |
605 | model->input_size = input_size; |
606 | if (!model->graph) // The graph is not compiled yet. |
607 | { |
608 | model->graph = ccv_nnc_symbolic_graph_new(); |
609 | _ccv_cnnp_model_compile(model, inputs, input_size, loss); |
610 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 610, __extension__ __PRETTY_FUNCTION__) ; })); |
611 | int i, flag = 0; |
612 | for (i = 0; !flag && i < input_size; i++) |
613 | flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY); |
614 | // If inputs are from GPU, stream type is GPU. |
615 | model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; |
616 | model->compiled_data->minimize.minimizer = minimizer; |
617 | model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer); |
618 | } else { |
619 | // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model. |
620 | // And then absorb the "new model" to the old one. |
621 | ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable); |
622 | ccv_cnnp_model_absorb(model, init, inputs, input_size); |
623 | // Reset minimizer. |
624 | ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0); |
625 | } |
626 | } |
627 | |
628 | ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable) |
629 | { |
630 | ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0); |
631 | new_model->is_trainable = is_trainable; |
632 | return new_model; |
633 | } |
634 | |
635 | void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size) |
636 | { |
637 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 637, __extension__ __PRETTY_FUNCTION__); })); |
638 | assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0 ), __extension__ ({ if (output_size == model->output_size) ; else __assert_fail ("output_size == model->output_size" , "ccv_cnnp_model.c", 638, __extension__ __PRETTY_FUNCTION__) ; })); |
639 | ccv_nnc_symbolic_graph_t* const graph = model->graph; |
640 | ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0); |
641 | int i; |
642 | for (i = 0; i < output_size; i++) |
643 | { |
644 | assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL" , "ccv_cnnp_model.c", 644, __extension__ __PRETTY_FUNCTION__) ; })); |
645 | outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]); |
646 | } |
647 | } |
648 | |
649 | void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size) |
650 | { |
651 | if (workspace_size == model->workspace_size) |
652 | return; |
653 | model->workspace_size = workspace_size; |
654 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
655 | if (compiled_data && compiled_data->graph) |
656 | ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0); |
657 | } |
658 | |
659 | size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model) |
660 | { |
661 | return model->workspace_size; |
662 | } |
663 | |
664 | void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel) |
665 | { |
666 | if (parallel == 0) |
667 | model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); |
668 | else |
669 | model->parallel_count = parallel; |
670 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
671 | if (compiled_data) |
672 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 672, __extension__ __PRETTY_FUNCTION__) ; })); } |
673 | } |
674 | |
675 | void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count) |
676 | { |
677 | model->max_stream_count = max_stream_count; |
678 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
679 | if (compiled_data) |
680 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 680, __extension__ __PRETTY_FUNCTION__) ; })); } |
681 | } |
682 | |
683 | void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression) |
684 | { |
685 | model->memory_compression = memory_compression; |
686 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
687 | if (compiled_data) |
688 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 688, __extension__ __PRETTY_FUNCTION__) ; })); } |
689 | } |
690 | |
691 | void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction) |
692 | { |
693 | model->memory_reduction = memory_reduction; |
694 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
695 | if (compiled_data) |
696 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 696, __extension__ __PRETTY_FUNCTION__) ; })); } |
697 | } |
698 | |
699 | void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing) |
700 | { |
701 | model->gradient_checkpointing = gradient_checkpointing; |
702 | } |
703 | |
704 | int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model) |
705 | { |
706 | return model->gradient_checkpointing; |
707 | } |
708 | |
709 | typedef struct { |
710 | int parallel_count; |
711 | ccv_nnc_symbolic_graph_t* graph; |
712 | ccv_cnnp_compiled_data_t* compiled_data; |
713 | ccv_nnc_tensor_arena_t* tensor_arena; |
714 | } ccv_nnc_tensor_init_states_t; |
715 | |
716 | static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data) |
717 | { |
718 | int i; |
719 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
720 | for (i = 0; i < compiled_data->parameters->rnum; i++) |
721 | { |
722 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; |
723 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) |
724 | return 1; |
725 | } |
726 | for (i = 0; i < compiled_data->internals->rnum; i++) |
727 | { |
728 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; |
729 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) |
730 | return 1; |
731 | } |
732 | return 0; |
733 | } |
734 | |
735 | static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol) |
736 | { |
737 | ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context; |
738 | ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena; |
739 | ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol); |
740 | if (!output_tensor) |
741 | return; |
742 | const int d = output_symbol.d; |
743 | assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data-> tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states ->compiled_data->tensors_init.size) ; else __assert_fail ("d < tensor_init_states->compiled_data->tensors_init.size" , "ccv_cnnp_model.c", 743, __extension__ __PRETTY_FUNCTION__) ; })); |
744 | uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data ->tensors_init.v) & ~(uintptr_t)1)); |
745 | if (init_v[d >> 5] & (1u << (d & 0x1f))) |
746 | return; |
747 | init_v[d >> 5] |= (1u << (d & 0x1f)); |
748 | ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0); |
749 | const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph; |
750 | const int parallel_count = tensor_init_states->parallel_count; |
751 | int i; |
752 | for (i = 1; i < parallel_count; i++) |
753 | { |
754 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i)); |
755 | if (copy) |
756 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &output_tensor, 1, ©, 1, 0); |
757 | } |
758 | } |
759 | |
760 | // This method can only handle cases we added new tensors and exec, never delete. This invariant is true because |
761 | // we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup. |
762 | static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model) |
763 | { |
764 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 764, __extension__ __PRETTY_FUNCTION__); })); |
765 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 765, __extension__ __PRETTY_FUNCTION__) ; })); |
766 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
767 | assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__ ({ if (compiled_data->rewindables) ; else __assert_fail ( "compiled_data->rewindables", "ccv_cnnp_model.c", 767, __extension__ __PRETTY_FUNCTION__); })); |
768 | int i; |
769 | for (i = 0; i < compiled_data->rewindables->rnum; i++) |
770 | { |
771 | const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) + (size_t)(compiled_data->rewindables)->rsize * (size_t) (i))); |
772 | if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC) |
773 | ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec); |
774 | else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR) |
775 | ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor); |
776 | } |
777 | ccv_array_clear(compiled_data->rewindables); |
778 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
779 | } |
780 | |
781 | static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name) |
782 | { |
783 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { |
784 | .type = CCV_CNNP_REWIND_TENSOR, |
785 | .tensor = symbol |
786 | }; |
787 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; |
788 | ccv_array_push(rewind_symbols, &rewind_symbol); |
789 | } |
790 | |
791 | static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name) |
792 | { |
793 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { |
794 | .type = CCV_CNNP_REWIND_TENSOR, |
795 | .tensor = symbol |
796 | }; |
797 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; |
798 | ccv_array_push(rewind_symbols, &rewind_symbol); |
799 | } |
800 | |
801 | static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) |
802 | { |
803 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { |
804 | .type = CCV_CNNP_REWIND_GRAPH_EXEC, |
805 | .graph_exec = symbol |
806 | }; |
807 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; |
808 | ccv_array_push(rewind_symbols, &rewind_symbol); |
809 | } |
810 | |
811 | static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph) |
812 | { |
813 | ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol); |
814 | if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0)) |
815 | ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd); |
816 | int i; |
817 | for (i = 1; i < parallel_count; i++) |
818 | { |
819 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); |
820 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol); |
821 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) |
822 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); |
823 | } |
824 | } |
825 | |
826 | static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd) |
827 | { |
828 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 828, __extension__ __PRETTY_FUNCTION__); })); |
829 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 829, __extension__ __PRETTY_FUNCTION__); })); |
830 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd); |
831 | int i; |
832 | for (i = 1; i < parallel_count; i++) |
833 | { |
834 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); |
835 | if (copy_symbol.graph) |
836 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd); |
837 | } |
838 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena; |
839 | if (graph_exec_arena) |
840 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); |
841 | // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph) |
842 | ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena; |
843 | if (gradient_graph_exec_arena) |
844 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); |
845 | } |
846 | |
847 | static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice) |
848 | { |
849 | int this_parameter_flag = 0; |
850 | if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL) |
851 | return this_parameter_flag; |
852 | const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]); |
853 | int j, k; |
854 | // For no-op, we can preserve previous saved_aux_size. |
855 | if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP) |
856 | { |
857 | // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous |
858 | // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between |
859 | // noop and a minimizer. We don't want that because we do that in high-level frameworks to |
860 | // make sure some model parameters don't update if we don't want them to. |
861 | int old_saved_aux_size; |
862 | if (old_minimizer.cmd == CCV_NNC_NOOP) |
863 | { |
864 | int input_size; |
865 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0); |
866 | if (input_size < 2) // This is not legit. |
867 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); |
868 | else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters. |
869 | old_saved_aux_size = input_size - 2; |
870 | } else |
871 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); |
872 | if (old_saved_aux_size != saved_aux_size) |
873 | { |
874 | this_parameter_flag = 1; |
875 | if (saved_aux_size > old_saved_aux_size) |
876 | { |
877 | // Allocate new tensor symbols. |
878 | const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]); |
879 | for (j = old_saved_aux_size; j < saved_aux_size; j++) |
880 | { |
881 | saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0); |
882 | saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0); |
883 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); |
884 | for (k = 1; k < parallel_count; k++) |
885 | { |
886 | ccv_nnc_tensor_param_t dev_info = info; |
887 | if (k != device_id) |
888 | CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) & 0xfff) << 8)); |
889 | else |
890 | CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) & 0xfff) << 8)); |
891 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); |
892 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); |
893 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy); |
894 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy); |
895 | } |
896 | } |
897 | } else { |
898 | for (j = saved_aux_size; j < old_saved_aux_size; j++) |
899 | { |
900 | for (k = 1; k < parallel_count; k++) |
901 | { |
902 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); |
903 | if (src_copy.d >= 0) |
904 | { |
905 | ccv_nnc_tensor_symbol_free(graph, src_copy); |
906 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); |
907 | } |
908 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); |
909 | if (dest_copy.d >= 0) |
910 | { |
911 | ccv_nnc_tensor_symbol_free(graph, dest_copy); |
912 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); |
913 | } |
914 | } |
915 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source); |
916 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination); |
917 | saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; |
918 | } |
919 | } |
920 | } |
921 | } |
922 | _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer); |
923 | if (this_parameter_flag) |
924 | { |
925 | ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2]; |
926 | ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1]; |
927 | const int* inputs = 0; |
928 | int input_size = 0; |
929 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0); |
930 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 930, __extension__ __PRETTY_FUNCTION__) ; })); |
931 | update_inputs[0].d = inputs[0]; |
932 | update_inputs[0].graph = graph; |
933 | update_inputs[1].d = inputs[1]; |
934 | update_inputs[1].graph = graph; |
935 | update_outputs[0] = updated_parameters[parameter_indice]; |
936 | for (j = 0; j < saved_aux_size; j++) |
937 | { |
938 | update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source; |
939 | update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination; |
940 | } |
941 | ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); |
942 | for (k = 1; k < parallel_count; k++) |
943 | { |
944 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k); |
945 | assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if (copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c" , 945, __extension__ __PRETTY_FUNCTION__); })); |
946 | ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0); |
947 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 947, __extension__ __PRETTY_FUNCTION__) ; })); |
948 | update_inputs[0].d = inputs[0]; |
949 | update_inputs[0].graph = graph; |
950 | update_inputs[1].d = inputs[1]; |
951 | update_inputs[1].graph = graph; |
952 | update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k); |
953 | for (j = 0; j < saved_aux_size; j++) |
954 | { |
955 | update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); |
956 | update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); |
957 | } |
958 | ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); |
959 | } |
960 | } |
961 | return this_parameter_flag; |
962 | } |
963 | |
964 | typedef struct { |
965 | int parameter_size; |
966 | ccv_nnc_cmd_t minimizer; |
967 | ccv_cnnp_model_io_t parameters[1]; |
968 | } ccv_cnnp_set_minimizer_for_parameter_t; |
969 | |
970 | static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model) |
971 | { |
972 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
973 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 973, __extension__ __PRETTY_FUNCTION__); })); |
974 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; |
975 | // We update all parameters, at this point, we have one minimizer. |
976 | const int parameter_size = compiled_data->parameters->rnum; |
977 | ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes; |
978 | ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph; |
979 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 979, __extension__ __PRETTY_FUNCTION__); })); |
980 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
981 | ccv_array_t* const parameters = compiled_data->minimize.parameters; |
982 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); |
983 | int i, j, flag = 0; |
984 | for (i = 0; i < parameters->rnum; i++) |
985 | { |
986 | ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(i))); |
987 | for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++) |
988 | { |
989 | const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel; |
990 | assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_sel != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_sel != 0" , "ccv_cnnp_model.c", 990, __extension__ __PRETTY_FUNCTION__) ; })); |
991 | const int old_rnum = parameter_indices->rnum; |
992 | ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices); |
993 | const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref; |
994 | assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_ref != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_ref != 0" , "ccv_cnnp_model.c", 994, __extension__ __PRETTY_FUNCTION__) ; })); |
995 | if (param_ref >= 0) |
996 | { |
997 | assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices-> rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum < parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum" , "ccv_cnnp_model.c", 997, __extension__ __PRETTY_FUNCTION__) ; })); |
998 | *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(old_rnum))) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref + old_rnum))); |
999 | parameter_indices->rnum = old_rnum + 1; |
1000 | } |
1001 | } |
1002 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer); |
1003 | // We may have duplicated indices, but that is OK, we will set it twice. |
1004 | for (j = 0; j < parameter_indices->rnum; j++) |
1005 | { |
1006 | const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(j))); |
1007 | assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__ ({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size" , "ccv_cnnp_model.c", 1007, __extension__ __PRETTY_FUNCTION__ ); })); |
1008 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d)) |
1009 | flag = 1; |
1010 | } |
1011 | ccv_array_clear(parameter_indices); |
1012 | } |
1013 | ccv_array_free(parameter_indices); |
1014 | return flag; |
1015 | } |
1016 | |
1017 | static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size) |
1018 | { |
1019 | if (new_saved_aux_size == old_saved_aux_size) |
1020 | return; |
1021 | assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ? 1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size ) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size" , "ccv_cnnp_model.c", 1021, __extension__ __PRETTY_FUNCTION__ ); })); |
1022 | int i, j; |
1023 | for (i = parameter_size - 1; i >= 0; i--) |
1024 | { |
1025 | for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--) |
1026 | saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; |
1027 | for (j = old_saved_aux_size - 1; j >= 0; j--) |
1028 | saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j]; |
1029 | } |
1030 | } |
1031 | |
1032 | static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model) |
1033 | { |
1034 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1035 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1035, __extension__ __PRETTY_FUNCTION__); })); |
1036 | if (!compiled_data->rewindables) |
1037 | compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0); |
1038 | ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0); |
1039 | ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0); |
1040 | ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0); |
1041 | } |
1042 | |
1043 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size) |
1044 | { |
1045 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1046 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 1046, __extension__ __PRETTY_FUNCTION__ ); })); |
1047 | assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 1047, __extension__ __PRETTY_FUNCTION__ ); })); |
1048 | const int evaluate_to_size = compiled_data->evaluate.to_size; |
1049 | assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__ ({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0" , "ccv_cnnp_model.c", 1049, __extension__ __PRETTY_FUNCTION__ ); })); |
1050 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1051 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); |
1052 | compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count); |
1053 | int i, j; |
1054 | const int output_size = model->output_size; |
1055 | assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size * parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count" , "ccv_cnnp_model.c", 1055, __extension__ __PRETTY_FUNCTION__ ); })); |
1056 | if (fits) |
1057 | for (i = 0; i < output_size; i++) |
1058 | ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info); |
1059 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; |
1060 | const int parameter_size = compiled_data->parameters->rnum; |
1061 | compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size); |
1062 | compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size); |
1063 | compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size); |
1064 | int parameter_size_maybe_more = parameter_size; |
1065 | compiled_data->disable_outgrad = disable_outgrad; |
1066 | int outgrad_size; |
1067 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0) |
1068 | outgrad_size = 0; |
1069 | else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs. |
1070 | outgrad_size = model->input_size; |
1071 | else { |
1072 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 1072, __extension__ __PRETTY_FUNCTION__ ); })); // If it is disable all, gradient mode won't be this. |
1073 | outgrad_size = 0; |
1074 | for (i = 0; i < model->input_size; i++) |
1075 | if (!(disable_outgrad & ((uint64_t)1 << i))) |
1076 | ++outgrad_size; |
1077 | } |
1078 | compiled_data->outgrad_size = outgrad_size; |
1079 | parameter_size_maybe_more += outgrad_size; |
1080 | compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count); |
1081 | compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0; |
1082 | compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more); |
1083 | compiled_data->backward.to_size = parameter_size_maybe_more; |
1084 | ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))); |
1085 | if (compiled_data->parameter_flags) |
1086 | { |
1087 | parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size); |
1088 | for (i = 0; i < parameter_size; i++) |
1089 | if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))) |
1090 | parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); |
1091 | else |
1092 | parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; |
1093 | } |
1094 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0) |
1095 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); |
1096 | else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs. |
1097 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); |
1098 | else { // Compute minimize with gradients including selected inputs. |
1099 | assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__ ({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0" , "ccv_cnnp_model.c", 1099, __extension__ __PRETTY_FUNCTION__ ); })); |
1100 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 1100, __extension__ __PRETTY_FUNCTION__ ); })); // If it is disable all, gradient mode won't be this. |
1101 | assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__ ({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0" , "ccv_cnnp_model.c", 1101, __extension__ __PRETTY_FUNCTION__ ); })); |
1102 | ccv_nnc_tensor_symbol_t outgrads[outgrad_size]; |
1103 | j = 0; |
1104 | for (i = 0; i < model->input_size; i++) |
1105 | if (!(disable_outgrad & ((uint64_t)1 << i))) |
1106 | outgrads[j++] = model->inputs[i]; |
1107 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); |
1108 | } |
1109 | if (compiled_data->parameter_flags) |
1110 | ccfreefree(parameters); |
1111 | _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size); |
1112 | if (compiled_data->minimize.parameters) |
1113 | _ccv_cnnp_apply_parameters_with_minimizer(model); |
1114 | // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass. |
1115 | ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph); |
1116 | for (i = 0; i < output_size; i++) |
1117 | { |
1118 | const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); |
1119 | // Init this to 1 so we can backprop. |
1120 | ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES); |
1121 | } |
1122 | compiled_data->backward.to_size = 0; |
1123 | for (i = 0; i < parameter_size_maybe_more; i++) |
1124 | if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL) |
1125 | compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]); |
1126 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS); |
1127 | ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size); |
1128 | for (i = 0; i < parameter_size_maybe_more - parameter_size; i++) |
1129 | { |
1130 | if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads. |
1131 | continue; |
1132 | const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]); |
1133 | const int* tos; |
1134 | int to_size; |
1135 | ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size); |
1136 | if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes. |
1137 | { |
1138 | const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph); |
1139 | const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph); |
1140 | int flag = 0; |
1141 | const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = ( destination_count - i); (_a > _b) ? _a : _b; }); |
1142 | for (j = i - 1; !flag && j >= 0; j--) |
1143 | if (j + outgrad_destination_start < destination_count) |
1144 | flag = (destinations[j + outgrad_destination_start].d == outgrad.d); |
1145 | if (!flag) // Only if we cannot find it, we add it. |
1146 | ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad); |
1147 | } |
1148 | } |
1149 | if (parallel_count > 1) |
1150 | { |
1151 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, |
1152 | 0, 0, |
1153 | compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */, |
1154 | compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */, |
1155 | 0, 0, 0, |
1156 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, |
1157 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); |
1158 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
1159 | for (i = 0; i < evaluate_to_size; i++) |
1160 | for (j = 1; j < parallel_count; j++) |
1161 | { |
1162 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); |
1163 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) |
1164 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; |
1165 | } |
1166 | const int backward_to_size = compiled_data->backward.to_size; |
1167 | for (i = 0; i < backward_to_size; i++) |
1168 | for (j = 1; j < parallel_count; j++) |
1169 | { |
1170 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j); |
1171 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) |
1172 | compiled_data->backward.tos[compiled_data->backward.to_size++] = copy; |
1173 | } |
1174 | } |
1175 | // Only use memory compression if we are in gradient parameter mode. |
1176 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS) |
1177 | { |
1178 | if (model->memory_compression) |
1179 | ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); |
1180 | if (model->memory_reduction) |
1181 | ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); |
1182 | } |
1183 | compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size); |
1184 | compiled_data->gradient_mode = gradient_mode; |
1185 | } |
1186 | |
1187 | void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) |
1188 | { |
1189 | assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (!compiled_data->tensors.parameters ) ; else __assert_fail ("!compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 1189, __extension__ __PRETTY_FUNCTION__ ); })); |
1190 | const int parameter_size = compiled_data->parameters->rnum; |
1191 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1192 | const int internal_size = compiled_data->internals->rnum; |
1193 | compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph); |
1194 | compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t)); |
1195 | compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*)); |
1196 | compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count; |
1197 | } |
1198 | |
1199 | int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) |
1200 | { |
1201 | int i, j; |
1202 | const int parameter_size = compiled_data->parameters->rnum; |
1203 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1204 | const int internal_size = compiled_data->internals->rnum; |
1205 | for (i = 0; i < parameter_size; i++) |
1206 | { |
1207 | // parameters has to be allocated all together. |
1208 | if (compiled_data->tensors.parameters[i]) |
1209 | { |
1210 | for (j = 1; j < parallel_count; j++) |
1211 | { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j * parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data ->tensors.parameters[i + j * parameter_size]) ; else __assert_fail ("compiled_data->tensors.parameters[i + j * parameter_size]" , "ccv_cnnp_model.c", 1211, __extension__ __PRETTY_FUNCTION__ ); })); } |
1212 | continue; |
1213 | } |
1214 | return 1; |
1215 | } |
1216 | for (i = 0; i < internal_size; i++) |
1217 | { |
1218 | if (!compiled_data->tensors.internals[i]) |
1219 | return 1; |
1220 | for (j = 1; j < parallel_count; j++) |
1221 | if (!compiled_data->tensors.internals[i + j * internal_size]) |
1222 | return 1; |
1223 | } |
1224 | return 0; |
1225 | } |
1226 | |
1227 | void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) |
1228 | { |
1229 | int i, j; |
1230 | const int parameter_size = compiled_data->parameters->rnum; |
1231 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1232 | const int internal_size = compiled_data->internals->rnum; |
1233 | for (i = 0; i < parameter_size; i++) |
1234 | { |
1235 | // parameters has to be allocated all together. |
1236 | if (compiled_data->tensors.parameters[i]) |
1237 | { |
1238 | for (j = 1; j < parallel_count; j++) |
1239 | { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j * parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data ->tensors.parameters[i + j * parameter_size]) ; else __assert_fail ("compiled_data->tensors.parameters[i + j * parameter_size]" , "ccv_cnnp_model.c", 1239, __extension__ __PRETTY_FUNCTION__ ); })); } |
1240 | continue; |
1241 | } |
1242 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); |
1243 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); |
1244 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) |
1245 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); |
1246 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); |
1247 | compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0); |
1248 | for (j = 1; j < parallel_count; j++) |
1249 | { |
1250 | if (j != device_id) |
1251 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); |
1252 | else |
1253 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); |
1254 | compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); |
1255 | } |
1256 | } |
1257 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
1258 | for (i = 0; i < internal_size; i++) |
1259 | { |
1260 | const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ); |
1261 | const int d = retained.d; |
1262 | if (init_v[d >> 5] & (1u << (d & 0x1f))) |
1263 | continue; |
1264 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained); |
1265 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) |
1266 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); |
1267 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); |
1268 | if (!compiled_data->tensors.internals[i]) |
1269 | compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0); |
1270 | for (j = 1; j < parallel_count; j++) |
1271 | { |
1272 | if (j != device_id) |
1273 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); |
1274 | else |
1275 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); |
1276 | if (!compiled_data->tensors.internals[i + j * internal_size]) |
1277 | compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0); |
1278 | } |
1279 | } |
1280 | compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); // Remove 1 if any. |
1281 | } |
1282 | |
1283 | static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) |
1284 | { |
1285 | ccv_cnnp_model_tensors_init_0(model, compiled_data); |
1286 | ccv_cnnp_model_tensors_init_1(model, compiled_data); |
1287 | } |
1288 | |
1289 | static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) |
1290 | { |
1291 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1291, __extension__ __PRETTY_FUNCTION__ ); })); |
1292 | int i, j; |
1293 | for (i = 0; i < tensor_size; i++) |
1294 | { |
1295 | if (!tensors[i]) |
1296 | continue; |
1297 | const int d = tensor_symbols[i].d; |
1298 | if (!(tensors_init[d >> 5] & (1u << (d & 0x1f)))) |
1299 | continue; |
1300 | for (j = 1; j < parallel_count; j++) |
1301 | if (tensors[i + j * tensor_size]) |
1302 | { |
1303 | ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t )1)); |
1304 | ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size] ) & ~(uintptr_t)1)); |
1305 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0); |
1306 | } |
1307 | } |
1308 | } |
1309 | |
1310 | static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count) |
1311 | { |
1312 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1312, __extension__ __PRETTY_FUNCTION__ ); })); |
1313 | int i, j; |
1314 | for (i = 0; i < tensor_size; i++) |
1315 | { |
1316 | const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; |
1317 | for (j = 1; j < parallel_count; j++) |
1318 | { |
1319 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); |
1320 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; |
1321 | if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1322 | { // We shouldn't allocate this, free it up. |
1323 | ccv_nnc_tensor_free(tensors[i + j * tensor_size]); |
1324 | tensors[i + j * tensor_size] = 0; |
1325 | } |
1326 | } |
1327 | } |
1328 | } |
1329 | |
1330 | static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds) |
1331 | { |
1332 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1332, __extension__ __PRETTY_FUNCTION__ ); })); |
1333 | int i, j; |
1334 | for (i = 0; i < tensor_size; i++) |
1335 | { |
1336 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; |
1337 | if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1338 | continue; |
1339 | if (graph) |
1340 | { |
1341 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); |
1342 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) |
1343 | tensor_symbol = alias_to; |
1344 | } |
1345 | ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t )1)); |
1346 | if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) |
1347 | { |
1348 | const ccv_nnc_tensor_bind_t retained_bind = { |
1349 | .symbol = tensor_symbol, |
1350 | .tensor = tensor |
1351 | }; |
1352 | ccv_array_push(tensor_binds, &retained_bind); |
1353 | } |
1354 | for (j = 1; j < parallel_count; j++) |
1355 | { |
1356 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); |
1357 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; |
1358 | if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL) |
1359 | { |
1360 | const ccv_nnc_tensor_bind_t bind = { |
1361 | .symbol = copy, |
1362 | .tensor = tensors[i + j * tensor_size] |
1363 | }; |
1364 | ccv_array_push(tensor_binds, &bind); |
1365 | } |
1366 | } |
1367 | } |
1368 | } |
1369 | |
1370 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data) |
1371 | { |
1372 | if (compiled_data->graph) |
1373 | ccv_nnc_graph_free(compiled_data->graph); |
1374 | compiled_data->graph = 0; |
1375 | compiled_data->is_test = 0; |
1376 | if (compiled_data->tensor_arena) |
1377 | ccv_nnc_tensor_arena_free(compiled_data->tensor_arena); |
1378 | compiled_data->tensor_arena = 0; |
1379 | if (compiled_data->graph_exec_arena) |
1380 | ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena); |
1381 | compiled_data->graph_exec_arena = 0; |
1382 | if (compiled_data->backward.from_ops) |
1383 | ccfreefree(compiled_data->backward.from_ops); |
1384 | compiled_data->backward.from_ops = 0; |
1385 | if (compiled_data->evaluate.schedule) |
1386 | ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule); |
1387 | compiled_data->evaluate.schedule = 0; |
1388 | if (compiled_data->backward.schedule) |
1389 | ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule); |
1390 | compiled_data->backward.schedule = 0; |
1391 | } |
1392 | |
1393 | static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data) |
1394 | { |
1395 | if (compiled_data->gradients) |
1396 | ccfreefree(compiled_data->gradients); |
1397 | compiled_data->gradients = 0; |
1398 | if (compiled_data->updated_parameters) |
1399 | ccfreefree(compiled_data->updated_parameters); |
1400 | compiled_data->updated_parameters = 0; |
1401 | compiled_data->update_nodes = 0; |
1402 | compiled_data->saved_aux = 0; |
1403 | } |
1404 | |
1405 | static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data) |
1406 | { |
1407 | if (compiled_data->backward.gradients) |
1408 | ccfreefree(compiled_data->backward.gradients); |
1409 | compiled_data->backward.gradients = 0; |
1410 | if (compiled_data->backward.accum) |
1411 | ccv_nnc_graph_free(compiled_data->backward.accum); |
1412 | compiled_data->backward.accum = 0; |
1413 | if (compiled_data->backward.tensor_arena) |
1414 | ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena); |
1415 | compiled_data->backward.tensor_arena = 0; |
1416 | if (compiled_data->backward.graph_exec_arena) |
1417 | ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena); |
1418 | compiled_data->backward.graph_exec_arena = 0; |
1419 | } |
1420 | |
1421 | static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data) |
1422 | { |
1423 | if (compiled_data->apply_gradients.graph) |
1424 | ccv_nnc_graph_free(compiled_data->apply_gradients.graph); |
1425 | compiled_data->apply_gradients.graph = 0; |
1426 | if (compiled_data->apply_gradients.tensor_arena) |
1427 | ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena); |
1428 | compiled_data->apply_gradients.tensor_arena = 0; |
1429 | if (compiled_data->apply_gradients.graph_exec_arena) |
1430 | ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena); |
1431 | compiled_data->apply_gradients.graph_exec_arena = 0; |
1432 | } |
1433 | |
1434 | // Compile the graph to run ccv_cnnp_model_fit |
1435 | static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) |
1436 | { |
1437 | int i, j; |
1438 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1439 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE" , "ccv_cnnp_model.c", 1439, __extension__ __PRETTY_FUNCTION__ ); })); |
1440 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE; |
1441 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1442 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1442, __extension__ __PRETTY_FUNCTION__ ); })); |
1443 | assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__ ({ if (!fits || output_size == fit_size) ; else __assert_fail ("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1443 , __extension__ __PRETTY_FUNCTION__); })); |
1444 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1444, __extension__ __PRETTY_FUNCTION__ ); })); |
1445 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) |
1446 | { |
1447 | _ccv_cnnp_model_set_rewindables(model); |
1448 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); |
1449 | } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) { |
1450 | _ccv_cnnp_model_rewind_graph(model); |
1451 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); |
1452 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; |
1453 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); |
1454 | } |
1455 | const int tensors_init = !!compiled_data->tensors_init.v; |
1456 | if (!tensors_init) |
1457 | _ccv_cnnp_model_tensors_init(model, compiled_data); |
1458 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) |
1459 | // Check if it is not fully allocated, if it is not, init_1. |
1460 | ccv_cnnp_model_tensors_init_1(model, compiled_data); |
1461 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); |
1462 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1462, __extension__ __PRETTY_FUNCTION__); })); |
1463 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1463, __extension__ __PRETTY_FUNCTION__); })); |
1464 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1464 , __extension__ __PRETTY_FUNCTION__); })); |
1465 | const int input_size_per_p = input_size / parallel_count; |
1466 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); |
1467 | const int output_size_per_p = output_size / parallel_count; |
1468 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); |
1469 | const int fit_size_per_p = fit_size / parallel_count; |
1470 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds); |
1471 | const int parameter_size = compiled_data->parameters->rnum; |
1472 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); |
1473 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); |
1474 | const int internal_size = compiled_data->internals->rnum; |
1475 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); |
1476 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); |
1477 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); |
1478 | ccv_array_free(tensor_binds); |
1479 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
1480 | if (tensors_init && parallel_count > 1) |
1481 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); |
1482 | // If tensor is not init'ed, we need to init states first. |
1483 | if (_ccv_cnnp_any_to_init(compiled_data)) |
1484 | { |
1485 | ccv_nnc_tensor_init_states_t tensor_init_states = { |
1486 | .parallel_count = parallel_count, |
1487 | .graph = model->graph, |
1488 | .compiled_data = compiled_data, |
1489 | .tensor_arena = compiled_data->tensor_arena |
1490 | }; |
1491 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); |
1492 | } |
1493 | compiled_data->is_test = 0; |
1494 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer); |
1495 | // No need to set because it is default to training mode. |
1496 | // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); |
1497 | for (i = 0; i < saved_aux_size * parameter_size; i++) |
1498 | { |
1499 | if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1500 | continue; |
1501 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source); |
1502 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); |
1503 | for (j = 1; j < parallel_count; j++) |
1504 | { |
1505 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); |
1506 | if (copy) |
1507 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, ©, 1, 0); |
1508 | } |
1509 | } |
1510 | const int evaluate_to_size = compiled_data->evaluate.to_size; |
1511 | compiled_data->evaluate.to_op_size = 0; |
1512 | for (i = 0; i < evaluate_to_size; i++) |
1513 | { |
1514 | ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); |
1515 | if (to.graph) |
1516 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to; |
1517 | } |
1518 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); |
1519 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); |
1520 | } |
1521 | |
1522 | ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model) |
1523 | { |
1524 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1525 | if (!compiled_data || !compiled_data->graph) |
1526 | return 0; |
1527 | return ccv_nnc_graph_default_stream(compiled_data->graph); |
1528 | } |
1529 | |
1530 | uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model) |
1531 | { |
1532 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1533 | if (!compiled_data || !compiled_data->tensor_arena) |
1534 | return 0; |
1535 | return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena); |
1536 | } |
1537 | |
1538 | static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) |
1539 | { |
1540 | int i, j; |
1541 | for (i = 0; i < tensor_size; i++) |
1542 | { |
1543 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; |
1544 | if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1545 | continue; |
1546 | if (graph) |
1547 | { |
1548 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); |
1549 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) |
1550 | tensor_symbol = alias_to; |
1551 | } |
1552 | ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]); |
1553 | for (j = 1; j < parallel_count; j++) |
1554 | { |
1555 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); |
1556 | if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL) |
1557 | ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]); |
1558 | } |
1559 | } |
1560 | } |
1561 | |
1562 | void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) |
1563 | { |
1564 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1565 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1565, __extension__ __PRETTY_FUNCTION__); })); |
1566 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1567 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1567, __extension__ __PRETTY_FUNCTION__ ); })); |
1568 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1568, __extension__ __PRETTY_FUNCTION__ ); })); |
1569 | assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size) ; else __assert_fail ("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1569 , __extension__ __PRETTY_FUNCTION__); })); |
1570 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1570, __extension__ __PRETTY_FUNCTION__); })); |
1571 | if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) |
1572 | { |
1573 | _ccv_cnnp_compiled_data_graph_free(compiled_data); |
1574 | _ccv_cnnp_compiled_data_backward_free(compiled_data); |
1575 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); |
1576 | // Compile the symbolic graph down only when needed. |
1577 | _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size); |
1578 | } else { |
1579 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1579, __extension__ __PRETTY_FUNCTION__); })); |
1580 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1580, __extension__ __PRETTY_FUNCTION__); })); |
1581 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1581 , __extension__ __PRETTY_FUNCTION__); })); |
1582 | const int input_size_per_p = input_size / parallel_count; |
1583 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); |
1584 | const int output_size_per_p = output_size / parallel_count; |
1585 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); |
1586 | const int fit_size_per_p = fit_size / parallel_count; |
1587 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count); |
1588 | } |
1589 | if (compiled_data->is_test) |
1590 | { |
1591 | compiled_data->is_test = 0; |
1592 | ccv_nnc_graph_exec_update_t update = { |
1593 | .parallel_count = parallel_count, |
1594 | .graph = model->graph, |
1595 | .graph_exec_arena = compiled_data->graph_exec_arena, |
1596 | }; |
1597 | ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); |
1598 | } |
1599 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); |
1600 | } |
1601 | |
1602 | // Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD). |
1603 | static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) |
1604 | { |
1605 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1606 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD; |
1607 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1608 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1608, __extension__ __PRETTY_FUNCTION__ ); })); |
1609 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1609, __extension__ __PRETTY_FUNCTION__ ); })); |
1610 | // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather, |
1611 | // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel. |
1612 | if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) |
1613 | { |
1614 | const int evaluate_to_size = compiled_data->evaluate.to_size; |
1615 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); |
1616 | _ccv_cnnp_model_set_rewindables(model); |
1617 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, |
1618 | 0, 0, |
1619 | 0, 0, 0, |
1620 | 0, 0, 0, |
1621 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, |
1622 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); |
1623 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
1624 | int i, j; |
1625 | for (i = 0; i < evaluate_to_size; i++) |
1626 | for (j = 1; j < parallel_count; j++) |
1627 | { |
1628 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); |
1629 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) |
1630 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; |
1631 | } |
1632 | } |
1633 | const int tensors_init = !!compiled_data->tensors_init.v; |
1634 | if (!tensors_init) |
1635 | _ccv_cnnp_model_tensors_init(model, compiled_data); |
1636 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) |
1637 | // Check if it is not fully allocated, if it is not, init_1. |
1638 | ccv_cnnp_model_tensors_init_1(model, compiled_data); |
1639 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); |
1640 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1640, __extension__ __PRETTY_FUNCTION__); })); |
1641 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1641, __extension__ __PRETTY_FUNCTION__); })); |
1642 | const int input_size_per_p = input_size / parallel_count; |
1643 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); |
1644 | const int output_size_per_p = output_size / parallel_count; |
1645 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); |
1646 | const int parameter_size = compiled_data->parameters->rnum; |
1647 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); |
1648 | const int internal_size = compiled_data->internals->rnum; |
1649 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); |
1650 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); |
1651 | // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation. |
1652 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); |
1653 | ccv_array_free(tensor_binds); |
1654 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
1655 | // If tensor is not init'ed, we need to init states first. |
1656 | if (tensors_init && parallel_count > 1) |
1657 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); |
1658 | if (_ccv_cnnp_any_to_init(compiled_data)) |
1659 | { |
1660 | ccv_nnc_tensor_init_states_t tensor_init_states = { |
1661 | .parallel_count = parallel_count, |
1662 | .graph = model->graph, |
1663 | .compiled_data = compiled_data, |
1664 | .tensor_arena = compiled_data->tensor_arena |
1665 | }; |
1666 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); |
1667 | } |
1668 | compiled_data->is_test = 1; |
1669 | ccv_nnc_graph_exec_update_t update = { |
1670 | .parallel_count = parallel_count, |
1671 | .graph = model->graph, |
1672 | .graph_exec_arena = compiled_data->graph_exec_arena, |
1673 | }; |
1674 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); |
1675 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); |
1676 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); |
1677 | } |
1678 | |
1679 | static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) |
1680 | { |
1681 | assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0 ), __extension__ ({ if (!compiled_data->tensors.gradients) ; else __assert_fail ("!compiled_data->tensors.gradients" , "ccv_cnnp_model.c", 1681, __extension__ __PRETTY_FUNCTION__ ); })); |
1682 | const int parameter_size = compiled_data->parameters->rnum; |
1683 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1684 | compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count); |
1685 | compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count; |
1686 | int i, j; |
1687 | for (i = 0; i < parameter_size; i++) |
1688 | { |
1689 | if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))) |
1690 | { |
1691 | compiled_data->tensors.gradients[i] = 0; |
1692 | compiled_data->tensors.accum_gradients[i] = 0; |
1693 | for (j = 1; j < parallel_count; j++) |
1694 | { |
1695 | compiled_data->tensors.gradients[i + j * parameter_size] = 0; |
1696 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0; |
1697 | } |
1698 | continue; |
1699 | } |
1700 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); |
1701 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); |
1702 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) |
1703 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); |
1704 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); |
1705 | compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0); |
1706 | compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it. |
1707 | for (j = 1; j < parallel_count; j++) |
1708 | { |
1709 | if (j != device_id) |
1710 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); |
1711 | else |
1712 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); |
1713 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); |
1714 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0; |
1715 | } |
1716 | } |
1717 | } |
1718 | |
1719 | static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size) |
1720 | { |
1721 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL) |
1722 | return 1; |
1723 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) |
1724 | return 0; |
1725 | int i; |
1726 | for (i = 0; i < input_size; i++) |
1727 | if (!(disable_outgrad & ((uint64_t)1 << i))) |
1728 | return 0; |
1729 | return 1; |
1730 | } |
1731 | |
1732 | // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). |
1733 | // Particularly, this method compiles the evaluation and backprop graph (the main graph). |
1734 | static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) |
1735 | { |
1736 | int i, j; |
1737 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1738 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; |
1739 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data ->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data-> gradient_mode != target_gradient_mode) ; else __assert_fail ( "!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode" , "ccv_cnnp_model.c", 1739, __extension__ __PRETTY_FUNCTION__ ); })); |
1740 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE; |
1741 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1742 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1742, __extension__ __PRETTY_FUNCTION__ ); })); |
1743 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1743, __extension__ __PRETTY_FUNCTION__ ); })); |
1744 | // There shouldn't be a loss function if we evaluate with multistage jit. |
1745 | assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ? 1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP ) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP" , "ccv_cnnp_model.c", 1745, __extension__ __PRETTY_FUNCTION__ ); })); |
1746 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) |
1747 | { |
1748 | _ccv_cnnp_model_set_rewindables(model); |
1749 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. |
1750 | } else if (compiled_data->gradient_mode != target_gradient_mode) { |
1751 | _ccv_cnnp_model_rewind_graph(model); |
1752 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); |
1753 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; |
1754 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. |
1755 | } |
1756 | const int tensors_init = !!compiled_data->tensors_init.v; |
1757 | if (!tensors_init) |
1758 | _ccv_cnnp_model_tensors_init(model, compiled_data); |
1759 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) |
1760 | // Check if it is not fully allocated, if it is not, init_1. |
1761 | ccv_cnnp_model_tensors_init_1(model, compiled_data); |
1762 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); |
1763 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1763, __extension__ __PRETTY_FUNCTION__); })); |
1764 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1764, __extension__ __PRETTY_FUNCTION__); })); |
1765 | const int input_size_per_p = input_size / parallel_count; |
1766 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); |
1767 | const int output_size_per_p = output_size / parallel_count; |
1768 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); |
1769 | const int parameter_size = compiled_data->parameters->rnum; |
1770 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); |
1771 | const int internal_size = compiled_data->internals->rnum; |
1772 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); |
1773 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); |
1774 | if (!compiled_data->tensors.gradients) |
1775 | _ccv_cnnp_model_gradient_tensors_init(model, compiled_data); |
1776 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); |
1777 | if (compiled_data->backward.to_size > 0) |
1778 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); |
1779 | else |
1780 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); |
1781 | ccv_array_free(tensor_binds); |
1782 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
1783 | if (tensors_init && parallel_count > 1) |
1784 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); |
1785 | // If tensor is not init'ed, we need to init states first. |
1786 | if (_ccv_cnnp_any_to_init(compiled_data)) |
1787 | { |
1788 | ccv_nnc_tensor_init_states_t tensor_init_states = { |
1789 | .parallel_count = parallel_count, |
1790 | .graph = model->graph, |
1791 | .compiled_data = compiled_data, |
1792 | .tensor_arena = compiled_data->tensor_arena |
1793 | }; |
1794 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); |
1795 | } |
1796 | compiled_data->is_test = is_test; |
1797 | ccv_nnc_graph_exec_update_t update = { |
1798 | .parallel_count = parallel_count, |
1799 | .graph = model->graph, |
1800 | .graph_exec_arena = compiled_data->graph_exec_arena, |
1801 | }; |
1802 | ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update); |
1803 | const int evaluate_to_size = compiled_data->evaluate.to_size; |
1804 | compiled_data->evaluate.to_op_size = 0; |
1805 | ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0); |
1806 | for (i = 0; i < evaluate_to_size; i++) |
1807 | { |
1808 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); |
1809 | if (to_op.graph) |
1810 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op; |
1811 | const int* tos; |
1812 | int to_size; |
1813 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size); |
1814 | for (j = 0; j < to_size; j++) |
1815 | { |
1816 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ |
1817 | .d = tos[j], |
1818 | .graph = model->graph |
1819 | }); |
1820 | if (to_op.graph) |
1821 | ccv_array_add_unique_int(backward_from, to_op.d); |
1822 | } |
1823 | } |
1824 | assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__ ({ if (backward_from->rnum > 0) ; else __assert_fail ( "backward_from->rnum > 0", "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__); })); |
1825 | compiled_data->backward.from_op_size = backward_from->rnum; |
1826 | compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum); |
1827 | for (i = 0; i < backward_from->rnum; i++) |
1828 | compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){ |
1829 | .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from )->rsize * (size_t)(i))), |
1830 | .graph = compiled_data->graph, |
1831 | }; |
1832 | // If there are any set node (to set some tensors to 0) inserted through backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find out these nodes and explicitly adding them to backward.from_ops. |
1833 | ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)-> data)) + (size_t)(compiled_data->graph->exec_info)-> rsize * (size_t)(0))); |
1834 | const int exec_info_size = compiled_data->graph->exec_info->rnum; |
1835 | uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t)); |
1836 | const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data )) + (size_t)(compiled_data->graph->sources)->rsize * (size_t)(0))); |
1837 | const int source_size = compiled_data->graph->sources->rnum; |
1838 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info )[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)(( void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + ( size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_ ))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_ ++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_ ].graph == compiled_data->graph) ? 1 : 0), __extension__ ( { if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_]. 
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops )[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_ ++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_ ].graph == compiled_data->graph) ? 1 : 0), __extension__ ( { if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_]. d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { (( void) sizeof (((sources)[_i_].graph == compiled_data->graph ) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_ ] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size ].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_ [_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info )[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[ _idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings )->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_ ] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[ _idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void *)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t )((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); -- _incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_ [d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size )) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size ); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_ ].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_ [(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0) , __extension__ ({ if (_incomings_[(compiled_data->evaluate .to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1838, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(compiled_data->evaluate.to_ops )[_i_].d].c > 0) continue; _visit_->node[_visit_->size ].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_ ->node[_visit_->size].term = ((_incomings_[(compiled_data ->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if ( _heap_mem_) free(_incomings_); } while (0);; ((void) sizeof ( (_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c" , 1838, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); |
1839 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { |
1840 | visited[(idx >> 5)] |= (1u << (idx & 31)); |
1841 | } ccv_nnc_graph_visit_endfor} } |
1842 | ccv_nnc_graph_visit_free(visit); |
1843 | const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)-> data)) + (size_t)(compiled_data->graph->destinations)-> rsize * (size_t)(0))); |
1844 | const int destination_size = compiled_data->graph->destinations->rnum; |
1845 | visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size) ; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->backward.from_ops)[_i_ ].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } int _exist_size_[2] = { (compiled_data->backward .from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[ _idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size) ; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->backward.from_ops)[_i_ ].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue ; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info )[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_]. outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_ [d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_ [d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof (( _exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c" , 1845, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_ ][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = ( _p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations) [_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0] [_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size); _i_++ ) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_ ].graph == compiled_data->graph) ? 
1 : 0), __extension__ ( { if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data ->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0 ; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_ [_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings ) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size) ) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == compiled_data-> graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_ ].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations )[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)" , "ccv_cnnp_model.c", 1845, __extension__ __PRETTY_FUNCTION__ ); })); _visit_; }); |
1846 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { |
1847 | visited[(idx >> 5)] |= (1u << (idx & 31)); |
1848 | } ccv_nnc_graph_visit_endfor} } |
1849 | ccv_nnc_graph_visit_free(visit); |
1850 | visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info )[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)(( void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + ( size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_ ))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0] [_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_ ] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size ].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_ [_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info )[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[ _idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings )->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info )[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_]. outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == compiled_data-> graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_ ].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations )[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)" , "ccv_cnnp_model.c", 1850, __extension__ __PRETTY_FUNCTION__ ); })); _visit_; }); |
1851 | // Find any missing nodes to be added as source. Right now, these are only set nodes. |
1852 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { |
1853 | if (!(visited[(idx >> 5)] & (1u << (idx & 31)))) |
1854 | { |
1855 | assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD ) ? 1 : 0), __extension__ ({ if (exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD ) ; else __assert_fail ("exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD" , "ccv_cnnp_model.c", 1855, __extension__ __PRETTY_FUNCTION__ ); })); |
1856 | if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for empty out the tensor set function, not for the set grad to 1 one. |
1857 | ccv_array_add_unique_int(backward_from, idx); |
1858 | } |
1859 | } ccv_nnc_graph_visit_endfor} } |
1860 | ccv_nnc_graph_visit_free(visit); |
1861 | ccfreefree(visited); |
1862 | if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this. |
1863 | { |
1864 | compiled_data->backward.from_op_size = backward_from->rnum; |
1865 | compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum); |
1866 | for (i = 0; i < backward_from->rnum; i++) |
1867 | compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){ |
1868 | .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from )->rsize * (size_t)(i))), |
1869 | .graph = compiled_data->graph, |
1870 | }; |
1871 | } |
1872 | ccv_array_free(backward_from); |
1873 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); |
1874 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); |
1875 | } |
1876 | |
// Prepare the compiled graph of `model` for an evaluation pass without running it.
// Either (re)compiles the concrete graph, or re-binds the caller's input / output
// tensors onto the existing tensor arena, and then brings the is_test flag in sync.
//
// model       - model whose compiled_data and graph must already exist (asserted).
// params      - reads params.requires_grad, params.disable_outgrad and params.is_test.
// inputs      - input_size tensors; must equal model->input_size * parallel_count (asserted).
// outputs     - output_size tensors; must equal model->output_size * parallel_count (asserted).
1877 | void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) |
1878 | { |
1879 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1880 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1880, __extension__ __PRETTY_FUNCTION__); })); |
// A parallel_count of 0 (or less) on the model is treated as 1.
1881 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1882 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1882, __extension__ __PRETTY_FUNCTION__ ); })); |
1883 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1883, __extension__ __PRETTY_FUNCTION__ ); })); |
1884 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1884, __extension__ __PRETTY_FUNCTION__); })); |
// TRAINABLES when _ccv_cnnp_is_disable_outgrad_all() reports true for params.disable_outgrad,
// TRAINABLES_AND_INPUTS otherwise.
1885 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; |
// A mismatch can only happen when gradients are requested: the existing compilation is unusable if it is
// not in MULTISTAGE_MODE, or if its gradient_mode / disable_outgrad differ from what this call asks for.
1886 | const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad)); |
1887 | if (!compiled_data->graph || mode_mismatch) |
1888 | { |
// No usable compiled graph: drop whatever is there and compile again with the requested settings.
1889 | _ccv_cnnp_compiled_data_graph_free(compiled_data); |
1890 | if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad). |
1891 | _ccv_cnnp_compiled_data_backward_free(compiled_data); |
1892 | if (params.requires_grad) |
1893 | _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size); |
1894 | else |
1895 | _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size); |
1896 | } else { |
// The existing compiled graph is reused: clear the arena's bindings and bind the caller's
// input and output tensors, passing the per-parallel-copy count plus parallel_count.
1897 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena); |
1898 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1898, __extension__ __PRETTY_FUNCTION__); })); |
1899 | const int input_size_per_p = input_size / parallel_count; |
1900 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); |
1901 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1901, __extension__ __PRETTY_FUNCTION__); })); |
1902 | const int output_size_per_p = output_size / parallel_count; |
1903 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); |
1904 | } |
// Only touch the model when the requested is_test differs from the recorded one.
1905 | if (compiled_data->is_test != params.is_test) |
1906 | { |
1907 | compiled_data->is_test = params.is_test; |
// Context handed to the _ccv_cnnp_cmd_update_for_execs callback below.
1908 | ccv_nnc_graph_exec_update_t update = { |
1909 | .parallel_count = parallel_count, |
1910 | .graph = model->graph, |
1911 | .graph_exec_arena = compiled_data->graph_exec_arena, |
1912 | }; |
1913 | ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update); |
1914 | } |
1915 | } |
1916 | |
// Evaluate the model: run ccv_cnnp_model_dry_run() with the same model / params / tensors,
// then execute the compiled graph.
//
// tensor_tape and stream_context are forwarded unchanged to ccv_nnc_graph_run_with_schedule().
// The remaining parameters are forwarded unchanged to ccv_cnnp_model_dry_run().
1917 | void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) |
1918 | { |
1919 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1920 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1920, __extension__ __PRETTY_FUNCTION__); })); |
1921 | ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size); |
// In the no-grad mode the graph is run with 0 as the schedule argument.
1922 | if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD) |
1923 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); |
1924 | else { |
// Otherwise a schedule built from evaluate.to_ops is created on first use and kept in
// compiled_data->evaluate.schedule for later calls.
// NOTE(review): presumably this limits the run to the part of the graph ending at evaluate.to_ops
// (i.e. the forward pass) -- confirm against ccv_nnc_graph_static_schedule_new.
1925 | if (!compiled_data->evaluate.schedule) |
1926 | compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size); |
1927 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context); |
1928 | } |
1929 | } |
1930 | |
1931 | // Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). |
1932 | // Particularly, this method compiles the accumulator graph. |
// The accumulator graph holds one EWSUM_FORWARD node per existing gradient tensor, with
// (accum_gradient, gradient) as inputs and updated_accum_gradient as output. The result is stored in
// compiled_data->backward.accum (plus backward.tensor_arena / backward.graph_exec_arena when compiled).
// Requires model->compiled_data to exist and to be in MULTISTAGE_MODE (both asserted).
1933 | static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model) |
1934 | { |
1935 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1936 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1936, __extension__ __PRETTY_FUNCTION__); })); |
1937 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1937, __extension__ __PRETTY_FUNCTION__ ); })); |
// Temporary symbolic graph; it is freed on both exit paths of this function.
1938 | ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new(); |
1939 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1940 | const int parameter_size = compiled_data->parameters->rnum; |
1941 | int i, j; |
// One allocation holds three consecutive symbol arrays of parameter_size * parallel_count entries each:
// gradients, then accum_gradients, then updated_accum_gradients.
1942 | compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3); |
1943 | compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count; |
1944 | compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count; |
// Entries are indexed as i + j * parameter_size (parameter i, parallel copy j).
1945 | for (i = 0; i < parameter_size; i++) |
1946 | for (j = 0; j < parallel_count; j++) |
1947 | if (compiled_data->tensors.gradients[i + j * parameter_size]) |
1948 | { |
1949 | const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info; |
1950 | // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them. |
1951 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size]; |
1952 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); |
// Three fresh symbols with the same tensor info: accum (input 0), gradient (input 1), updated accum (output).
1953 | ccv_nnc_tensor_symbol_t inputs[2]; |
1954 | inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); |
1955 | inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); |
1956 | ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); |
1957 | ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0); |
1958 | } else { |
// No gradient tensor for this slot: mark all three symbols as absent.
1959 | compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; |
1960 | compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; |
1961 | compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; |
1962 | } |
1963 | ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
// NOTE(review): a source size of 0 presumably means no EWSUM node was created above (no gradient
// tensors at all) -- confirm. In that case an empty concrete graph is installed and nothing is compiled.
1964 | if (ccv_nnc_symbolic_graph_source_size(accum) == 0) |
1965 | { |
1966 | ccv_nnc_symbolic_graph_free(accum); |
1967 | // Create empty graph. |
1968 | compiled_data->backward.accum = ccv_nnc_graph_new(); |
1969 | ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0); |
1970 | return; |
1971 | } |
// Collect symbol -> tensor bindings. Both accum_gradients and updated_accum_gradients symbols are
// bound to tensors.accum_gradients, i.e. the node's first input and its output share the same tensors.
1972 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); |
1973 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); |
1974 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds); |
1975 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); |
// Compile into backward.accum / backward.tensor_arena / backward.graph_exec_arena, then release the temporaries.
1976 | ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size (accum), SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size (accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena); |
1977 | ccv_nnc_symbolic_graph_free(accum); |
1978 | ccv_array_free(tensor_binds); |
1979 | ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count); |
1980 | } |
1981 | |
1982 | void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) |
1983 | { |
1984 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
1985 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1985, __extension__ __PRETTY_FUNCTION__); })); |
1986 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1986, __extension__ __PRETTY_FUNCTION__ ); })); |
1987 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
1988 | assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model-> output_size * parallel_count) ? 1 : 0), __extension__ ({ if ( ingrad_size == 0 || ingrad_size == model->output_size * parallel_count ) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1988, __extension__ __PRETTY_FUNCTION__ ); })); |
1989 | if (outgrad_size > 0) |
1990 | { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size * parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size == compiled_data->outgrad_size * parallel_count) ; else __assert_fail ("outgrad_size == compiled_data->outgrad_size * parallel_count" , "ccv_cnnp_model.c", 1990, __extension__ __PRETTY_FUNCTION__ ); })); } |
1991 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1991, __extension__ __PRETTY_FUNCTION__); })); |
1992 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 1992, __extension__ __PRETTY_FUNCTION__ ); })); |
1993 | const int parameter_size = compiled_data->parameters->rnum; |
1994 | // If we need to accumulate the gradients now, do jit on accumulator. |
1995 | if (compiled_data->backward.count > 0) |
1996 | { |
1997 | if (!compiled_data->backward.accum) |
1998 | _ccv_cnnp_model_multistage_jit_1(model); |
1999 | else if (compiled_data->backward.count == 1) { |
2000 | // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly). |
2001 | int i; |
2002 | for (i = 0; i < parameter_size * parallel_count; i++) |
2003 | { |
2004 | ccv_nnc_tensor_t* tensor; |
2005 | CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), ( compiled_data->tensors.accum_gradients[i]) = (compiled_data ->tensors.gradients[i]), (compiled_data->tensors.gradients [i]) = (tensor)); |
2006 | } |
2007 | if (compiled_data->backward.tensor_arena) |
2008 | { |
2009 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena); |
2010 | // Do rebind in case we messed up the binding (we switch accum_gradients and gradients). |
2011 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1); |
2012 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); |
2013 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); |
2014 | } |
2015 | } |
2016 | } |
2017 | const int ingrad_size_per_p = model->output_size; |
2018 | const int outgrad_size_per_p = compiled_data->outgrad_size; |
2019 | int i, j; |
2020 | for (i = 0; i < ingrad_size_per_p; i++) |
2021 | { |
2022 | const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); |
2023 | if (!ingrad_size || !ingrads || ingrads[i] == 0) |
2024 | { |
2025 | // Set it to 1 if it is not specified. |
2026 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad); |
2027 | if (ingrad_tensor) |
2028 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); |
2029 | for (j = 1; j < parallel_count; j++) |
2030 | { |
2031 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j)); |
2032 | if (ingrad_tensor) |
2033 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); |
2034 | } |
2035 | } else { |
2036 | // Make sure the length matches, in case it is an alias. |
2037 | assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model-> graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count (ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params (model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))" , "ccv_cnnp_model.c", 2037, __extension__ __PRETTY_FUNCTION__ ); })); |
2038 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]); |
2039 | for (j = 1; j < parallel_count; j++) |
2040 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]); |
2041 | } |
2042 | } |
2043 | if (outgrad_size > 0) |
2044 | { |
2045 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\"" , "ccv_cnnp_model.c", 2045, __extension__ __PRETTY_FUNCTION__ ); })); |
2046 | for (i = 0; i < outgrad_size_per_p; i++) |
2047 | if (outgrads[i]) |
2048 | { |
2049 | const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i]; |
2050 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]); |
2051 | for (j = 1; j < parallel_count; j++) |
2052 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]); |
2053 | } |
2054 | } else { |
2055 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 2056, __extension__ __PRETTY_FUNCTION__ ); })) |
2056 | compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 2056, __extension__ __PRETTY_FUNCTION__ ); })); |
2057 | } |
2058 | // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients. |
2059 | // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these |
2060 | // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching. |
2061 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); |
2062 | if (!compiled_data->backward.schedule) |
2063 | compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0); |
2064 | // Run the backward pass. |
2065 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context); |
2066 | // If we need to run accumulation round, do that now. |
2067 | if (compiled_data->backward.count > 0) |
2068 | ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context); |
2069 | // Update the count, this determines whether we need to accumulate or not. |
2070 | ++compiled_data->backward.count; |
2071 | } |
2072 | |
2073 | // Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE). |
2074 | // Particularly, this method compiles the parameter update graph. |
2075 | static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model) |
2076 | { |
2077 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2078 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 2078, __extension__ __PRETTY_FUNCTION__ ); })); |
2079 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
2080 | const int parameter_size = compiled_data->parameters->rnum; |
2081 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); |
2082 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); |
2083 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); |
2084 | // Bind accumulated gradients. |
2085 | if (compiled_data->backward.count > 1) |
2086 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds); |
2087 | else |
2088 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); |
2089 | ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0); |
2090 | int i, j; |
2091 | for (i = 0; i < compiled_data->backward.to_size; i++) |
2092 | { |
2093 | const int* tos; |
2094 | int to_size; |
2095 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size); |
2096 | for (j = 0; j < to_size; j++) |
2097 | { |
2098 | // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply |
2099 | // gradients graph. |
2100 | const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ |
2101 | .d = tos[j], |
2102 | .graph = model->graph, |
2103 | }); |
2104 | if (!exec.graph) |
2105 | ccv_array_add_unique_int(apply_gradients_from, tos[j]); |
2106 | } |
2107 | } |
2108 | const int from_size = apply_gradients_from->rnum; |
2109 | if (from_size == 0) |
2110 | { |
2111 | ccv_array_free(apply_gradients_from); |
2112 | ccv_array_free(tensor_binds); |
2113 | return; |
2114 | } |
2115 | ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size); |
2116 | for (i = 0; i < from_size; i++) |
2117 | froms[i] = (ccv_nnc_graph_exec_symbol_t){ |
2118 | .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t )(apply_gradients_from)->rsize * (size_t)(i))), |
2119 | .graph = model->graph |
2120 | }; |
2121 | ccv_array_free(apply_gradients_from); |
2122 | // It can only ends with updates on the parameters. |
2123 | ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0); |
2124 | for (i = 0; i < parameter_size; i++) |
2125 | { |
2126 | if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL) |
2127 | continue; |
2128 | ccv_array_push(tos, &compiled_data->update_nodes[i]); |
2129 | for (j = 1; j < parallel_count; j++) |
2130 | { |
2131 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j); |
2132 | ccv_array_push(tos, ©); |
2133 | } |
2134 | } |
2135 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize * (size_t)(0))), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena); |
2136 | ccv_array_free(tos); |
2137 | ccv_array_free(tensor_binds); |
2138 | ccfreefree(froms); |
2139 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; |
2140 | for (i = 0; i < max_saved_aux_size * parameter_size; i++) |
2141 | { |
2142 | // Skip on no tensor. |
2143 | if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL) |
2144 | continue; |
2145 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source); |
2146 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); |
2147 | for (j = 1; j < parallel_count; j++) |
2148 | { |
2149 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); |
2150 | if (copy) |
2151 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, ©, 1, 0); |
2152 | } |
2153 | } |
2154 | ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count); |
2155 | } |
2156 | |
2157 | void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context) |
2158 | { |
2159 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2160 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2160, __extension__ __PRETTY_FUNCTION__); })); |
2161 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 2161, __extension__ __PRETTY_FUNCTION__ ); })); |
2162 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
2163 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 2163, __extension__ __PRETTY_FUNCTION__); })); |
2164 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 2164, __extension__ __PRETTY_FUNCTION__ ); })); |
2165 | // Skip if there is no backward pass. |
2166 | if (compiled_data->backward.count <= 0) |
2167 | return; |
2168 | // Skip if there is no parameters. |
2169 | if (compiled_data->parameters->rnum == 0) |
2170 | { |
2171 | compiled_data->backward.count = 0; |
2172 | return; |
2173 | } |
2174 | if (!compiled_data->apply_gradients.graph) |
2175 | _ccv_cnnp_model_multistage_jit_2(model); |
2176 | else { |
2177 | const int parameter_size = compiled_data->parameters->rnum; |
2178 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena); |
2179 | // Change to bind accum_gradients if we do gradient accumulation (run backward more than once). |
2180 | if (compiled_data->backward.count > 1) |
2181 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count); |
2182 | else |
2183 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); |
2184 | } |
2185 | if (compiled_data->apply_gradients.graph) |
2186 | ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context); |
2187 | // Reset backward count to 0. |
2188 | compiled_data->backward.count = 0; |
2189 | } |
2190 | |
2191 | void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor) |
2192 | { |
2193 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2194 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; |
2195 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2195, __extension__ __PRETTY_FUNCTION__ ); })); |
2196 | const int tensors_init = !!compiled_data->tensors_init.v; |
2197 | if (!tensors_init) |
2198 | _ccv_cnnp_model_tensors_init(model, compiled_data); |
2199 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) |
2200 | // Check if it is not fully allocated, if it is not, init_1. |
2201 | ccv_cnnp_model_tensors_init_1(model, compiled_data); |
2202 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); |
2203 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); |
2204 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; |
2205 | if (param_ref < 0) |
2206 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2206 , __extension__ __PRETTY_FUNCTION__); })); } |
2207 | else |
2208 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2208, __extension__ __PRETTY_FUNCTION__ ); })); } |
2209 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); |
2210 | ccv_array_free(parameter_indices); |
2211 | const int parameter_size = compiled_data->parameters->rnum; |
2212 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2212 , __extension__ __PRETTY_FUNCTION__); })); |
2213 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2213, __extension__ __PRETTY_FUNCTION__ ); })); |
2214 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
2215 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); |
2216 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2216, __extension__ __PRETTY_FUNCTION__); })); |
2217 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 ), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); |
2218 | int i; |
2219 | for (i = 1; i < parallel_count; i++) |
2220 | { |
2221 | ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d + i * parameter_size]) & ~(uintptr_t)1)); |
2222 | if (copy_tensor) |
2223 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); |
2224 | } |
2225 | // Mark this symbol as init'ed. |
2226 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( d))))->d; |
2227 | uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
2228 | init_v[s >> 5] |= (1u << (s & 0x1f)); |
2229 | } |
2230 | |
2231 | void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor) |
2232 | { |
2233 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2234 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; |
2235 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2235, __extension__ __PRETTY_FUNCTION__ ); })); |
2236 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2236, __extension__ __PRETTY_FUNCTION__ ); })); |
2237 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); |
2238 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); |
2239 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; |
2240 | if (param_ref < 0) |
2241 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2241 , __extension__ __PRETTY_FUNCTION__); })); } |
2242 | else |
2243 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2243, __extension__ __PRETTY_FUNCTION__ ); })); } |
2244 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); |
2245 | ccv_array_free(parameter_indices); |
2246 | const int parameter_size = compiled_data->parameters->rnum; |
2247 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2247 , __extension__ __PRETTY_FUNCTION__); })); |
2248 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2248, __extension__ __PRETTY_FUNCTION__ ); })); |
2249 | // We don't need to consider parallel_count, every parameter on each device is identical. |
2250 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); |
2251 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2251, __extension__ __PRETTY_FUNCTION__); })); |
2252 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); |
2253 | } |
2254 | |
2255 | ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter) |
2256 | { |
2257 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2258 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; |
2259 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2259, __extension__ __PRETTY_FUNCTION__ ); })); |
2260 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2260, __extension__ __PRETTY_FUNCTION__ ); })); |
2261 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); |
2262 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); |
2263 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; |
2264 | if (param_ref < 0) |
2265 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2265 , __extension__ __PRETTY_FUNCTION__); })); } |
2266 | else |
2267 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2267, __extension__ __PRETTY_FUNCTION__ ); })); } |
2268 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); |
2269 | ccv_array_free(parameter_indices); |
2270 | const int parameter_size = compiled_data->parameters->rnum; |
2271 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2271 , __extension__ __PRETTY_FUNCTION__); })); |
2272 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2272, __extension__ __PRETTY_FUNCTION__ ); })); |
2273 | // We don't need to consider parallel_count, every parameter on each device is identical. |
2274 | ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); |
2275 | assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor ) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2275, __extension__ __PRETTY_FUNCTION__); })); |
2276 | return tensor->info; |
2277 | } |
2278 | |
2279 | const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter) |
2280 | { |
2281 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2282 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; |
2283 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2283, __extension__ __PRETTY_FUNCTION__ ); })); |
2284 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); |
2285 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); |
2286 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; |
2287 | if (param_ref < 0) |
2288 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2288 , __extension__ __PRETTY_FUNCTION__); })); } |
2289 | else |
2290 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2290, __extension__ __PRETTY_FUNCTION__ ); })); } |
2291 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); |
2292 | ccv_array_free(parameter_indices); |
2293 | const int parameter_size = compiled_data->parameters->rnum; |
2294 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2294 , __extension__ __PRETTY_FUNCTION__); })); |
2295 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2295, __extension__ __PRETTY_FUNCTION__ ); })); |
2296 | return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(d))); |
2297 | } |
2298 | |
2299 | int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model) |
2300 | { |
2301 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 2301, __extension__ __PRETTY_FUNCTION__ ); })); |
2302 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2303 | return compiled_data->parameters->rnum; |
2304 | } |
2305 | |
2306 | ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context) |
2307 | { |
2308 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2309 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2309, __extension__ __PRETTY_FUNCTION__); })); |
2310 | const int parameter_size = compiled_data->parameters->rnum; |
2311 | int i; |
2312 | for (i = 0; i < parameter_size; i++) |
2313 | { |
2314 | const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i))); |
2315 | if (first(model, name, context)) |
2316 | return ccv_cnnp_model_parameters(model, -1, i); |
2317 | } |
2318 | return 0; |
2319 | } |
2320 | |
2321 | ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context) |
2322 | { |
2323 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2324 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2324, __extension__ __PRETTY_FUNCTION__); })); |
2325 | ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0); |
2326 | const int parameter_size = compiled_data->parameters->rnum; |
2327 | int i; |
2328 | for (i = 0; i < parameter_size; i++) |
2329 | { |
2330 | const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i))); |
2331 | if (filter(model, name, context)) |
2332 | { |
2333 | ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i); |
2334 | ccv_array_push(parameters, ¶meter); |
2335 | } |
2336 | } |
2337 | return parameters; |
2338 | |
2339 | } |
2340 | |
2341 | CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model) |
2342 | { |
2343 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; |
2344 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2344, __extension__ __PRETTY_FUNCTION__); })); |
2345 | const int tensors_init = !!compiled_data->tensors_init.v; |
2346 | if (!tensors_init) // If nothing initialized, we return parameter 0. |
2347 | return ccv_cnnp_model_parameters(model, -1, 0); |
2348 | const int parameter_size = compiled_data->parameters->rnum; |
2349 | int i; |
2350 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
2351 | for (i = 0; i < parameter_size; i++) |
2352 | { |
2353 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; |
2354 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) |
2355 | return ccv_cnnp_model_parameters(model, -1, i); |
2356 | } |
2357 | return 0; |
2358 | } |
2359 | |
2360 | static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref) |
2361 | { |
2362 | const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel; |
2363 | assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameters->param_sel != 0) ; else __assert_fail ( "parameters->param_sel != 0", "ccv_cnnp_model.c", 2363, __extension__ __PRETTY_FUNCTION__); })); |
2364 | ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0); |
2365 | ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices); |
2366 | *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref; |
2367 | return to_parameter_indices; |
2368 | } |
2369 | |
2370 | static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0) |
2371 | { |
2372 | // If the model is not compiled yet. Compile them now. |
2373 | if (!model->graph) |
2374 | { |
2375 | model->graph = ccv_nnc_symbolic_graph_new(); |
2376 | assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__ ({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data" , "ccv_cnnp_model.c", 2376, __extension__ __PRETTY_FUNCTION__ ); })); |
2377 | const int input_size = from_model->input_size; |
2378 | ccv_nnc_tensor_param_t input_params[input_size]; |
2379 | int i; |
2380 | for (i = 0; i < input_size; i++) |
2381 | input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]); |
2382 | _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss); |
2383 | model->parallel_count = from_model->parallel_count; |
2384 | model->memory_compression = from_model->memory_compression; |
2385 | model->memory_reduction = from_model->memory_reduction; |
2386 | model->gradient_checkpointing = from_model->gradient_checkpointing; |
2387 | model->compiled_data->stream_type = from_model->compiled_data->stream_type; |
2388 | model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer; |
2389 | model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size; |
2390 | } |
2391 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; |
2392 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2392, __extension__ __PRETTY_FUNCTION__ ); })); |
2393 | const int to_tensors_init = !!to_compiled_data->tensors_init.v; |
2394 | if (!to_tensors_init) |
2395 | { |
2396 | if (only_init_0) |
2397 | ccv_cnnp_model_tensors_init_0(model, to_compiled_data); |
2398 | else |
2399 | _ccv_cnnp_model_tensors_init(model, to_compiled_data); |
2400 | } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1) |
2401 | // Check if it is not fully allocated, if it is not, init_1. |
2402 | ccv_cnnp_model_tensors_init_1(model, to_compiled_data); |
2403 | assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (to_compiled_data->tensors.parameters ) ; else __assert_fail ("to_compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2403, __extension__ __PRETTY_FUNCTION__ ); })); |
2404 | *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref); |
2405 | *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref); |
2406 | if (*from_param_ref < 0 && *param_ref >= 0) |
2407 | { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1 : 0), __extension__ ({ if ((*from_parameter_indices)->rnum == 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1" , "ccv_cnnp_model.c", 2407, __extension__ __PRETTY_FUNCTION__ ); })); } |
2408 | else if (*from_param_ref >= 0) |
2409 | { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices )->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref < (*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum" , "ccv_cnnp_model.c", 2409, __extension__ __PRETTY_FUNCTION__ ); })); } |
2410 | if (*param_ref < 0 && *from_param_ref >= 0) |
2411 | { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0) , __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else __assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c" , 2411, __extension__ __PRETTY_FUNCTION__); })); } |
2412 | else if (*param_ref >= 0) |
2413 | { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum ) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices )->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum" , "ccv_cnnp_model.c", 2413, __extension__ __PRETTY_FUNCTION__ ); })); } |
2414 | } |
2415 | |
2416 | void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters) |
2417 | { |
2418 | ccv_array_t* to_parameter_indices; |
2419 | int to_param_ref; |
2420 | ccv_array_t* from_parameter_indices; |
2421 | int from_param_ref; |
2422 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0); |
2423 | // Should be exactly the same tensor. |
2424 | if (to_param_ref < 0 && from_param_ref < 0) |
2425 | { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices ->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices ->rnum == to_parameter_indices->rnum) ; else __assert_fail ("from_parameter_indices->rnum == to_parameter_indices->rnum" , "ccv_cnnp_model.c", 2425, __extension__ __PRETTY_FUNCTION__ ); })); } |
2426 | // To models. |
2427 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; |
2428 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2428, __extension__ __PRETTY_FUNCTION__ ); })); |
2429 | // From models. |
2430 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; |
2431 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); |
2432 | const int to_parameter_size = to_compiled_data->parameters->rnum; |
2433 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1; |
2434 | int i, j; |
2435 | const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init. v) & ~(uintptr_t)1)); |
2436 | uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); |
2437 | for (i = 0; i < rnum; i++) |
2438 | { |
2439 | const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))); |
2440 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2440, __extension__ __PRETTY_FUNCTION__); })); |
2441 | assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters ->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data ->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2441, __extension__ __PRETTY_FUNCTION__ ); })); |
2442 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; |
2443 | // If the original is not init'ed. We cannot copy from. |
2444 | if (!(from_init_v[s >> 5] & (1u << (s & 0x1f)))) |
2445 | continue; |
2446 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); |
2447 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2447, __extension__ __PRETTY_FUNCTION__); })); |
2448 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2448, __extension__ __PRETTY_FUNCTION__ ); })); |
2449 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d]) & ~(uintptr_t)1)); |
2450 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2450, __extension__ __PRETTY_FUNCTION__); })); |
2451 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d]) & ~(uintptr_t)1)); |
2452 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2452, __extension__ __PRETTY_FUNCTION__); })); |
2453 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); |
2454 | for (j = 1; j < parallel_count; j++) |
2455 | { |
2456 | ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t )1)); |
2457 | if (copy_tensor) |
2458 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); |
2459 | } |
2460 | // Mark this symbol as init'ed. |
2461 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; |
2462 | to_init_v[d >> 5] |= (1u << (d & 0x1f)); |
2463 | } |
2464 | ccv_array_free(to_parameter_indices); |
2465 | ccv_array_free(from_parameter_indices); |
2466 | } |
2467 | |
2468 |