File: | nnc/ccv_cnnp_model.c |
Warning: | line 2500, column 25 Array access (via field 'vals') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv_nnc.h" | |||
2 | #include "ccv_nnc_easy.h" | |||
3 | #include "ccv_nnc_internal.h" | |||
4 | #include "ccv_internal.h" | |||
5 | #include "_ccv_cnnp_model.h" | |||
6 | #include "_ccv_nnc_graph.h" | |||
7 | ||||
8 | // MARK - Level-5 API | |||
9 | ||||
10 | ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size) | |||
11 | { | |||
12 | if (!model->io) | |||
13 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | |||
14 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size); | |||
15 | model_io->param_ref = 0; | |||
16 | model_io->param_sel = 0; | |||
17 | model_io->visit = 0; | |||
18 | model_io->model = model; | |||
19 | model_io->dependencies = 0; | |||
20 | model_io->dependents = 0; | |||
21 | model_io->outgoings = 0; | |||
22 | model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1); | |||
23 | ccv_array_push(model->io, &model_io); | |||
24 | if (input_size > 0) | |||
25 | { | |||
26 | model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0); | |||
27 | ccv_array_resize(model_io->incomings, input_size); | |||
28 | int i; | |||
29 | memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t )(model_io->incomings)->rsize * (size_t)(0))), inputs, sizeof(ccv_cnnp_model_io_t) * input_size); | |||
30 | for (i = 0; i < input_size; i++) | |||
31 | { | |||
32 | if (!inputs[i]->outgoings) | |||
33 | inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | |||
34 | ccv_array_push(inputs[i]->outgoings, &model_io); | |||
35 | } | |||
36 | } else { | |||
37 | model_io->incomings = 0; | |||
38 | } | |||
39 | return model_io; | |||
40 | } | |||
41 | ||||
42 | void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size) | |||
43 | { | |||
44 | assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__ ({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0" , "ccv_cnnp_model.c", 44, __extension__ __PRETTY_FUNCTION__); })); | |||
45 | if (!model_io->dependencies) | |||
46 | model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0); | |||
47 | int i, j; | |||
48 | for (i = 0; i < dependency_size; i++) | |||
49 | { | |||
50 | int flag = 0; | |||
51 | // Check if it is already exist or not. | |||
52 | for (j = 0; !flag && j < model_io->dependencies->rnum; j++) | |||
53 | if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t )(model_io->dependencies)->rsize * (size_t)(j))) == dependencies[i]) | |||
54 | flag = 1; | |||
55 | if (flag) | |||
56 | continue; | |||
57 | ccv_array_push(model_io->dependencies, dependencies + i); | |||
58 | ++dependencies[i]->dependents; | |||
59 | } | |||
60 | } | |||
61 | ||||
62 | int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model) | |||
63 | { | |||
64 | return model->output_size; | |||
65 | } | |||
66 | ||||
67 | int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model) | |||
68 | { | |||
69 | // If the model is compiled, it is default to 1 unless it is not. | |||
70 | if (model->compiled_data) | |||
71 | return model->is_trainable >= 0 ? model->is_trainable : 1; | |||
72 | return model->is_trainable; | |||
73 | } | |||
74 | ||||
75 | ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index) | |||
76 | { | |||
77 | if (!model->io) | |||
78 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | |||
79 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s)); | |||
80 | model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1; | |||
81 | model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1; | |||
82 | model_io->visit = 0; | |||
83 | model_io->model = model; | |||
84 | model_io->outputs = 0; | |||
85 | model_io->dependencies = 0; | |||
86 | model_io->dependents = 0; | |||
87 | model_io->incomings = 0; | |||
88 | model_io->outgoings = 0; | |||
89 | ccv_array_push(model->io, &model_io); | |||
90 | return model_io; | |||
91 | } | |||
92 | ||||
93 | void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context) | |||
94 | { | |||
95 | model->notify_hook.func = func; | |||
96 | model->notify_hook.context = context; | |||
97 | } | |||
98 | ||||
99 | void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload) | |||
100 | { | |||
101 | if (model->notify_hook.func) | |||
102 | model->notify_hook.func(model, tag, payload, model->notify_hook.context); | |||
103 | if (model->isa->notify) | |||
104 | model->isa->notify(model, tag, payload); | |||
105 | } | |||
106 | ||||
107 | static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size) | |||
108 | { | |||
109 | int i, j; | |||
110 | for (i = 0; i < graph_exec_symbol_size; i++) | |||
111 | { | |||
112 | ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i; | |||
113 | // Check whether this tensor symbol has any duplicate. | |||
114 | for (j = i + 1; j < graph_exec_symbol_size;) | |||
115 | { | |||
116 | ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j; | |||
117 | // If there is a same tensor symbol, remove it. | |||
118 | if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph) | |||
119 | { | |||
120 | if (j + 1 < graph_exec_symbol_size) | |||
121 | *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1]; | |||
122 | --graph_exec_symbol_size; | |||
123 | continue; | |||
124 | } | |||
125 | ++j; | |||
126 | } | |||
127 | } | |||
128 | return graph_exec_symbol_size; | |||
129 | } | |||
130 | ||||
131 | void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable) | |||
132 | { | |||
133 | ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context; | |||
134 | ccv_cnnp_model_t* const model = add_to_array_context->sequence->model; | |||
135 | int i; | |||
136 | if (add_to_array_context->add_parameter_indices && !model->parameter_indices) | |||
137 | model->parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
138 | for (i = 0; i < add_to_array_context->symbols->rnum; i++) | |||
139 | { | |||
140 | const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data )) + (size_t)(add_to_array_context->symbols)->rsize * ( size_t)(i))); | |||
141 | if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph) | |||
142 | { | |||
143 | // Only add to parameter_indices if it is trainable. | |||
144 | if (add_to_array_context->add_parameter_indices) | |||
145 | ccv_array_add_unique_int(model->parameter_indices, i); | |||
146 | // Found it, return, don't add it. | |||
147 | return; | |||
148 | } | |||
149 | } | |||
150 | // Only add to parameter_indices if it is trainable. | |||
151 | if (add_to_array_context->add_parameter_indices) | |||
152 | ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum); | |||
153 | // This is a new one, no need to add_unique_int, it is unique. | |||
154 | ccv_array_push(add_to_array_context->symbols, &symbol); | |||
155 | if (add_to_array_context->trainables) | |||
156 | ccv_array_push(add_to_array_context->trainables, &is_trainable); | |||
157 | char id[2048]; | |||
158 | id[0] = add_to_array_context->prefix; | |||
159 | id[1] = '-'; | |||
160 | int total_len = 2; | |||
161 | for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++) | |||
162 | { | |||
163 | const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences )->data)) + (size_t)(add_to_array_context->sequence-> sequences)->rsize * (size_t)(i))); | |||
164 | int len; | |||
165 | if (name->name && name->name[0] != '\0') | |||
166 | len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence); | |||
167 | else | |||
168 | len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence); | |||
169 | total_len += len; | |||
170 | if (total_len >= 2047) | |||
171 | break; | |||
172 | } | |||
173 | if (total_len < 2047) | |||
174 | total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it); | |||
175 | assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__ ({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048" , "ccv_cnnp_model.c", 175, __extension__ __PRETTY_FUNCTION__) ; })); | |||
176 | char *heap_id = (char*)ccmallocmalloc(total_len + 1); | |||
177 | memcpy(heap_id, id, total_len + 1); | |||
178 | ccv_array_push(add_to_array_context->ids, &heap_id); | |||
179 | ++add_to_array_context->sequence->it; | |||
180 | } | |||
181 | ||||
182 | static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints) | |||
183 | { | |||
184 | compiled_data->f = compiled_data->fits + output_size; | |||
185 | compiled_data->xpu_alloc.mp_hdr = -1; | |||
186 | compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str(); | |||
187 | compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc(); | |||
188 | compiled_data->gradient_checkpoints = gradient_checkpoints; | |||
189 | } | |||
190 | ||||
191 | static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss) | |||
192 | { | |||
193 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 193, __extension__ __PRETTY_FUNCTION__); })); | |||
194 | model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size); | |||
195 | int i; | |||
196 | for (i = 0; i < input_size; i++) | |||
197 | model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0); | |||
198 | ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); | |||
199 | ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0); | |||
200 | ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0); | |||
201 | ccv_cnnp_model_sequence_t model_sequence = { | |||
202 | .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank() | |||
203 | }; | |||
204 | ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = { | |||
205 | .add_parameter_indices = 1, | |||
206 | .prefix = 't', | |||
207 | .sequence = &model_sequence, | |||
208 | .symbols = parameters, | |||
209 | .ids = parameter_ids, | |||
210 | .trainables = parameter_trainables, | |||
211 | }; | |||
212 | ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); | |||
213 | ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0); | |||
214 | ccv_cnnp_model_add_to_array_context_t add_to_output_context = { | |||
215 | .add_parameter_indices = 0, | |||
216 | .prefix = 'r', | |||
217 | .sequence = &model_sequence, | |||
218 | .symbols = internals, | |||
219 | .ids = internal_ids, | |||
220 | .trainables = 0, | |||
221 | }; | |||
222 | ccv_cnnp_model_build_data_t build_data = { | |||
223 | .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1, | |||
224 | .model_sequence = &model_sequence, | |||
225 | .add_to_array = ccv_cnnp_model_add_to_array, | |||
226 | .parameters = parameters, | |||
227 | .context = { | |||
228 | .add_to_parameter = &add_to_parameter_context, | |||
229 | .add_to_output = &add_to_output_context, | |||
230 | }, | |||
231 | .gradient_checkpoints = 0, | |||
232 | }; | |||
233 | model->data = &build_data; | |||
234 | ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0); | |||
235 | for (i = 0; i < model->output_size; i++) | |||
236 | { | |||
237 | const ccv_nnc_tensor_symbol_t output = model->outputs[i]; | |||
238 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output); | |||
239 | if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
240 | continue; | |||
241 | // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method | |||
242 | // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be | |||
243 | // honest, because we cannot handle cases of alias is part of the original tensor but bind differently). | |||
244 | const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output); | |||
245 | model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0); | |||
246 | ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto , 0), &output, 1, model->outputs + i, 1, "contiguous"); | |||
247 | ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT); | |||
248 | } | |||
249 | model->data = 0; | |||
250 | kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank); | |||
251 | if (model_sequence.sequences) | |||
252 | ccv_array_free(model_sequence.sequences); | |||
253 | // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that. | |||
254 | int not_trainables = 0; | |||
255 | // Assert no parameter is alias. | |||
256 | for (i = 0; i < parameters->rnum; i++) | |||
257 | { | |||
258 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(i))); | |||
259 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter); | |||
260 | assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__ ({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0" , "ccv_cnnp_model.c", 260, __extension__ __PRETTY_FUNCTION__) ; })); // Cannot find the one alias to. | |||
261 | if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t )(parameter_trainables)->rsize * (size_t)(i))) == 0) | |||
262 | not_trainables = 1; | |||
263 | } | |||
264 | assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables-> rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables ->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum" , "ccv_cnnp_model.c", 264, __extension__ __PRETTY_FUNCTION__) ; })); | |||
265 | uint64_t* parameter_flags = 0; | |||
266 | if (not_trainables) | |||
267 | { | |||
268 | parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t)); | |||
269 | for (i = 0; i < parameter_trainables->rnum; i++) | |||
270 | if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t )(parameter_trainables)->rsize * (size_t)(i)))) | |||
271 | parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
272 | } | |||
273 | ccv_array_free(parameter_trainables); | |||
274 | // Assert no internal is alias. | |||
275 | for (i = 0; i < internals->rnum; i++) | |||
276 | { | |||
277 | const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals )->rsize * (size_t)(i))); | |||
278 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal); | |||
279 | assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__ ({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0" , "ccv_cnnp_model.c", 279, __extension__ __PRETTY_FUNCTION__) ; })); // Cannot find the one alias to. | |||
280 | } | |||
281 | const int output_size = model->output_size; | |||
282 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
283 | const int parameters_rnum = parameters->rnum; | |||
284 | if (input_size > 0) | |||
285 | { | |||
286 | ccv_array_resize(parameters, parameters_rnum + input_size); | |||
287 | memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(parameters_rnum))), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t)); | |||
288 | } | |||
289 | ccv_nnc_symbolic_graph_simplify(model->graph, | |||
290 | SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) | |||
291 | CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) | |||
292 | CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) | |||
293 | CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), | |||
294 | ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(0))), parameters_rnum + input_size, | |||
295 | model->outputs, output_size, | |||
296 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
297 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
298 | // Size it down. | |||
299 | parameters->rnum = parameters_rnum; | |||
300 | ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1)); | |||
301 | _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints); | |||
302 | const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph); | |||
303 | assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__ ({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0" , "ccv_cnnp_model.c", 303, __extension__ __PRETTY_FUNCTION__) ; })); | |||
304 | compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size); | |||
305 | memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size); | |||
306 | compiled_data->loss = loss; | |||
307 | if (loss.cmd == CCV_NNC_NOOP) | |||
308 | { | |||
309 | // If no loss function provided, there is no fits. | |||
310 | for (i = 0; i < output_size; i++) | |||
311 | { | |||
312 | compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
313 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]); | |||
314 | if (alias_to.d < 0) | |||
315 | compiled_data->f[i] = model->outputs[i]; | |||
316 | else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original. | |||
317 | int ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
318 | int inc[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
319 | ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc); | |||
320 | int j; | |||
321 | for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++) | |||
322 | { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if ( ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c" , 322, __extension__ __PRETTY_FUNCTION__); })); } // There is no ofs. | |||
323 | compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet. | |||
324 | } | |||
325 | } | |||
326 | } else { | |||
327 | for (i = 0; i < output_size; i++) | |||
328 | { | |||
329 | const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]); | |||
330 | const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0); | |||
331 | compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0); | |||
332 | ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit} , (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, ( 1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
333 | } | |||
334 | } | |||
335 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
336 | ccv_nnc_symbolic_graph_simplify(model->graph, | |||
337 | SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), // Only do Ops fusion, in this way, we can fuse the loss function. | |||
338 | 0, 0, // No need to provide binds at this point. | |||
339 | compiled_data->f, model->output_size, | |||
340 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
341 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
342 | // If inputs are from GPU, stream type is GPU. | |||
343 | compiled_data->parameters = parameters; | |||
344 | compiled_data->parameter_flags = parameter_flags; | |||
345 | compiled_data->internals = internals; | |||
346 | compiled_data->ids.parameters = parameter_ids; | |||
347 | compiled_data->ids.internals = internal_ids; | |||
348 | ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph); | |||
349 | } | |||
350 | ||||
351 | static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) | |||
352 | { | |||
353 | ccv_array_t* const stack = (ccv_array_t*)context; | |||
354 | ccv_array_push(stack, &symbol.d); | |||
355 | } | |||
356 | ||||
357 | static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) | |||
358 | { | |||
359 | const ccv_nnc_tensor_symbol_t src_symbol = { | |||
360 | .d = src_index, | |||
361 | .graph = src_graph | |||
362 | }; | |||
363 | const ccv_nnc_tensor_symbol_t dest_symbol = { | |||
364 | .d = dest_index, | |||
365 | .graph = dest_graph | |||
366 | }; | |||
367 | const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); | |||
368 | ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params); | |||
369 | int ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
370 | int inc[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
371 | if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc)) | |||
372 | ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc); | |||
373 | } | |||
374 | ||||
375 | static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) | |||
376 | { | |||
377 | const ccv_nnc_tensor_symbol_t src_symbol = { | |||
378 | .d = src_index, | |||
379 | .graph = src_graph | |||
380 | }; | |||
381 | const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); | |||
382 | const ccv_nnc_tensor_symbol_t dest_symbol = { | |||
383 | .d = dest_index, | |||
384 | .graph = dest_graph | |||
385 | }; | |||
386 | const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol); | |||
387 | return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0; | |||
388 | } | |||
389 | ||||
390 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size); | |||
391 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data); | |||
392 | ||||
393 | typedef struct { | |||
394 | int parallel_count; | |||
395 | ccv_nnc_symbolic_graph_t* graph; | |||
396 | ccv_nnc_graph_exec_arena_t* graph_exec_arena; | |||
397 | } ccv_nnc_graph_exec_update_t; | |||
398 | ||||
399 | static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint) | |||
400 | { | |||
401 | ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context; | |||
402 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena; | |||
403 | ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol); | |||
404 | ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd); | |||
405 | ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint); | |||
406 | const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph; | |||
407 | const int parallel_count = graph_exec_update->parallel_count; | |||
408 | int i; | |||
409 | for (i = 1; i < parallel_count; i++) | |||
410 | { | |||
411 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i)); | |||
412 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) | |||
413 | { | |||
414 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); | |||
415 | ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint); | |||
416 | } | |||
417 | } | |||
418 | } | |||
419 | ||||
420 | void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size) | |||
421 | { | |||
422 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 422, __extension__ __PRETTY_FUNCTION__); })); | |||
423 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 423, __extension__ __PRETTY_FUNCTION__) ; })); | |||
424 | assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if (!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c" , 424, __extension__ __PRETTY_FUNCTION__); })); | |||
425 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
426 | init->graph = ccv_nnc_symbolic_graph_new(); | |||
427 | ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0); | |||
428 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0); | |||
429 | _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss); | |||
430 | init->parallel_count = model->parallel_count; | |||
431 | init->memory_compression = model->memory_compression; | |||
432 | init->memory_reduction = model->memory_reduction; | |||
433 | init->gradient_checkpointing = model->gradient_checkpointing; | |||
434 | init->compiled_data->stream_type = model->compiled_data->stream_type; | |||
435 | init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer; | |||
436 | init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size; | |||
437 | if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
438 | _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0); | |||
439 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0); | |||
440 | ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0); | |||
441 | int i, j; | |||
442 | // Verify parameters, internals and saved_aux in both graph has the same dimensionality. | |||
443 | for (i = 0; i < compiled_data->parameters->rnum; i++) | |||
444 | { | |||
445 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | |||
446 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 446, __extension__ __PRETTY_FUNCTION__) ; })); | |||
447 | } | |||
448 | for (i = 0; i < compiled_data->internals->rnum; i++) | |||
449 | { | |||
450 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; | |||
451 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 451, __extension__ __PRETTY_FUNCTION__) ; })); | |||
452 | } | |||
453 | // Update inputs. | |||
454 | assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size) ? 1 : 0), __extension__ ({ if (model->input_size == init-> input_size) ; else __assert_fail ("model->input_size == init->input_size" , "ccv_cnnp_model.c", 454, __extension__ __PRETTY_FUNCTION__) ; })); | |||
455 | for (i = 0; i < model->input_size; i++) | |||
456 | if (model->inputs[i].d >= 0) | |||
457 | { | |||
458 | assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0" , "ccv_cnnp_model.c", 458, __extension__ __PRETTY_FUNCTION__) ; })); | |||
459 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d); | |||
460 | } | |||
461 | // Update outputs. | |||
462 | assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size ) ? 1 : 0), __extension__ ({ if (model->output_size == init ->output_size) ; else __assert_fail ("model->output_size == init->output_size" , "ccv_cnnp_model.c", 462, __extension__ __PRETTY_FUNCTION__) ; })); | |||
463 | for (i = 0; i < model->output_size; i++) | |||
464 | { | |||
465 | if (model->outputs[i].d >= 0) | |||
466 | { | |||
467 | assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->outputs[i].d >= 0) ; else __assert_fail ( "init->outputs[i].d >= 0", "ccv_cnnp_model.c", 467, __extension__ __PRETTY_FUNCTION__); })); | |||
468 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d); | |||
469 | } | |||
470 | if (model->outputs[i].d != model->compiled_data->f[i].d) | |||
471 | { | |||
472 | assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data ->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[ i].d != init->compiled_data->f[i].d) ; else __assert_fail ("init->outputs[i].d != init->compiled_data->f[i].d" , "ccv_cnnp_model.c", 472, __extension__ __PRETTY_FUNCTION__) ; })); | |||
473 | if (model->compiled_data->f[i].d >= 0) | |||
474 | { | |||
475 | assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->compiled_data->f[i] .d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0" , "ccv_cnnp_model.c", 475, __extension__ __PRETTY_FUNCTION__) ; })); | |||
476 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d); | |||
477 | } | |||
478 | } | |||
479 | } | |||
480 | // Go through the graph to set tensor on matching symbols | |||
481 | for (i = 0; i < stack->rnum; i++) | |||
482 | { | |||
483 | const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize * (size_t)(i))); | |||
484 | // If exceed range, skip. | |||
485 | if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) || | |||
486 | d >= ccv_nnc_graph_exec_symbol_count(model->graph)) | |||
487 | continue; | |||
488 | const ccv_nnc_graph_exec_symbol_t src_symbol = { | |||
489 | .d = d, | |||
490 | .graph = init->graph | |||
491 | }; | |||
492 | const ccv_nnc_graph_exec_symbol_t dest_symbol = { | |||
493 | .d = d, | |||
494 | .graph = model->graph | |||
495 | }; | |||
496 | const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol); | |||
497 | const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol); | |||
498 | // If the name doesn't match, skip. | |||
499 | if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP) | |||
500 | continue; | |||
501 | // Now get all the inputs and outputs, if matches, set them. | |||
502 | const int* src_inputs; | |||
503 | int src_input_size; | |||
504 | const int* src_outputs; | |||
505 | int src_output_size; | |||
506 | ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size); | |||
507 | const int* dest_inputs; | |||
508 | int dest_input_size; | |||
509 | const int* dest_outputs; | |||
510 | int dest_output_size; | |||
511 | ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size); | |||
512 | // We may have unmatched input / output size because this is the minimizer and it has | |||
513 | // different saved_aux (for example, when we shrunk with CMD_NOOP). | |||
514 | if (src_input_size != dest_input_size) | |||
515 | continue; | |||
516 | if (src_output_size != dest_output_size) | |||
517 | continue; | |||
518 | ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd); | |||
519 | // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because | |||
520 | // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original | |||
521 | // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That | |||
522 | // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as | |||
523 | // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec | |||
524 | // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not | |||
525 | // a new exec symbol. | |||
526 | for (j = 0; j < src_input_size; j++) | |||
527 | if (src_inputs[j] >= 0) | |||
528 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]); | |||
529 | for (j = 0; j < src_output_size; j++) | |||
530 | if (src_outputs[j] >= 0) | |||
531 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]); | |||
532 | } | |||
533 | ccv_array_free(stack); | |||
534 | // After this, we get all tensors in the model graph resolved through tensor_auto. | |||
535 | ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0); | |||
536 | // Verify symbols we get matches. | |||
537 | const int parameter_size = compiled_data->parameters->rnum; | |||
538 | for (i = 0; i < parameter_size; i++) | |||
539 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->parameters)->data)) + (size_t)(compiled_data ->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if ( ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data-> parameters)->data)) + (size_t)(compiled_data->parameters )->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d" , "ccv_cnnp_model.c", 539, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
540 | const int internal_size = compiled_data->internals->rnum; | |||
541 | for (i = 0; i < internal_size; i++) | |||
542 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->internals)->data)) + (size_t)(compiled_data ->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->internals)-> data)) + (size_t)(init->compiled_data->internals)->rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((compiled_data->internals)->data)) + (size_t)(compiled_data->internals)->rsize * (size_t)(i ))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init ->compiled_data->internals)->data)) + (size_t)(init-> compiled_data->internals)->rsize * (size_t)(i))))->d ) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d" , "ccv_cnnp_model.c", 542, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
543 | // Go through compiled data. | |||
544 | if (compiled_data->tensor_arena) | |||
545 | { | |||
546 | const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph); | |||
547 | if (flag == 0 && compiled_data->graph_exec_arena) | |||
548 | { | |||
549 | ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph); | |||
550 | // Since we will reinit, if we previously set is_test, we need to set it again. | |||
551 | if (compiled_data->is_test) | |||
552 | { | |||
553 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
554 | ccv_nnc_graph_exec_update_t update = { | |||
555 | .parallel_count = parallel_count, | |||
556 | .graph = model->graph, | |||
557 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
558 | }; | |||
559 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); | |||
560 | } | |||
561 | } else | |||
562 | // Free-up tensor arena & graph exec arena. | |||
563 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
564 | } | |||
565 | // There are other compiled graphs, for accum and apply gradients. | |||
566 | // However, the main conclusion is, these absorb operations shouldn't impact parameters. | |||
567 | // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we | |||
568 | // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot | |||
569 | // be changed otherwise parameters' shape will be meaningless. The same goes to internals. | |||
570 | // That is why we don't update these compiled graphs at all this point. | |||
571 | // Free the model, we've already "absorbed" it. | |||
572 | ccv_cnnp_model_free(init); | |||
573 | } | |||
574 | ||||
575 | void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss) | |||
576 | { | |||
577 | assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model-> input_size == 0) ? 1 : 0), __extension__ ({ if (input_size == model->input_size || model->input_size == 0) ; else __assert_fail ("input_size == model->input_size || model->input_size == 0" , "ccv_cnnp_model.c", 577, __extension__ __PRETTY_FUNCTION__) ; })); | |||
578 | if (model->input_size == 0) | |||
579 | model->input_size = input_size; | |||
580 | if (!model->graph) // The graph is not compiled yet. | |||
581 | { | |||
582 | model->graph = ccv_nnc_symbolic_graph_new(); | |||
583 | _ccv_cnnp_model_compile(model, inputs, input_size, loss); | |||
584 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 584, __extension__ __PRETTY_FUNCTION__) ; })); | |||
585 | int i, flag = 0; | |||
586 | for (i = 0; !flag && i < input_size; i++) | |||
587 | flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY); | |||
588 | // If inputs are from GPU, stream type is GPU. | |||
589 | model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
590 | model->compiled_data->minimize.minimizer = minimizer; | |||
591 | model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer); | |||
592 | } else { | |||
593 | // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model. | |||
594 | // And then absorb the "new model" to the old one. | |||
595 | ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable); | |||
596 | ccv_cnnp_model_absorb(model, init, inputs, input_size); | |||
597 | // Reset minimizer. | |||
598 | ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0); | |||
599 | } | |||
600 | } | |||
601 | ||||
602 | ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable) | |||
603 | { | |||
604 | ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0); | |||
605 | new_model->is_trainable = is_trainable; | |||
606 | return new_model; | |||
607 | } | |||
608 | ||||
609 | void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size) | |||
610 | { | |||
611 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 611, __extension__ __PRETTY_FUNCTION__); })); | |||
612 | assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0 ), __extension__ ({ if (output_size == model->output_size) ; else __assert_fail ("output_size == model->output_size" , "ccv_cnnp_model.c", 612, __extension__ __PRETTY_FUNCTION__) ; })); | |||
613 | ccv_nnc_symbolic_graph_t* const graph = model->graph; | |||
614 | ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0); | |||
615 | int i; | |||
616 | for (i = 0; i < output_size; i++) | |||
617 | { | |||
618 | assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL" , "ccv_cnnp_model.c", 618, __extension__ __PRETTY_FUNCTION__) ; })); | |||
619 | outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]); | |||
620 | } | |||
621 | } | |||
622 | ||||
623 | void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size) | |||
624 | { | |||
625 | if (workspace_size == model->workspace_size) | |||
626 | return; | |||
627 | model->workspace_size = workspace_size; | |||
628 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
629 | if (compiled_data && compiled_data->graph) | |||
630 | ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
631 | } | |||
632 | ||||
633 | size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model) | |||
634 | { | |||
635 | return model->workspace_size; | |||
636 | } | |||
637 | ||||
638 | void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel) | |||
639 | { | |||
640 | if (parallel == 0) | |||
641 | model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); | |||
642 | else | |||
643 | model->parallel_count = parallel; | |||
644 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
645 | if (compiled_data) | |||
646 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 646, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
647 | } | |||
648 | ||||
649 | void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count) | |||
650 | { | |||
651 | model->max_stream_count = max_stream_count; | |||
652 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
653 | if (compiled_data) | |||
654 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 654, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
655 | } | |||
656 | ||||
657 | void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression) | |||
658 | { | |||
659 | model->memory_compression = memory_compression; | |||
660 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
661 | if (compiled_data) | |||
662 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 662, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
663 | } | |||
664 | ||||
665 | void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction) | |||
666 | { | |||
667 | model->memory_reduction = memory_reduction; | |||
668 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
669 | if (compiled_data) | |||
670 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 670, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
671 | } | |||
672 | ||||
673 | void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing) | |||
674 | { | |||
675 | model->gradient_checkpointing = gradient_checkpointing; | |||
676 | } | |||
677 | ||||
678 | int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model) | |||
679 | { | |||
680 | return model->gradient_checkpointing; | |||
681 | } | |||
682 | ||||
683 | typedef struct { | |||
684 | int parallel_count; | |||
685 | ccv_nnc_symbolic_graph_t* graph; | |||
686 | ccv_cnnp_compiled_data_t* compiled_data; | |||
687 | ccv_nnc_tensor_arena_t* tensor_arena; | |||
688 | } ccv_nnc_tensor_init_states_t; | |||
689 | ||||
690 | static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data) | |||
691 | { | |||
692 | int i; | |||
693 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
694 | for (i = 0; i < compiled_data->parameters->rnum; i++) | |||
695 | { | |||
696 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | |||
697 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) | |||
698 | return 1; | |||
699 | } | |||
700 | for (i = 0; i < compiled_data->internals->rnum; i++) | |||
701 | { | |||
702 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; | |||
703 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) | |||
704 | return 1; | |||
705 | } | |||
706 | return 0; | |||
707 | } | |||
708 | ||||
709 | static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol) | |||
710 | { | |||
711 | ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context; | |||
712 | ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena; | |||
713 | ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol); | |||
714 | if (!output_tensor) | |||
715 | return; | |||
716 | const int d = output_symbol.d; | |||
717 | assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data-> tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states ->compiled_data->tensors_init.size) ; else __assert_fail ("d < tensor_init_states->compiled_data->tensors_init.size" , "ccv_cnnp_model.c", 717, __extension__ __PRETTY_FUNCTION__) ; })); | |||
718 | uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data ->tensors_init.v) & ~(uintptr_t)1)); | |||
719 | if (init_v[d >> 5] & (1u << (d & 0x1f))) | |||
720 | return; | |||
721 | init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
722 | ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0); | |||
723 | const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph; | |||
724 | const int parallel_count = tensor_init_states->parallel_count; | |||
725 | int i; | |||
726 | for (i = 1; i < parallel_count; i++) | |||
727 | { | |||
728 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i)); | |||
729 | if (copy) | |||
730 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &output_tensor, 1, ©, 1, 0); | |||
731 | } | |||
732 | } | |||
733 | ||||
734 | // This method can only handle cases we added new tensors and exec, never delete. This invariant is true because | |||
735 | // we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup. | |||
736 | static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model) | |||
737 | { | |||
738 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 738, __extension__ __PRETTY_FUNCTION__); })); | |||
739 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 739, __extension__ __PRETTY_FUNCTION__) ; })); | |||
740 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
741 | assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__ ({ if (compiled_data->rewindables) ; else __assert_fail ( "compiled_data->rewindables", "ccv_cnnp_model.c", 741, __extension__ __PRETTY_FUNCTION__); })); | |||
742 | int i; | |||
743 | for (i = 0; i < compiled_data->rewindables->rnum; i++) | |||
744 | { | |||
745 | const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) + (size_t)(compiled_data->rewindables)->rsize * (size_t) (i))); | |||
746 | if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC) | |||
747 | ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec); | |||
748 | else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR) | |||
749 | ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor); | |||
750 | } | |||
751 | ccv_array_clear(compiled_data->rewindables); | |||
752 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
753 | } | |||
754 | ||||
755 | static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name) | |||
756 | { | |||
757 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | |||
758 | .type = CCV_CNNP_REWIND_TENSOR, | |||
759 | .tensor = symbol | |||
760 | }; | |||
761 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | |||
762 | ccv_array_push(rewind_symbols, &rewind_symbol); | |||
763 | } | |||
764 | ||||
765 | static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name) | |||
766 | { | |||
767 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | |||
768 | .type = CCV_CNNP_REWIND_TENSOR, | |||
769 | .tensor = symbol | |||
770 | }; | |||
771 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | |||
772 | ccv_array_push(rewind_symbols, &rewind_symbol); | |||
773 | } | |||
774 | ||||
775 | static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) | |||
776 | { | |||
777 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | |||
778 | .type = CCV_CNNP_REWIND_GRAPH_EXEC, | |||
779 | .graph_exec = symbol | |||
780 | }; | |||
781 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | |||
782 | ccv_array_push(rewind_symbols, &rewind_symbol); | |||
783 | } | |||
784 | ||||
785 | static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph) | |||
786 | { | |||
787 | ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol); | |||
788 | if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0)) | |||
789 | ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd); | |||
790 | int i; | |||
791 | for (i = 1; i < parallel_count; i++) | |||
792 | { | |||
793 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); | |||
794 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol); | |||
795 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) | |||
796 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); | |||
797 | } | |||
798 | } | |||
799 | ||||
800 | static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd) | |||
801 | { | |||
802 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 802, __extension__ __PRETTY_FUNCTION__); })); | |||
803 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 803, __extension__ __PRETTY_FUNCTION__); })); | |||
804 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd); | |||
805 | int i; | |||
806 | for (i = 1; i < parallel_count; i++) | |||
807 | { | |||
808 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); | |||
809 | if (copy_symbol.graph) | |||
810 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd); | |||
811 | } | |||
812 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena; | |||
813 | if (graph_exec_arena) | |||
814 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); | |||
815 | // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph) | |||
816 | ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena; | |||
817 | if (gradient_graph_exec_arena) | |||
818 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); | |||
819 | } | |||
820 | ||||
821 | static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice) | |||
822 | { | |||
823 | int this_parameter_flag = 0; | |||
824 | if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
825 | return this_parameter_flag; | |||
826 | const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]); | |||
827 | int j, k; | |||
828 | // For no-op, we can preserve previous saved_aux_size. | |||
829 | if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP) | |||
830 | { | |||
831 | // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous | |||
832 | // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between | |||
833 | // noop and a minimizer. We don't want that because we do that in high-level frameworks to | |||
834 | // make sure some model parameters don't update if we don't want them to. | |||
835 | int old_saved_aux_size; | |||
836 | if (old_minimizer.cmd == CCV_NNC_NOOP) | |||
837 | { | |||
838 | int input_size; | |||
839 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0); | |||
840 | if (input_size < 2) // This is not legit. | |||
841 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); | |||
842 | else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters. | |||
843 | old_saved_aux_size = input_size - 2; | |||
844 | } else | |||
845 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); | |||
846 | if (old_saved_aux_size != saved_aux_size) | |||
847 | { | |||
848 | this_parameter_flag = 1; | |||
849 | if (saved_aux_size > old_saved_aux_size) | |||
850 | { | |||
851 | // Allocate new tensor symbols. | |||
852 | const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]); | |||
853 | for (j = old_saved_aux_size; j < saved_aux_size; j++) | |||
854 | { | |||
855 | saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0); | |||
856 | saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0); | |||
857 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
858 | for (k = 1; k < parallel_count; k++) | |||
859 | { | |||
860 | ccv_nnc_tensor_param_t dev_info = info; | |||
861 | if (k != device_id) | |||
862 | CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) & 0xfff) << 8)); | |||
863 | else | |||
864 | CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) & 0xfff) << 8)); | |||
865 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); | |||
866 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); | |||
867 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy); | |||
868 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy); | |||
869 | } | |||
870 | } | |||
871 | } else { | |||
872 | for (j = saved_aux_size; j < old_saved_aux_size; j++) | |||
873 | { | |||
874 | for (k = 1; k < parallel_count; k++) | |||
875 | { | |||
876 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); | |||
877 | if (src_copy.d >= 0) | |||
878 | { | |||
879 | ccv_nnc_tensor_symbol_free(graph, src_copy); | |||
880 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); | |||
881 | } | |||
882 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); | |||
883 | if (dest_copy.d >= 0) | |||
884 | { | |||
885 | ccv_nnc_tensor_symbol_free(graph, dest_copy); | |||
886 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); | |||
887 | } | |||
888 | } | |||
889 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source); | |||
890 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination); | |||
891 | saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
892 | } | |||
893 | } | |||
894 | } | |||
895 | } | |||
896 | _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer); | |||
897 | if (this_parameter_flag) | |||
898 | { | |||
899 | ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2]; | |||
900 | ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1]; | |||
901 | const int* inputs = 0; | |||
902 | int input_size = 0; | |||
903 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0); | |||
904 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 904, __extension__ __PRETTY_FUNCTION__) ; })); | |||
905 | update_inputs[0].d = inputs[0]; | |||
906 | update_inputs[0].graph = graph; | |||
907 | update_inputs[1].d = inputs[1]; | |||
908 | update_inputs[1].graph = graph; | |||
909 | update_outputs[0] = updated_parameters[parameter_indice]; | |||
910 | for (j = 0; j < saved_aux_size; j++) | |||
911 | { | |||
912 | update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source; | |||
913 | update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination; | |||
914 | } | |||
915 | ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); | |||
916 | for (k = 1; k < parallel_count; k++) | |||
917 | { | |||
918 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k); | |||
919 | assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if (copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c" , 919, __extension__ __PRETTY_FUNCTION__); })); | |||
920 | ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0); | |||
921 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 921, __extension__ __PRETTY_FUNCTION__) ; })); | |||
922 | update_inputs[0].d = inputs[0]; | |||
923 | update_inputs[0].graph = graph; | |||
924 | update_inputs[1].d = inputs[1]; | |||
925 | update_inputs[1].graph = graph; | |||
926 | update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k); | |||
927 | for (j = 0; j < saved_aux_size; j++) | |||
928 | { | |||
929 | update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); | |||
930 | update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); | |||
931 | } | |||
932 | ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); | |||
933 | } | |||
934 | } | |||
935 | return this_parameter_flag; | |||
936 | } | |||
937 | ||||
938 | typedef struct { | |||
939 | int parameter_size; | |||
940 | ccv_nnc_cmd_t minimizer; | |||
941 | ccv_cnnp_model_io_t parameters[1]; | |||
942 | } ccv_cnnp_set_minimizer_for_parameter_t; | |||
943 | ||||
944 | static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model) | |||
945 | { | |||
946 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
947 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 947, __extension__ __PRETTY_FUNCTION__); })); | |||
948 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
949 | // We update all parameters, at this point, we have one minimizer. | |||
950 | const int parameter_size = compiled_data->parameters->rnum; | |||
951 | ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes; | |||
952 | ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph; | |||
953 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 953, __extension__ __PRETTY_FUNCTION__); })); | |||
954 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
955 | ccv_array_t* const parameters = compiled_data->minimize.parameters; | |||
956 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
957 | int i, j, flag = 0; | |||
958 | for (i = 0; i < parameters->rnum; i++) | |||
959 | { | |||
960 | ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(i))); | |||
961 | for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++) | |||
962 | { | |||
963 | const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel; | |||
964 | assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_sel != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_sel != 0" , "ccv_cnnp_model.c", 964, __extension__ __PRETTY_FUNCTION__) ; })); | |||
965 | const int old_rnum = parameter_indices->rnum; | |||
966 | ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices); | |||
967 | const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref; | |||
968 | assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_ref != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_ref != 0" , "ccv_cnnp_model.c", 968, __extension__ __PRETTY_FUNCTION__) ; })); | |||
969 | if (param_ref >= 0) | |||
970 | { | |||
971 | assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices-> rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum < parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum" , "ccv_cnnp_model.c", 971, __extension__ __PRETTY_FUNCTION__) ; })); | |||
972 | *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(old_rnum))) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref + old_rnum))); | |||
973 | parameter_indices->rnum = old_rnum + 1; | |||
974 | } | |||
975 | } | |||
976 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer); | |||
977 | // We may have duplicated indices, but that is OK, we will set it twice. | |||
978 | for (j = 0; j < parameter_indices->rnum; j++) | |||
979 | { | |||
980 | const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(j))); | |||
981 | assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__ ({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size" , "ccv_cnnp_model.c", 981, __extension__ __PRETTY_FUNCTION__) ; })); | |||
982 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d)) | |||
983 | flag = 1; | |||
984 | } | |||
985 | ccv_array_clear(parameter_indices); | |||
986 | } | |||
987 | ccv_array_free(parameter_indices); | |||
988 | return flag; | |||
989 | } | |||
990 | ||||
991 | static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size) | |||
992 | { | |||
993 | if (new_saved_aux_size == old_saved_aux_size) | |||
994 | return; | |||
995 | assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ? 1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size ) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size" , "ccv_cnnp_model.c", 995, __extension__ __PRETTY_FUNCTION__) ; })); | |||
996 | int i, j; | |||
997 | for (i = parameter_size - 1; i >= 0; i--) | |||
998 | { | |||
999 | for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--) | |||
1000 | saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
1001 | for (j = old_saved_aux_size - 1; j >= 0; j--) | |||
1002 | saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j]; | |||
1003 | } | |||
1004 | } | |||
1005 | ||||
1006 | static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model) | |||
1007 | { | |||
1008 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1009 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1009, __extension__ __PRETTY_FUNCTION__); })); | |||
1010 | if (!compiled_data->rewindables) | |||
1011 | compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0); | |||
1012 | ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0); | |||
1013 | ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0); | |||
1014 | ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0); | |||
1015 | } | |||
1016 | ||||
1017 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size) | |||
1018 | { | |||
1019 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1020 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 1020, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1021 | assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 1021, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1022 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
1023 | assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__ ({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0" , "ccv_cnnp_model.c", 1023, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1024 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1025 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); | |||
1026 | compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count); | |||
1027 | int i, j; | |||
1028 | const int output_size = model->output_size; | |||
1029 | assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size * parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count" , "ccv_cnnp_model.c", 1029, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1030 | if (fits) | |||
1031 | for (i = 0; i < output_size; i++) | |||
1032 | ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info); | |||
1033 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
1034 | const int parameter_size = compiled_data->parameters->rnum; | |||
1035 | compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size); | |||
1036 | compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size); | |||
1037 | compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size); | |||
1038 | int parameter_size_maybe_more = parameter_size; | |||
1039 | compiled_data->disable_outgrad = disable_outgrad; | |||
1040 | int outgrad_size; | |||
1041 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0) | |||
1042 | outgrad_size = 0; | |||
1043 | else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs. | |||
1044 | outgrad_size = model->input_size; | |||
1045 | else { | |||
1046 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 1046, __extension__ __PRETTY_FUNCTION__ ); })); // If it is disable all, gradient mode won't be this. | |||
1047 | outgrad_size = 0; | |||
1048 | for (i = 0; i < model->input_size; i++) | |||
1049 | if (!(disable_outgrad & ((uint64_t)1 << i))) | |||
1050 | ++outgrad_size; | |||
1051 | } | |||
1052 | compiled_data->outgrad_size = outgrad_size; | |||
1053 | parameter_size_maybe_more += outgrad_size; | |||
1054 | compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count); | |||
1055 | compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0; | |||
1056 | compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more); | |||
1057 | compiled_data->backward.to_size = parameter_size_maybe_more; | |||
1058 | ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))); | |||
1059 | if (compiled_data->parameter_flags) | |||
1060 | { | |||
1061 | parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size); | |||
1062 | for (i = 0; i < parameter_size; i++) | |||
1063 | if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
1064 | parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | |||
1065 | else | |||
1066 | parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
1067 | } | |||
1068 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0) | |||
1069 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | |||
1070 | else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs. | |||
1071 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | |||
1072 | else { // Compute minimize with gradients including selected inputs. | |||
1073 | assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__ ({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0" , "ccv_cnnp_model.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1074 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 1074, __extension__ __PRETTY_FUNCTION__ ); })); // If it is disable all, gradient mode won't be this. | |||
1075 | assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__ ({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0" , "ccv_cnnp_model.c", 1075, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1076 | ccv_nnc_tensor_symbol_t outgrads[outgrad_size]; | |||
1077 | j = 0; | |||
1078 | for (i = 0; i < model->input_size; i++) | |||
1079 | if (!(disable_outgrad & ((uint64_t)1 << i))) | |||
1080 | outgrads[j++] = model->inputs[i]; | |||
1081 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | |||
1082 | } | |||
1083 | if (compiled_data->parameter_flags) | |||
1084 | ccfreefree(parameters); | |||
1085 | _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size); | |||
1086 | if (compiled_data->minimize.parameters) | |||
1087 | _ccv_cnnp_apply_parameters_with_minimizer(model); | |||
1088 | // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass. | |||
1089 | ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph); | |||
1090 | for (i = 0; i < output_size; i++) | |||
1091 | { | |||
1092 | const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); | |||
1093 | // Init this to 1 so we can backprop. | |||
1094 | ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES); | |||
1095 | } | |||
1096 | compiled_data->backward.to_size = 0; | |||
1097 | for (i = 0; i < parameter_size_maybe_more; i++) | |||
1098 | if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
1099 | compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]); | |||
1100 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS); | |||
1101 | ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size); | |||
1102 | for (i = 0; i < parameter_size_maybe_more - parameter_size; i++) | |||
1103 | { | |||
1104 | if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads. | |||
1105 | continue; | |||
1106 | const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]); | |||
1107 | const int* tos; | |||
1108 | int to_size; | |||
1109 | ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size); | |||
1110 | if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes. | |||
1111 | { | |||
1112 | const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph); | |||
1113 | const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph); | |||
1114 | int flag = 0; | |||
1115 | const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = ( destination_count - i); (_a > _b) ? _a : _b; }); | |||
1116 | for (j = i - 1; !flag && j >= 0; j--) | |||
1117 | if (j + outgrad_destination_start < destination_count) | |||
1118 | flag = (destinations[j + outgrad_destination_start].d == outgrad.d); | |||
1119 | if (!flag) // Only if we cannot find it, we add it. | |||
1120 | ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad); | |||
1121 | } | |||
1122 | } | |||
1123 | if (parallel_count > 1) | |||
1124 | { | |||
1125 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, | |||
1126 | 0, 0, | |||
1127 | compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */, | |||
1128 | compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */, | |||
1129 | 0, 0, 0, | |||
1130 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, | |||
1131 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
1132 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
1133 | for (i = 0; i < evaluate_to_size; i++) | |||
1134 | for (j = 1; j < parallel_count; j++) | |||
1135 | { | |||
1136 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); | |||
1137 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | |||
1138 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; | |||
1139 | } | |||
1140 | const int backward_to_size = compiled_data->backward.to_size; | |||
1141 | for (i = 0; i < backward_to_size; i++) | |||
1142 | for (j = 1; j < parallel_count; j++) | |||
1143 | { | |||
1144 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j); | |||
1145 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | |||
1146 | compiled_data->backward.tos[compiled_data->backward.to_size++] = copy; | |||
1147 | } | |||
1148 | } | |||
1149 | // Only use memory compression if we are in gradient parameter mode. | |||
1150 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS) | |||
1151 | { | |||
1152 | if (model->memory_compression) | |||
1153 | ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
1154 | if (model->memory_reduction) | |||
1155 | ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
1156 | } | |||
1157 | compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size); | |||
1158 | compiled_data->gradient_mode = gradient_mode; | |||
1159 | } | |||
1160 | ||||
1161 | void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
1162 | { | |||
1163 | assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (!compiled_data->tensors.parameters ) ; else __assert_fail ("!compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 1163, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1164 | const int parameter_size = compiled_data->parameters->rnum; | |||
1165 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1166 | const int internal_size = compiled_data->internals->rnum; | |||
1167 | compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph); | |||
1168 | compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t)); | |||
1169 | compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*)); | |||
1170 | compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count; | |||
1171 | } | |||
1172 | ||||
1173 | int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
1174 | { | |||
1175 | int i, j; | |||
1176 | const int parameter_size = compiled_data->parameters->rnum; | |||
1177 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1178 | const int internal_size = compiled_data->internals->rnum; | |||
1179 | for (i = 0; i < parameter_size; i++) | |||
1180 | { | |||
1181 | // parameters has to be allocated all together. | |||
1182 | if (compiled_data->tensors.parameters[i]) | |||
1183 | { | |||
1184 | for (j = 1; j < parallel_count; j++) | |||
1185 | { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j * parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data ->tensors.parameters[i + j * parameter_size]) ; else __assert_fail ("compiled_data->tensors.parameters[i + j * parameter_size]" , "ccv_cnnp_model.c", 1185, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
1186 | continue; | |||
1187 | } | |||
1188 | return 1; | |||
1189 | } | |||
1190 | for (i = 0; i < internal_size; i++) | |||
1191 | { | |||
1192 | if (!compiled_data->tensors.internals[i]) | |||
1193 | return 1; | |||
1194 | for (j = 1; j < parallel_count; j++) | |||
1195 | if (!compiled_data->tensors.internals[i + j * internal_size]) | |||
1196 | return 1; | |||
1197 | } | |||
1198 | return 0; | |||
1199 | } | |||
1200 | ||||
1201 | void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
1202 | { | |||
1203 | int i, j; | |||
1204 | const int parameter_size = compiled_data->parameters->rnum; | |||
1205 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1206 | const int internal_size = compiled_data->internals->rnum; | |||
1207 | for (i = 0; i < parameter_size; i++) | |||
1208 | { | |||
1209 | // parameters has to be allocated all together. | |||
1210 | if (compiled_data->tensors.parameters[i]) | |||
1211 | { | |||
1212 | for (j = 1; j < parallel_count; j++) | |||
1213 | { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j * parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data ->tensors.parameters[i + j * parameter_size]) ; else __assert_fail ("compiled_data->tensors.parameters[i + j * parameter_size]" , "ccv_cnnp_model.c", 1213, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
1214 | continue; | |||
1215 | } | |||
1216 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | |||
1217 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); | |||
1218 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) | |||
1219 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
1220 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
1221 | compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0); | |||
1222 | for (j = 1; j < parallel_count; j++) | |||
1223 | { | |||
1224 | if (j != device_id) | |||
1225 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | |||
1226 | else | |||
1227 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
1228 | compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | |||
1229 | } | |||
1230 | } | |||
1231 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
1232 | for (i = 0; i < internal_size; i++) | |||
1233 | { | |||
1234 | const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ); | |||
1235 | const int d = retained.d; | |||
1236 | if (init_v[d >> 5] & (1u << (d & 0x1f))) | |||
1237 | continue; | |||
1238 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained); | |||
1239 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) | |||
1240 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
1241 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
1242 | if (!compiled_data->tensors.internals[i]) | |||
1243 | compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0); | |||
1244 | for (j = 1; j < parallel_count; j++) | |||
1245 | { | |||
1246 | if (j != device_id) | |||
1247 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | |||
1248 | else | |||
1249 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
1250 | if (!compiled_data->tensors.internals[i + j * internal_size]) | |||
1251 | compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0); | |||
1252 | } | |||
1253 | } | |||
1254 | compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); // Remove 1 if any. | |||
1255 | } | |||
1256 | ||||
1257 | static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
1258 | { | |||
1259 | ccv_cnnp_model_tensors_init_0(model, compiled_data); | |||
1260 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
1261 | } | |||
1262 | ||||
1263 | static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) | |||
1264 | { | |||
1265 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1265, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1266 | int i, j; | |||
1267 | for (i = 0; i < tensor_size; i++) | |||
1268 | { | |||
1269 | if (!tensors[i]) | |||
1270 | continue; | |||
1271 | const int d = tensor_symbols[i].d; | |||
1272 | if (!(tensors_init[d >> 5] & (1u << (d & 0x1f)))) | |||
1273 | continue; | |||
1274 | for (j = 1; j < parallel_count; j++) | |||
1275 | if (tensors[i + j * tensor_size]) | |||
1276 | { | |||
1277 | ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t )1)); | |||
1278 | ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size] ) & ~(uintptr_t)1)); | |||
1279 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0); | |||
1280 | } | |||
1281 | } | |||
1282 | } | |||
1283 | ||||
1284 | static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count) | |||
1285 | { | |||
1286 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1286, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1287 | int i, j; | |||
1288 | for (i = 0; i < tensor_size; i++) | |||
1289 | { | |||
1290 | const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | |||
1291 | for (j = 1; j < parallel_count; j++) | |||
1292 | { | |||
1293 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | |||
1294 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; | |||
1295 | if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
1296 | { // We shouldn't allocate this, free it up. | |||
1297 | ccv_nnc_tensor_free(tensors[i + j * tensor_size]); | |||
1298 | tensors[i + j * tensor_size] = 0; | |||
1299 | } | |||
1300 | } | |||
1301 | } | |||
1302 | } | |||
1303 | ||||
1304 | static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds) | |||
1305 | { | |||
1306 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1306, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1307 | int i, j; | |||
1308 | for (i = 0; i < tensor_size; i++) | |||
1309 | { | |||
1310 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | |||
1311 | if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
1312 | continue; | |||
1313 | if (graph) | |||
1314 | { | |||
1315 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); | |||
1316 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
1317 | tensor_symbol = alias_to; | |||
1318 | } | |||
1319 | ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t )1)); | |||
1320 | if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
1321 | { | |||
1322 | const ccv_nnc_tensor_bind_t retained_bind = { | |||
1323 | .symbol = tensor_symbol, | |||
1324 | .tensor = tensor | |||
1325 | }; | |||
1326 | ccv_array_push(tensor_binds, &retained_bind); | |||
1327 | } | |||
1328 | for (j = 1; j < parallel_count; j++) | |||
1329 | { | |||
1330 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | |||
1331 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; | |||
1332 | if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
1333 | { | |||
1334 | const ccv_nnc_tensor_bind_t bind = { | |||
1335 | .symbol = copy, | |||
1336 | .tensor = tensors[i + j * tensor_size] | |||
1337 | }; | |||
1338 | ccv_array_push(tensor_binds, &bind); | |||
1339 | } | |||
1340 | } | |||
1341 | } | |||
1342 | } | |||
1343 | ||||
1344 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
1345 | { | |||
1346 | if (compiled_data->graph) | |||
1347 | ccv_nnc_graph_free(compiled_data->graph); | |||
1348 | compiled_data->graph = 0; | |||
1349 | compiled_data->is_test = 0; | |||
1350 | if (compiled_data->tensor_arena) | |||
1351 | ccv_nnc_tensor_arena_free(compiled_data->tensor_arena); | |||
1352 | compiled_data->tensor_arena = 0; | |||
1353 | if (compiled_data->graph_exec_arena) | |||
1354 | ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena); | |||
1355 | compiled_data->graph_exec_arena = 0; | |||
1356 | if (compiled_data->backward.from_ops) | |||
1357 | ccfreefree(compiled_data->backward.from_ops); | |||
1358 | compiled_data->backward.from_ops = 0; | |||
1359 | if (compiled_data->evaluate.schedule) | |||
1360 | ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule); | |||
1361 | compiled_data->evaluate.schedule = 0; | |||
1362 | if (compiled_data->backward.schedule) | |||
1363 | ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule); | |||
1364 | compiled_data->backward.schedule = 0; | |||
1365 | } | |||
1366 | ||||
1367 | static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
1368 | { | |||
1369 | if (compiled_data->gradients) | |||
1370 | ccfreefree(compiled_data->gradients); | |||
1371 | compiled_data->gradients = 0; | |||
1372 | if (compiled_data->updated_parameters) | |||
1373 | ccfreefree(compiled_data->updated_parameters); | |||
1374 | compiled_data->updated_parameters = 0; | |||
1375 | compiled_data->update_nodes = 0; | |||
1376 | compiled_data->saved_aux = 0; | |||
1377 | } | |||
1378 | ||||
1379 | static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
1380 | { | |||
1381 | if (compiled_data->backward.gradients) | |||
1382 | ccfreefree(compiled_data->backward.gradients); | |||
1383 | compiled_data->backward.gradients = 0; | |||
1384 | if (compiled_data->backward.accum) | |||
1385 | ccv_nnc_graph_free(compiled_data->backward.accum); | |||
1386 | compiled_data->backward.accum = 0; | |||
1387 | if (compiled_data->backward.tensor_arena) | |||
1388 | ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena); | |||
1389 | compiled_data->backward.tensor_arena = 0; | |||
1390 | if (compiled_data->backward.graph_exec_arena) | |||
1391 | ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena); | |||
1392 | compiled_data->backward.graph_exec_arena = 0; | |||
1393 | } | |||
1394 | ||||
1395 | static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
1396 | { | |||
1397 | if (compiled_data->apply_gradients.graph) | |||
1398 | ccv_nnc_graph_free(compiled_data->apply_gradients.graph); | |||
1399 | compiled_data->apply_gradients.graph = 0; | |||
1400 | if (compiled_data->apply_gradients.tensor_arena) | |||
1401 | ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena); | |||
1402 | compiled_data->apply_gradients.tensor_arena = 0; | |||
1403 | if (compiled_data->apply_gradients.graph_exec_arena) | |||
1404 | ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena); | |||
1405 | compiled_data->apply_gradients.graph_exec_arena = 0; | |||
1406 | } | |||
1407 | ||||
1408 | // Compile the graph to run ccv_cnnp_model_fit | |||
1409 | static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
1410 | { | |||
1411 | int i, j; | |||
1412 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1413 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE" , "ccv_cnnp_model.c", 1413, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1414 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE; | |||
1415 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1416 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1416, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1417 | assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__ ({ if (!fits || output_size == fit_size) ; else __assert_fail ("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1417 , __extension__ __PRETTY_FUNCTION__); })); | |||
1418 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1418, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1419 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
1420 | { | |||
1421 | _ccv_cnnp_model_set_rewindables(model); | |||
1422 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); | |||
1423 | } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) { | |||
1424 | _ccv_cnnp_model_rewind_graph(model); | |||
1425 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | |||
1426 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; | |||
1427 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); | |||
1428 | } | |||
1429 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
1430 | if (!tensors_init) | |||
1431 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
1432 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
1433 | // Check if it is not fully allocated, if it is not, init_1. | |||
1434 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
1435 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
1436 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1436, __extension__ __PRETTY_FUNCTION__); })); | |||
1437 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1437, __extension__ __PRETTY_FUNCTION__); })); | |||
1438 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1438 , __extension__ __PRETTY_FUNCTION__); })); | |||
1439 | const int input_size_per_p = input_size / parallel_count; | |||
1440 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | |||
1441 | const int output_size_per_p = output_size / parallel_count; | |||
1442 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | |||
1443 | const int fit_size_per_p = fit_size / parallel_count; | |||
1444 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds); | |||
1445 | const int parameter_size = compiled_data->parameters->rnum; | |||
1446 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
1447 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
1448 | const int internal_size = compiled_data->internals->rnum; | |||
1449 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | |||
1450 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | |||
1451 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
1452 | ccv_array_free(tensor_binds); | |||
1453 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
1454 | if (tensors_init && parallel_count > 1) | |||
1455 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | |||
1456 | // If tensor is not init'ed, we need to init states first. | |||
1457 | if (_ccv_cnnp_any_to_init(compiled_data)) | |||
1458 | { | |||
1459 | ccv_nnc_tensor_init_states_t tensor_init_states = { | |||
1460 | .parallel_count = parallel_count, | |||
1461 | .graph = model->graph, | |||
1462 | .compiled_data = compiled_data, | |||
1463 | .tensor_arena = compiled_data->tensor_arena | |||
1464 | }; | |||
1465 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | |||
1466 | } | |||
1467 | compiled_data->is_test = 0; | |||
1468 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer); | |||
1469 | // No need to set because it is default to training mode. | |||
1470 | // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); | |||
1471 | for (i = 0; i < saved_aux_size * parameter_size; i++) | |||
1472 | { | |||
1473 | if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
1474 | continue; | |||
1475 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source); | |||
1476 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); | |||
1477 | for (j = 1; j < parallel_count; j++) | |||
1478 | { | |||
1479 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); | |||
1480 | if (copy) | |||
1481 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, ©, 1, 0); | |||
1482 | } | |||
1483 | } | |||
1484 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
1485 | compiled_data->evaluate.to_op_size = 0; | |||
1486 | for (i = 0; i < evaluate_to_size; i++) | |||
1487 | { | |||
1488 | ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); | |||
1489 | if (to.graph) | |||
1490 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to; | |||
1491 | } | |||
1492 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); | |||
1493 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
1494 | } | |||
1495 | ||||
1496 | ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model) | |||
1497 | { | |||
1498 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1499 | if (!compiled_data || !compiled_data->graph) | |||
1500 | return 0; | |||
1501 | return ccv_nnc_graph_default_stream(compiled_data->graph); | |||
1502 | } | |||
1503 | ||||
1504 | uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model) | |||
1505 | { | |||
1506 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1507 | if (!compiled_data || !compiled_data->tensor_arena) | |||
1508 | return 0; | |||
1509 | return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena); | |||
1510 | } | |||
1511 | ||||
1512 | static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) | |||
1513 | { | |||
1514 | int i, j; | |||
1515 | for (i = 0; i < tensor_size; i++) | |||
1516 | { | |||
1517 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | |||
1518 | if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
1519 | continue; | |||
1520 | if (graph) | |||
1521 | { | |||
1522 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); | |||
1523 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
1524 | tensor_symbol = alias_to; | |||
1525 | } | |||
1526 | ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]); | |||
1527 | for (j = 1; j < parallel_count; j++) | |||
1528 | { | |||
1529 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | |||
1530 | if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
1531 | ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]); | |||
1532 | } | |||
1533 | } | |||
1534 | } | |||
1535 | ||||
1536 | void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | |||
1537 | { | |||
1538 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1539 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1539, __extension__ __PRETTY_FUNCTION__); })); | |||
1540 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1541 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1541, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1542 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1542, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1543 | assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size) ; else __assert_fail ("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1543 , __extension__ __PRETTY_FUNCTION__); })); | |||
1544 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1544, __extension__ __PRETTY_FUNCTION__); })); | |||
1545 | if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) | |||
1546 | { | |||
1547 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
1548 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | |||
1549 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | |||
1550 | // Compile the symbolic graph down only when needed. | |||
1551 | _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size); | |||
1552 | } else { | |||
1553 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1553, __extension__ __PRETTY_FUNCTION__); })); | |||
1554 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1554, __extension__ __PRETTY_FUNCTION__); })); | |||
1555 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1555 , __extension__ __PRETTY_FUNCTION__); })); | |||
1556 | const int input_size_per_p = input_size / parallel_count; | |||
1557 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); | |||
1558 | const int output_size_per_p = output_size / parallel_count; | |||
1559 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); | |||
1560 | const int fit_size_per_p = fit_size / parallel_count; | |||
1561 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count); | |||
1562 | } | |||
1563 | if (compiled_data->is_test) | |||
1564 | { | |||
1565 | compiled_data->is_test = 0; | |||
1566 | ccv_nnc_graph_exec_update_t update = { | |||
1567 | .parallel_count = parallel_count, | |||
1568 | .graph = model->graph, | |||
1569 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
1570 | }; | |||
1571 | ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); | |||
1572 | } | |||
1573 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); | |||
1574 | } | |||
1575 | ||||
1576 | // Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD). | |||
1577 | static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
1578 | { | |||
1579 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1580 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD; | |||
1581 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1582 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1582, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1583 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1583, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1584 | // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather, | |||
1585 | // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel. | |||
1586 | if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
1587 | { | |||
1588 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
1589 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); | |||
1590 | _ccv_cnnp_model_set_rewindables(model); | |||
1591 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, | |||
1592 | 0, 0, | |||
1593 | 0, 0, 0, | |||
1594 | 0, 0, 0, | |||
1595 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, | |||
1596 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
1597 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
1598 | int i, j; | |||
1599 | for (i = 0; i < evaluate_to_size; i++) | |||
1600 | for (j = 1; j < parallel_count; j++) | |||
1601 | { | |||
1602 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); | |||
1603 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | |||
1604 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; | |||
1605 | } | |||
1606 | } | |||
1607 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
1608 | if (!tensors_init) | |||
1609 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
1610 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
1611 | // Check if it is not fully allocated, if it is not, init_1. | |||
1612 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
1613 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
1614 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1614, __extension__ __PRETTY_FUNCTION__); })); | |||
1615 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1615, __extension__ __PRETTY_FUNCTION__); })); | |||
1616 | const int input_size_per_p = input_size / parallel_count; | |||
1617 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | |||
1618 | const int output_size_per_p = output_size / parallel_count; | |||
1619 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | |||
1620 | const int parameter_size = compiled_data->parameters->rnum; | |||
1621 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
1622 | const int internal_size = compiled_data->internals->rnum; | |||
1623 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | |||
1624 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | |||
1625 | // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation. | |||
1626 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
1627 | ccv_array_free(tensor_binds); | |||
1628 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
1629 | // If tensor is not init'ed, we need to init states first. | |||
1630 | if (tensors_init && parallel_count > 1) | |||
1631 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | |||
1632 | if (_ccv_cnnp_any_to_init(compiled_data)) | |||
1633 | { | |||
1634 | ccv_nnc_tensor_init_states_t tensor_init_states = { | |||
1635 | .parallel_count = parallel_count, | |||
1636 | .graph = model->graph, | |||
1637 | .compiled_data = compiled_data, | |||
1638 | .tensor_arena = compiled_data->tensor_arena | |||
1639 | }; | |||
1640 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | |||
1641 | } | |||
1642 | compiled_data->is_test = 1; | |||
1643 | ccv_nnc_graph_exec_update_t update = { | |||
1644 | .parallel_count = parallel_count, | |||
1645 | .graph = model->graph, | |||
1646 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
1647 | }; | |||
1648 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); | |||
1649 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); | |||
1650 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
1651 | } | |||
1652 | ||||
1653 | static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
1654 | { | |||
1655 | assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0 ), __extension__ ({ if (!compiled_data->tensors.gradients) ; else __assert_fail ("!compiled_data->tensors.gradients" , "ccv_cnnp_model.c", 1655, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1656 | const int parameter_size = compiled_data->parameters->rnum; | |||
1657 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1658 | compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count); | |||
1659 | compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count; | |||
1660 | int i, j; | |||
1661 | for (i = 0; i < parameter_size; i++) | |||
1662 | { | |||
1663 | if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
1664 | { | |||
1665 | compiled_data->tensors.gradients[i] = 0; | |||
1666 | compiled_data->tensors.accum_gradients[i] = 0; | |||
1667 | for (j = 1; j < parallel_count; j++) | |||
1668 | { | |||
1669 | compiled_data->tensors.gradients[i + j * parameter_size] = 0; | |||
1670 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0; | |||
1671 | } | |||
1672 | continue; | |||
1673 | } | |||
1674 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | |||
1675 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); | |||
1676 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) | |||
1677 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
1678 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
1679 | compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0); | |||
1680 | compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it. | |||
1681 | for (j = 1; j < parallel_count; j++) | |||
1682 | { | |||
1683 | if (j != device_id) | |||
1684 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | |||
1685 | else | |||
1686 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
1687 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | |||
1688 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0; | |||
1689 | } | |||
1690 | } | |||
1691 | } | |||
1692 | ||||
1693 | static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size) | |||
1694 | { | |||
1695 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL) | |||
1696 | return 1; | |||
1697 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) | |||
1698 | return 0; | |||
1699 | int i; | |||
1700 | for (i = 0; i < input_size; i++) | |||
1701 | if (!(disable_outgrad & ((uint64_t)1 << i))) | |||
1702 | return 0; | |||
1703 | return 1; | |||
1704 | } | |||
1705 | ||||
1706 | // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). | |||
1707 | // Particularly, this method compiles the evaluation and backprop graph (the main graph). | |||
1708 | static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
1709 | { | |||
1710 | int i, j; | |||
1711 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1712 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; | |||
1713 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data ->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data-> gradient_mode != target_gradient_mode) ; else __assert_fail ( "!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode" , "ccv_cnnp_model.c", 1713, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1714 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE; | |||
1715 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1716 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1716, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1717 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1717, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1718 | // There shouldn't be a loss function if we evaluate with multistage jit. | |||
1719 | assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ? 1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP ) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP" , "ccv_cnnp_model.c", 1719, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1720 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
1721 | { | |||
1722 | _ccv_cnnp_model_set_rewindables(model); | |||
1723 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. | |||
1724 | } else if (compiled_data->gradient_mode != target_gradient_mode) { | |||
1725 | _ccv_cnnp_model_rewind_graph(model); | |||
1726 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | |||
1727 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; | |||
1728 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. | |||
1729 | } | |||
1730 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
1731 | if (!tensors_init) | |||
1732 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
1733 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
1734 | // Check if it is not fully allocated, if it is not, init_1. | |||
1735 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
1736 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
1737 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1737, __extension__ __PRETTY_FUNCTION__); })); | |||
1738 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1738, __extension__ __PRETTY_FUNCTION__); })); | |||
1739 | const int input_size_per_p = input_size / parallel_count; | |||
1740 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | |||
1741 | const int output_size_per_p = output_size / parallel_count; | |||
1742 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | |||
1743 | const int parameter_size = compiled_data->parameters->rnum; | |||
1744 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
1745 | const int internal_size = compiled_data->internals->rnum; | |||
1746 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | |||
1747 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | |||
1748 | if (!compiled_data->tensors.gradients) | |||
1749 | _ccv_cnnp_model_gradient_tensors_init(model, compiled_data); | |||
1750 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); | |||
1751 | if (compiled_data->backward.to_size > 0) | |||
1752 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
1753 | else | |||
1754 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
1755 | ccv_array_free(tensor_binds); | |||
1756 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
1757 | if (tensors_init && parallel_count > 1) | |||
1758 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | |||
1759 | // If tensor is not init'ed, we need to init states first. | |||
1760 | if (_ccv_cnnp_any_to_init(compiled_data)) | |||
1761 | { | |||
1762 | ccv_nnc_tensor_init_states_t tensor_init_states = { | |||
1763 | .parallel_count = parallel_count, | |||
1764 | .graph = model->graph, | |||
1765 | .compiled_data = compiled_data, | |||
1766 | .tensor_arena = compiled_data->tensor_arena | |||
1767 | }; | |||
1768 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | |||
1769 | } | |||
1770 | compiled_data->is_test = is_test; | |||
1771 | ccv_nnc_graph_exec_update_t update = { | |||
1772 | .parallel_count = parallel_count, | |||
1773 | .graph = model->graph, | |||
1774 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
1775 | }; | |||
1776 | ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update); | |||
1777 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
1778 | compiled_data->evaluate.to_op_size = 0; | |||
1779 | ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0); | |||
1780 | for (i = 0; i < evaluate_to_size; i++) | |||
1781 | { | |||
1782 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); | |||
1783 | if (to_op.graph) | |||
1784 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op; | |||
1785 | const int* tos; | |||
1786 | int to_size; | |||
1787 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size); | |||
1788 | for (j = 0; j < to_size; j++) | |||
1789 | { | |||
1790 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ | |||
1791 | .d = tos[j], | |||
1792 | .graph = model->graph | |||
1793 | }); | |||
1794 | if (to_op.graph) | |||
1795 | ccv_array_add_unique_int(backward_from, to_op.d); | |||
1796 | } | |||
1797 | } | |||
1798 | assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__ ({ if (backward_from->rnum > 0) ; else __assert_fail ( "backward_from->rnum > 0", "ccv_cnnp_model.c", 1798, __extension__ __PRETTY_FUNCTION__); })); | |||
1799 | compiled_data->backward.from_op_size = backward_from->rnum; | |||
1800 | compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum); | |||
1801 | for (i = 0; i < backward_from->rnum; i++) | |||
1802 | compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){ | |||
1803 | .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from )->rsize * (size_t)(i))), | |||
1804 | .graph = compiled_data->graph, | |||
1805 | }; | |||
1806 | // If there are any set node (to set some tensors to 0) inserted through backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find out these nodes and explicitly adding them to backward.from_ops. | |||
1807 | ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)-> data)) + (size_t)(compiled_data->graph->exec_info)-> rsize * (size_t)(0))); | |||
1808 | const int exec_info_size = compiled_data->graph->exec_info->rnum; | |||
1809 | uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t)); | |||
1810 | const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data )) + (size_t)(compiled_data->graph->sources)->rsize * (size_t)(0))); | |||
1811 | const int source_size = compiled_data->graph->sources->rnum; | |||
1812 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info )[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)(( void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + ( size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_ ))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_ ++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_ ].graph == compiled_data->graph) ? 1 : 0), __extension__ ( { if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_]. 
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops )[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_ ++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_ ].graph == compiled_data->graph) ? 1 : 0), __extension__ ( { if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_]. d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { (( void) sizeof (((sources)[_i_].graph == compiled_data->graph ) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_ ] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size ].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_ [_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info )[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[ _idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings )->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_ ] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[ _idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void *)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t )((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); -- _incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_ [d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size )) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size ); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_ ].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_ [(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0) , __extension__ ({ if (_incomings_[(compiled_data->evaluate .to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1812, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(compiled_data->evaluate.to_ops )[_i_].d].c > 0) continue; _visit_->node[_visit_->size ].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_ ->node[_visit_->size].term = ((_incomings_[(compiled_data ->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if ( _heap_mem_) free(_incomings_); } while (0);; ((void) sizeof ( (_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c" , 1812, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); | |||
1813 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
1814 | visited[(idx >> 5)] |= (1u << (idx & 31)); | |||
1815 | } ccv_nnc_graph_visit_endfor} } | |||
1816 | ccv_nnc_graph_visit_free(visit); | |||
1817 | const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)-> data)) + (size_t)(compiled_data->graph->destinations)-> rsize * (size_t)(0))); | |||
1818 | const int destination_size = compiled_data->graph->destinations->rnum; | |||
1819 | visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size) ; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->backward.from_ops)[_i_ ].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } int _exist_size_[2] = { (compiled_data->backward .from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[ _idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size) ; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->backward.from_ops)[_i_ ].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue ; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info )[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_]. outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_ [d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_ [d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof (( _exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c" , 1819, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_ ][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = ( _p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations) [_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0] [_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size); _i_++ ) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_ ].graph == compiled_data->graph) ? 
1 : 0), __extension__ ( { if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data ->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0 ; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_ [_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings ) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size) ) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == compiled_data-> graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_ ].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations )[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)" , "ccv_cnnp_model.c", 1819, __extension__ __PRETTY_FUNCTION__ ); })); _visit_; }); | |||
1820 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
1821 | visited[(idx >> 5)] |= (1u << (idx & 31)); | |||
1822 | } ccv_nnc_graph_visit_endfor} } | |||
1823 | ccv_nnc_graph_visit_free(visit); | |||
1824 | visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info )[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)(( void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + ( size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_ ))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0] [_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_ ] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size ].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_ [_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info )[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[ _idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings )->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info )[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_]. outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == compiled_data-> graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_ ].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations )[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)" , "ccv_cnnp_model.c", 1824, __extension__ __PRETTY_FUNCTION__ ); })); _visit_; }); | |||
1825 | // Find any missing nodes to be added as source. Right now, these are only set nodes. | |||
1826 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
1827 | if (!(visited[(idx >> 5)] & (1u << (idx & 31)))) | |||
1828 | { | |||
1829 | assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD ) ? 1 : 0), __extension__ ({ if (exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD ) ; else __assert_fail ("exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD" , "ccv_cnnp_model.c", 1829, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1830 | if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for empty out the tensor set function, not for the set grad to 1 one. | |||
1831 | ccv_array_add_unique_int(backward_from, idx); | |||
1832 | } | |||
1833 | } ccv_nnc_graph_visit_endfor} } | |||
1834 | ccv_nnc_graph_visit_free(visit); | |||
1835 | ccfreefree(visited); | |||
1836 | if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this. | |||
1837 | { | |||
1838 | compiled_data->backward.from_op_size = backward_from->rnum; | |||
1839 | compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum); | |||
1840 | for (i = 0; i < backward_from->rnum; i++) | |||
1841 | compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){ | |||
1842 | .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from )->rsize * (size_t)(i))), | |||
1843 | .graph = compiled_data->graph, | |||
1844 | }; | |||
1845 | } | |||
1846 | ccv_array_free(backward_from); | |||
1847 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); | |||
1848 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
1849 | } | |||
1850 | ||||
// Prepare the compiled graph of the model for an evaluation pass. This function itself
// never runs the graph; ccv_cnnp_model_evaluate below calls it and then runs the graph.
// - model: must already carry compiled_data and a symbolic graph (both asserted).
// - params: requires_grad, disable_outgrad and is_test are the fields read here.
// - inputs / input_size: input_size must equal model->input_size * parallel_count.
// - outputs / output_size: output_size must equal model->output_size * parallel_count.
// The concrete graph is (re)built when it is missing or when gradients are requested
// and the previously compiled mode does not match; otherwise only the tensor bindings
// are refreshed.
1851 | void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
1852 | { | |||
1853 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1854 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1854, __extension__ __PRETTY_FUNCTION__); })); | |||
// A parallel_count below 1 is treated as 1.
1855 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1856 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1856, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1857 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1857, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1858 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1858, __extension__ __PRETTY_FUNCTION__); })); | |||
// Gradient mode this call wants: trainables only when the helper reports that outgrad
// is disabled for all inputs, trainables and inputs otherwise.
1859 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; | |||
// A mismatch is only possible when gradients are requested: the graph mode, gradient
// mode and disable_outgrad recorded at compile time must all agree with this call.
1860 | const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad)); | |||
1861 | if (!compiled_data->graph || mode_mismatch) | |||
1862 | { | |||
// Drop the stale graph state, then compile again with or without gradient support.
1863 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
1864 | if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad). | |||
1865 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | |||
1866 | if (params.requires_grad) | |||
1867 | _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size); | |||
1868 | else | |||
1869 | _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size); | |||
1870 | } else { | |||
// The existing graph is reused: clear the old bindings and bind the tensors passed to
// this call, split evenly per parallel replica.
1871 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena); | |||
1872 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1872, __extension__ __PRETTY_FUNCTION__); })); | |||
1873 | const int input_size_per_p = input_size / parallel_count; | |||
1874 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); | |||
1875 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1875, __extension__ __PRETTY_FUNCTION__); })); | |||
1876 | const int output_size_per_p = output_size / parallel_count; | |||
1877 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); | |||
1878 | } | |||
// Only push the is_test flag into the model when it differs from the recorded value.
1879 | if (compiled_data->is_test != params.is_test) | |||
1880 | { | |||
1881 | compiled_data->is_test = params.is_test; | |||
1882 | ccv_nnc_graph_exec_update_t update = { | |||
1883 | .parallel_count = parallel_count, | |||
1884 | .graph = model->graph, | |||
1885 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
1886 | }; | |||
1887 | ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update); | |||
1888 | } | |||
1889 | } | |||
1890 | ||||
// Evaluate the model: prepare the compiled graph through ccv_cnnp_model_dry_run with the
// same arguments, then run it.
// - tensor_tape and stream_context are forwarded unchanged to the graph run call.
// - All size and state checks on inputs and outputs happen inside ccv_cnnp_model_dry_run.
1891 | void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | |||
1892 | { | |||
1893 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1894 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1894, __extension__ __PRETTY_FUNCTION__); })); | |||
1895 | ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size); | |||
// In no-grad mode the graph is run with a null schedule argument.
1896 | if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD) | |||
1897 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); | |||
1898 | else { | |||
// Otherwise a schedule built from evaluate.to_ops is created on first use and cached
// in compiled_data->evaluate.schedule.
// NOTE(review): presumably this limits the run to the forward part that ends at
// evaluate.to_ops; confirm against ccv_nnc_graph_static_schedule_new.
1899 | if (!compiled_data->evaluate.schedule) | |||
1900 | compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size); | |||
1901 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context); | |||
1902 | } | |||
1903 | } | |||
1904 | ||||
1905 | // Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). | |||
1906 | // Particularly, this method compiles the accumulator graph. | |||
// Outline of what the body does:
// 1. Allocate one symbol buffer and split it into three equal parts (gradients,
//    accum_gradients, updated_accum_gradients), each parameter_size * parallel_count long.
// 2. For every slot that has a gradient tensor, add an element-wise sum node
//    updated_accum = accum + gradient to a temporary symbolic graph.
// 3. Compile that symbolic graph into compiled_data->backward.accum, or install an empty
//    concrete graph when no node was added.
// Requires compiled_data to be present and in MULTISTAGE_MODE (both asserted).
1907 | static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model) | |||
1908 | { | |||
1909 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1910 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1910, __extension__ __PRETTY_FUNCTION__); })); | |||
1911 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1911, __extension__ __PRETTY_FUNCTION__ ); })); | |||
// Temporary symbolic graph; it is freed on both exit paths below.
1912 | ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new(); | |||
1913 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1914 | const int parameter_size = compiled_data->parameters->rnum; | |||
1915 | int i, j; | |||
// Single allocation, three views: gradients first, then accum_gradients, then
// updated_accum_gradients.
1916 | compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3); | |||
1917 | compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count; | |||
1918 | compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count; | |||
// Slots are indexed as i + j * parameter_size (parameter i, replica j).
1919 | for (i = 0; i < parameter_size; i++) | |||
1920 | for (j = 0; j < parallel_count; j++) | |||
1921 | if (compiled_data->tensors.gradients[i + j * parameter_size]) | |||
1922 | { | |||
1923 | const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info; | |||
1924 | // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them. | |||
1925 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size]; | |||
1926 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | |||
// Three symbols with identical tensor parameters: the two sum inputs and the sum output.
1927 | ccv_nnc_tensor_symbol_t inputs[2]; | |||
1928 | inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | |||
1929 | inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | |||
1930 | ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | |||
1931 | ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0); | |||
1932 | } else { | |||
// No gradient tensor for this slot: mark all three symbols as absent.
1933 | compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
1934 | compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
1935 | compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
1936 | } | |||
1937 | ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
// If the symbolic graph has no source node there is nothing to compile: install an
// empty concrete graph so that backward.accum is still non-null, and return early.
1938 | if (ccv_nnc_symbolic_graph_source_size(accum) == 0) | |||
1939 | { | |||
1940 | ccv_nnc_symbolic_graph_free(accum); | |||
1941 | // Create empty graph. | |||
1942 | compiled_data->backward.accum = ccv_nnc_graph_new(); | |||
1943 | ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0); | |||
1944 | return; | |||
1945 | } | |||
// Bind symbols to concrete tensors. Both accum_gradients and updated_accum_gradients are
// bound to the same tensors.accum_gradients array.
1946 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
1947 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); | |||
1948 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds); | |||
1949 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); | |||
// Compile into the concrete accumulator graph plus its tensor arena and exec arena.
1950 | ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size (accum), SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size (accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena); | |||
1951 | ccv_nnc_symbolic_graph_free(accum); | |||
1952 | ccv_array_free(tensor_binds); | |||
1953 | ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count); | |||
1954 | } | |||
1955 | ||||
1956 | void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | |||
1957 | { | |||
1958 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
1959 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1959, __extension__ __PRETTY_FUNCTION__); })); | |||
1960 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1960, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1961 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
1962 | assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model-> output_size * parallel_count) ? 1 : 0), __extension__ ({ if ( ingrad_size == 0 || ingrad_size == model->output_size * parallel_count ) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1962, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1963 | if (outgrad_size > 0) | |||
1964 | { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size * parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size == compiled_data->outgrad_size * parallel_count) ; else __assert_fail ("outgrad_size == compiled_data->outgrad_size * parallel_count" , "ccv_cnnp_model.c", 1964, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
1965 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1965, __extension__ __PRETTY_FUNCTION__); })); | |||
1966 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 1966, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1967 | const int parameter_size = compiled_data->parameters->rnum; | |||
1968 | // If we need to accumulate the gradients now, do jit on accumulator. | |||
1969 | if (compiled_data->backward.count > 0) | |||
1970 | { | |||
1971 | if (!compiled_data->backward.accum) | |||
1972 | _ccv_cnnp_model_multistage_jit_1(model); | |||
1973 | else if (compiled_data->backward.count == 1) { | |||
1974 | // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly). | |||
1975 | int i; | |||
1976 | for (i = 0; i < parameter_size * parallel_count; i++) | |||
1977 | { | |||
1978 | ccv_nnc_tensor_t* tensor; | |||
1979 | CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), ( compiled_data->tensors.accum_gradients[i]) = (compiled_data ->tensors.gradients[i]), (compiled_data->tensors.gradients [i]) = (tensor)); | |||
1980 | } | |||
1981 | if (compiled_data->backward.tensor_arena) | |||
1982 | { | |||
1983 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena); | |||
1984 | // Do rebind in case we messed up the binding (we switch accum_gradients and gradients). | |||
1985 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1); | |||
1986 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); | |||
1987 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); | |||
1988 | } | |||
1989 | } | |||
1990 | } | |||
1991 | const int ingrad_size_per_p = model->output_size; | |||
1992 | const int outgrad_size_per_p = compiled_data->outgrad_size; | |||
1993 | int i, j; | |||
1994 | for (i = 0; i < ingrad_size_per_p; i++) | |||
1995 | { | |||
1996 | const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); | |||
1997 | if (!ingrad_size || !ingrads || ingrads[i] == 0) | |||
1998 | { | |||
1999 | // Set it to 1 if it is not specified. | |||
2000 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad); | |||
2001 | if (ingrad_tensor) | |||
2002 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); | |||
2003 | for (j = 1; j < parallel_count; j++) | |||
2004 | { | |||
2005 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j)); | |||
2006 | if (ingrad_tensor) | |||
2007 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); | |||
2008 | } | |||
2009 | } else { | |||
2010 | // Make sure the length matches, in case it is an alias. | |||
2011 | assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model-> graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count (ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params (model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))" , "ccv_cnnp_model.c", 2011, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2012 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]); | |||
2013 | for (j = 1; j < parallel_count; j++) | |||
2014 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]); | |||
2015 | } | |||
2016 | } | |||
2017 | if (outgrad_size > 0) | |||
2018 | { | |||
2019 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\"" , "ccv_cnnp_model.c", 2019, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2020 | for (i = 0; i < outgrad_size_per_p; i++) | |||
2021 | if (outgrads[i]) | |||
2022 | { | |||
2023 | const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i]; | |||
2024 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]); | |||
2025 | for (j = 1; j < parallel_count; j++) | |||
2026 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]); | |||
2027 | } | |||
2028 | } else { | |||
2029 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 2030, __extension__ __PRETTY_FUNCTION__ ); })) | |||
2030 | compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 2030, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2031 | } | |||
2032 | // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients. | |||
2033 | // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these | |||
2034 | // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching. | |||
2035 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); | |||
2036 | if (!compiled_data->backward.schedule) | |||
2037 | compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0); | |||
2038 | // Run the backward pass. | |||
2039 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context); | |||
2040 | // If we need to run accumulation round, do that now. | |||
2041 | if (compiled_data->backward.count > 0) | |||
2042 | ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context); | |||
2043 | // Update the count, this determines whether we need to accumulate or not. | |||
2044 | ++compiled_data->backward.count; | |||
2045 | } | |||
2046 | ||||
2047 | // Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE). | |||
2048 | // Particularly, this method compiles the parameter update graph. | |||
2049 | static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model) | |||
2050 | { | |||
2051 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2052 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 2052, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2053 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2054 | const int parameter_size = compiled_data->parameters->rnum; | |||
2055 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
2056 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
2057 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
2058 | // Bind accumulated gradients. | |||
2059 | if (compiled_data->backward.count > 1) | |||
2060 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds); | |||
2061 | else | |||
2062 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); | |||
2063 | ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0); | |||
2064 | int i, j; | |||
2065 | for (i = 0; i < compiled_data->backward.to_size; i++) | |||
2066 | { | |||
2067 | const int* tos; | |||
2068 | int to_size; | |||
2069 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size); | |||
2070 | for (j = 0; j < to_size; j++) | |||
2071 | { | |||
2072 | // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply | |||
2073 | // gradients graph. | |||
2074 | const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ | |||
2075 | .d = tos[j], | |||
2076 | .graph = model->graph, | |||
2077 | }); | |||
2078 | if (!exec.graph) | |||
2079 | ccv_array_add_unique_int(apply_gradients_from, tos[j]); | |||
2080 | } | |||
2081 | } | |||
2082 | const int from_size = apply_gradients_from->rnum; | |||
2083 | if (from_size == 0) | |||
2084 | { | |||
2085 | ccv_array_free(apply_gradients_from); | |||
2086 | ccv_array_free(tensor_binds); | |||
2087 | return; | |||
2088 | } | |||
2089 | ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size); | |||
2090 | for (i = 0; i < from_size; i++) | |||
2091 | froms[i] = (ccv_nnc_graph_exec_symbol_t){ | |||
2092 | .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t )(apply_gradients_from)->rsize * (size_t)(i))), | |||
2093 | .graph = model->graph | |||
2094 | }; | |||
2095 | ccv_array_free(apply_gradients_from); | |||
2096 | // It can only ends with updates on the parameters. | |||
2097 | ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0); | |||
2098 | for (i = 0; i < parameter_size; i++) | |||
2099 | { | |||
2100 | if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
2101 | continue; | |||
2102 | ccv_array_push(tos, &compiled_data->update_nodes[i]); | |||
2103 | for (j = 1; j < parallel_count; j++) | |||
2104 | { | |||
2105 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j); | |||
2106 | ccv_array_push(tos, ©); | |||
2107 | } | |||
2108 | } | |||
2109 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize * (size_t)(0))), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena); | |||
2110 | ccv_array_free(tos); | |||
2111 | ccv_array_free(tensor_binds); | |||
2112 | ccfreefree(froms); | |||
2113 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
2114 | for (i = 0; i < max_saved_aux_size * parameter_size; i++) | |||
2115 | { | |||
2116 | // Skip on no tensor. | |||
2117 | if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
2118 | continue; | |||
2119 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source); | |||
2120 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); | |||
2121 | for (j = 1; j < parallel_count; j++) | |||
2122 | { | |||
2123 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); | |||
2124 | if (copy) | |||
2125 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, ©, 1, 0); | |||
2126 | } | |||
2127 | } | |||
2128 | ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count); | |||
2129 | } | |||
2130 | ||||
2131 | void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context) | |||
2132 | { | |||
2133 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2134 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2134, __extension__ __PRETTY_FUNCTION__); })); | |||
2135 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 2135, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2136 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2137 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 2137, __extension__ __PRETTY_FUNCTION__); })); | |||
2138 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 2138, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2139 | // Skip if there is no backward pass. | |||
2140 | if (compiled_data->backward.count <= 0) | |||
2141 | return; | |||
2142 | // Skip if there is no parameters. | |||
2143 | if (compiled_data->parameters->rnum == 0) | |||
2144 | { | |||
2145 | compiled_data->backward.count = 0; | |||
2146 | return; | |||
2147 | } | |||
2148 | if (!compiled_data->apply_gradients.graph) | |||
2149 | _ccv_cnnp_model_multistage_jit_2(model); | |||
2150 | else { | |||
2151 | const int parameter_size = compiled_data->parameters->rnum; | |||
2152 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena); | |||
2153 | // Change to bind accum_gradients if we do gradient accumulation (run backward more than once). | |||
2154 | if (compiled_data->backward.count > 1) | |||
2155 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count); | |||
2156 | else | |||
2157 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); | |||
2158 | } | |||
2159 | if (compiled_data->apply_gradients.graph) | |||
2160 | ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context); | |||
2161 | // Reset backward count to 0. | |||
2162 | compiled_data->backward.count = 0; | |||
2163 | } | |||
2164 | ||||
2165 | void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor) | |||
2166 | { | |||
2167 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2168 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
2169 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2169, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2170 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
2171 | if (!tensors_init) | |||
2172 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
2173 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
2174 | // Check if it is not fully allocated, if it is not, init_1. | |||
2175 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
2176 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
2177 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
2178 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
2179 | if (param_ref < 0) | |||
2180 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2180 , __extension__ __PRETTY_FUNCTION__); })); } | |||
2181 | else | |||
2182 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2182, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2183 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
2184 | ccv_array_free(parameter_indices); | |||
2185 | const int parameter_size = compiled_data->parameters->rnum; | |||
2186 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2186 , __extension__ __PRETTY_FUNCTION__); })); | |||
2187 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2187, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2188 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2189 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); | |||
2190 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2190, __extension__ __PRETTY_FUNCTION__); })); | |||
2191 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 ), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
2192 | int i; | |||
2193 | for (i = 1; i < parallel_count; i++) | |||
2194 | { | |||
2195 | ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d + i * parameter_size]) & ~(uintptr_t)1)); | |||
2196 | if (copy_tensor) | |||
2197 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
2198 | } | |||
2199 | // Mark this symbol as init'ed. | |||
2200 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( d))))->d; | |||
2201 | uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
2202 | init_v[s >> 5] |= (1u << (s & 0x1f)); | |||
2203 | } | |||
2204 | ||||
2205 | void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor) | |||
2206 | { | |||
2207 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2208 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
2209 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2209, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2210 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2210, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2211 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
2212 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
2213 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
2214 | if (param_ref < 0) | |||
2215 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2215 , __extension__ __PRETTY_FUNCTION__); })); } | |||
2216 | else | |||
2217 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2217, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2218 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
2219 | ccv_array_free(parameter_indices); | |||
2220 | const int parameter_size = compiled_data->parameters->rnum; | |||
2221 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2221 , __extension__ __PRETTY_FUNCTION__); })); | |||
2222 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2222, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2223 | // We don't need to consider parallel_count, every parameter on each device is identical. | |||
2224 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); | |||
2225 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2225, __extension__ __PRETTY_FUNCTION__); })); | |||
2226 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
2227 | } | |||
2228 | ||||
2229 | ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter) | |||
2230 | { | |||
2231 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2232 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
2233 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2233, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2234 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2234, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2235 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
2236 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
2237 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
2238 | if (param_ref < 0) | |||
2239 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2239 , __extension__ __PRETTY_FUNCTION__); })); } | |||
2240 | else | |||
2241 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2241, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2242 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
2243 | ccv_array_free(parameter_indices); | |||
2244 | const int parameter_size = compiled_data->parameters->rnum; | |||
2245 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2245 , __extension__ __PRETTY_FUNCTION__); })); | |||
2246 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2246, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2247 | // We don't need to consider parallel_count, every parameter on each device is identical. | |||
2248 | ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); | |||
2249 | assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor ) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2249, __extension__ __PRETTY_FUNCTION__); })); | |||
2250 | return tensor->info; | |||
2251 | } | |||
2252 | ||||
2253 | const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter) | |||
2254 | { | |||
2255 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2256 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
2257 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2257, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2258 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
2259 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
2260 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
2261 | if (param_ref < 0) | |||
2262 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2262 , __extension__ __PRETTY_FUNCTION__); })); } | |||
2263 | else | |||
2264 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2264, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2265 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
2266 | ccv_array_free(parameter_indices); | |||
2267 | const int parameter_size = compiled_data->parameters->rnum; | |||
2268 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2268 , __extension__ __PRETTY_FUNCTION__); })); | |||
2269 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2269, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2270 | return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(d))); | |||
2271 | } | |||
2272 | ||||
2273 | int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model) | |||
2274 | { | |||
2275 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 2275, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2276 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2277 | return compiled_data->parameters->rnum; | |||
2278 | } | |||
2279 | ||||
2280 | ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context) | |||
2281 | { | |||
2282 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2283 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2283, __extension__ __PRETTY_FUNCTION__); })); | |||
2284 | const int parameter_size = compiled_data->parameters->rnum; | |||
2285 | int i; | |||
2286 | for (i = 0; i < parameter_size; i++) | |||
2287 | { | |||
2288 | const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i))); | |||
2289 | if (first(model, name, context)) | |||
2290 | return ccv_cnnp_model_parameters(model, -1, i); | |||
2291 | } | |||
2292 | return 0; | |||
2293 | } | |||
2294 | ||||
2295 | ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context) | |||
2296 | { | |||
2297 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2298 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2298, __extension__ __PRETTY_FUNCTION__); })); | |||
2299 | ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0); | |||
2300 | const int parameter_size = compiled_data->parameters->rnum; | |||
2301 | int i; | |||
2302 | for (i = 0; i < parameter_size; i++) | |||
2303 | { | |||
2304 | const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i))); | |||
2305 | if (filter(model, name, context)) | |||
2306 | { | |||
2307 | ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i); | |||
2308 | ccv_array_push(parameters, ¶meter); | |||
2309 | } | |||
2310 | } | |||
2311 | return parameters; | |||
2312 | ||||
2313 | } | |||
2314 | ||||
2315 | CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model) | |||
2316 | { | |||
2317 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2318 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2318, __extension__ __PRETTY_FUNCTION__); })); | |||
2319 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
2320 | if (!tensors_init) // If nothing initialized, we return parameter 0. | |||
2321 | return ccv_cnnp_model_parameters(model, -1, 0); | |||
2322 | const int parameter_size = compiled_data->parameters->rnum; | |||
2323 | int i; | |||
2324 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
2325 | for (i = 0; i < parameter_size; i++) | |||
2326 | { | |||
2327 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | |||
2328 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) | |||
2329 | return ccv_cnnp_model_parameters(model, -1, i); | |||
2330 | } | |||
2331 | return 0; | |||
2332 | } | |||
2333 | ||||
2334 | static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref) | |||
2335 | { | |||
2336 | const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel; | |||
2337 | assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameters->param_sel != 0) ; else __assert_fail ( "parameters->param_sel != 0", "ccv_cnnp_model.c", 2337, __extension__ __PRETTY_FUNCTION__); })); | |||
2338 | ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
2339 | ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices); | |||
2340 | *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref; | |||
2341 | return to_parameter_indices; | |||
2342 | } | |||
2343 | ||||
2344 | static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0) | |||
2345 | { | |||
2346 | // If the model is not compiled yet. Compile them now. | |||
2347 | if (!model->graph) | |||
2348 | { | |||
2349 | model->graph = ccv_nnc_symbolic_graph_new(); | |||
2350 | assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__ ({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data" , "ccv_cnnp_model.c", 2350, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2351 | const int input_size = from_model->input_size; | |||
2352 | ccv_nnc_tensor_param_t input_params[input_size]; | |||
2353 | int i; | |||
2354 | for (i = 0; i < input_size; i++) | |||
2355 | input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]); | |||
2356 | _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss); | |||
2357 | model->parallel_count = from_model->parallel_count; | |||
2358 | model->memory_compression = from_model->memory_compression; | |||
2359 | model->memory_reduction = from_model->memory_reduction; | |||
2360 | model->gradient_checkpointing = from_model->gradient_checkpointing; | |||
2361 | model->compiled_data->stream_type = from_model->compiled_data->stream_type; | |||
2362 | model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer; | |||
2363 | model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size; | |||
2364 | } | |||
2365 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
2366 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2366, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2367 | const int to_tensors_init = !!to_compiled_data->tensors_init.v; | |||
2368 | if (!to_tensors_init) | |||
2369 | { | |||
2370 | if (only_init_0) | |||
2371 | ccv_cnnp_model_tensors_init_0(model, to_compiled_data); | |||
2372 | else | |||
2373 | _ccv_cnnp_model_tensors_init(model, to_compiled_data); | |||
2374 | } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1) | |||
2375 | // Check if it is not fully allocated, if it is not, init_1. | |||
2376 | ccv_cnnp_model_tensors_init_1(model, to_compiled_data); | |||
2377 | assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (to_compiled_data->tensors.parameters ) ; else __assert_fail ("to_compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2377, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2378 | *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref); | |||
2379 | *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref); | |||
2380 | if (*from_param_ref < 0 && *param_ref >= 0) | |||
2381 | { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1 : 0), __extension__ ({ if ((*from_parameter_indices)->rnum == 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1" , "ccv_cnnp_model.c", 2381, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2382 | else if (*from_param_ref >= 0) | |||
2383 | { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices )->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref < (*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum" , "ccv_cnnp_model.c", 2383, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2384 | if (*param_ref < 0 && *from_param_ref >= 0) | |||
2385 | { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0) , __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else __assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c" , 2385, __extension__ __PRETTY_FUNCTION__); })); } | |||
2386 | else if (*param_ref >= 0) | |||
2387 | { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum ) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices )->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum" , "ccv_cnnp_model.c", 2387, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2388 | } | |||
2389 | ||||
2390 | void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters) | |||
2391 | { | |||
2392 | ccv_array_t* to_parameter_indices; | |||
2393 | int to_param_ref; | |||
2394 | ccv_array_t* from_parameter_indices; | |||
2395 | int from_param_ref; | |||
2396 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0); | |||
2397 | // Should be exactly the same tensor. | |||
2398 | if (to_param_ref < 0 && from_param_ref < 0) | |||
2399 | { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices ->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices ->rnum == to_parameter_indices->rnum) ; else __assert_fail ("from_parameter_indices->rnum == to_parameter_indices->rnum" , "ccv_cnnp_model.c", 2399, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2400 | // To models. | |||
2401 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
2402 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2402, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2403 | // From models. | |||
2404 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | |||
2405 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2406 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
2407 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1; | |||
2408 | int i, j; | |||
2409 | const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init. v) & ~(uintptr_t)1)); | |||
2410 | uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
2411 | for (i = 0; i < rnum; i++) | |||
2412 | { | |||
2413 | const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))); | |||
2414 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2414, __extension__ __PRETTY_FUNCTION__); })); | |||
2415 | assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters ->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data ->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2415, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2416 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | |||
2417 | // If the original is not init'ed. We cannot copy from. | |||
2418 | if (!(from_init_v[s >> 5] & (1u << (s & 0x1f)))) | |||
2419 | continue; | |||
2420 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
2421 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2421, __extension__ __PRETTY_FUNCTION__); })); | |||
2422 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2422, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2423 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d]) & ~(uintptr_t)1)); | |||
2424 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2424, __extension__ __PRETTY_FUNCTION__); })); | |||
2425 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d]) & ~(uintptr_t)1)); | |||
2426 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2426, __extension__ __PRETTY_FUNCTION__); })); | |||
2427 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
2428 | for (j = 1; j < parallel_count; j++) | |||
2429 | { | |||
2430 | ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
2431 | if (copy_tensor) | |||
2432 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
2433 | } | |||
2434 | // Mark this symbol as init'ed. | |||
2435 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | |||
2436 | to_init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
2437 | } | |||
2438 | ccv_array_free(to_parameter_indices); | |||
2439 | ccv_array_free(from_parameter_indices); | |||
2440 | } | |||
2441 | ||||
2442 | KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)typedef struct kh_ccv_cnnp_parameter_id_s { khint_t n_buckets , size, n_occupied, upper_bound; khint32_t *flags; kh_cstr_t * keys; int *vals; } kh_ccv_cnnp_parameter_id_t; static inline __attribute__ ((__unused__)) kh_ccv_cnnp_parameter_id_t *kh_init_ccv_cnnp_parameter_id (void) { return (kh_ccv_cnnp_parameter_id_t*)calloc(1,sizeof( kh_ccv_cnnp_parameter_id_t)); } static inline __attribute__ ( (__unused__)) void kh_destroy_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h) { if (h) { free((void *)h->keys); free(h->flags); free ((void *)h->vals); free(h); } } static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h) { if (h && h->flags) { memset(h->flags, 0xaa , ((h->n_buckets) < 16? 1 : (h->n_buckets)>>4) * sizeof(khint32_t)); h->size = h->n_occupied = 0; } } static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_cnnp_parameter_id (const kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key) { if (h-> n_buckets) { khint_t k, i, last, mask, step = 0; mask = h-> n_buckets - 1; k = __ac_X31_hash_string(key); i = k & mask ; last = i; while (!((h->flags[i>>4]>>((i& 0xfU)<<1))&2) && (((h->flags[i>>4] >>((i&0xfU)<<1))&1) || !(strcmp(h->keys [i], key) == 0))) { i = (i + (++step)) & mask; if (i == last ) return h->n_buckets; } return ((h->flags[i>>4]>> ((i&0xfU)<<1))&3)? 
h->n_buckets : i; } else return 0; } static inline __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_parameter_id (kh_ccv_cnnp_parameter_id_t *h, khint_t new_n_buckets) { khint32_t *new_flags = 0; khint_t j = 1; { (--(new_n_buckets), (new_n_buckets )|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets) >>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets )|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets) >>16, ++(new_n_buckets)); if (new_n_buckets < 4) new_n_buckets = 4; if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; else { new_flags = (khint32_t*)malloc(((new_n_buckets ) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t)) ; if (!new_flags) return -1; memset(new_flags, 0xaa, ((new_n_buckets ) < 16? 1 : (new_n_buckets)>>4) * sizeof(khint32_t)) ; if (h->n_buckets < new_n_buckets) { kh_cstr_t *new_keys = (kh_cstr_t*)realloc((void *)h->keys,new_n_buckets * sizeof (kh_cstr_t)); if (!new_keys) { free(new_flags); return -1; } h ->keys = new_keys; if (1) { int *new_vals = (int*)realloc( (void *)h->vals,new_n_buckets * sizeof(int)); if (!new_vals ) { free(new_flags); return -1; } h->vals = new_vals; } } } } if (j) { for (j = 0; j != h->n_buckets; ++j) { if (((h-> flags[j>>4]>>((j&0xfU)<<1))&3) == 0 ) { kh_cstr_t key = h->keys[j]; int val; khint_t new_mask; new_mask = new_n_buckets - 1; if (1) val = h->vals[j]; (h ->flags[j>>4]|=1ul<<((j&0xfU)<<1)); while (1) { khint_t k, i, step = 0; k = __ac_X31_hash_string(key); i = k & new_mask; while (!((new_flags[i>>4]>> ((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask ; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<< 1))); if (i < h->n_buckets && ((h->flags[i>> 4]>>((i&0xfU)<<1))&3) == 0) { { kh_cstr_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1 ) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; } (h->flags[i>>4]|=1ul<<((i&0xfU)<<1) ); } else { h->keys[i] = key; if (1) h->vals[i] = val; break ; } } } } if (h->n_buckets > new_n_buckets) { h->keys = (kh_cstr_t*)realloc((void 
*)h->keys,new_n_buckets * sizeof (kh_cstr_t)); if (1) h->vals = (int*)realloc((void *)h-> vals,new_n_buckets * sizeof(int)); } free(h->flags); h-> flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied = h->size; h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__ ((__unused__)) khint_t kh_put_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, kh_cstr_t key, int *ret) { khint_t x; if (h->n_occupied >= h->upper_bound) { if (h->n_buckets > (h->size <<1)) { if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets - 1) < 0) { *ret = -1; return h->n_buckets; } } else if (kh_resize_ccv_cnnp_parameter_id(h, h->n_buckets + 1) < 0) { *ret = -1; return h->n_buckets; } } { khint_t k, i, site , last, mask = h->n_buckets - 1, step = 0; x = site = h-> n_buckets; k = __ac_X31_hash_string(key); i = k & mask; if (((h->flags[i>>4]>>((i&0xfU)<<1))& 2)) x = i; else { last = i; while (!((h->flags[i>>4] >>((i&0xfU)<<1))&2) && (((h->flags [i>>4]>>((i&0xfU)<<1))&1) || !(strcmp (h->keys[i], key) == 0))) { if (((h->flags[i>>4]>> ((i&0xfU)<<1))&1)) site = i; i = (i + (++step)) & mask; if (i == last) { x = site; break; } } if (x == h ->n_buckets) { if (((h->flags[i>>4]>>((i& 0xfU)<<1))&2) && site != h->n_buckets) x = site; else x = i; } } } if (((h->flags[x>>4]>> ((x&0xfU)<<1))&2)) { h->keys[x] = key; (h-> flags[x>>4]&=~(3ul<<((x&0xfU)<<1))) ; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h-> flags[x>>4]>>((x&0xfU)<<1))&1)) { h ->keys[x] = key; (h->flags[x>>4]&=~(3ul<< ((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret = 0; return x; } static inline __attribute__ ((__unused__)) void kh_del_ccv_cnnp_parameter_id(kh_ccv_cnnp_parameter_id_t *h, khint_t x) { if (x != h->n_buckets && !((h->flags[x>> 4]>>((x&0xfU)<<1))&3)) { (h->flags[x>> 4]|=1ul<<((x&0xfU)<<1)); --h->size; } } | |||
2443 | ||||
2444 | void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context) | |||
2445 | { | |||
2446 | ccv_array_t* to_parameter_indices; | |||
2447 | int to_param_ref; | |||
2448 | ccv_array_t* from_parameter_indices; | |||
2449 | int from_param_ref; | |||
2450 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1); | |||
2451 | // Should be exactly the same tensor. | |||
2452 | if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0) | |||
| ||||
2453 | { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices ->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices ->rnum == to_parameter_indices->rnum) ; else __assert_fail ("from_parameter_indices->rnum == to_parameter_indices->rnum" , "ccv_cnnp_model.c", 2453, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2454 | // To models. | |||
2455 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
2456 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2456, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2457 | // From models. | |||
2458 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | |||
2459 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2460 | assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model-> parallel_count) _a = (from_model->parallel_count); typeof ( 1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (parallel_count == ({ typeof (from_model->parallel_count ) _a = (from_model->parallel_count); typeof (1) _b = (1); ( _a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)" , "ccv_cnnp_model.c", 2460, __extension__ __PRETTY_FUNCTION__ ); })); // Should have the same parallel count can share parameters. | |||
2461 | const int from_parameter_size = from_compiled_data->parameters->rnum; | |||
2462 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
2463 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1; | |||
2464 | int i, j; | |||
2465 | khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0; | |||
2466 | char* updated_name = 0; | |||
2467 | const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init. v) & ~(uintptr_t)1)); | |||
2468 | uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
2469 | for (i = 0; i < rnum; i++) | |||
2470 | { | |||
2471 | int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))) : from_parameter_size; | |||
2472 | // Need to figure out how to use the renamer here. | |||
2473 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
2474 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2474, __extension__ __PRETTY_FUNCTION__); })); | |||
2475 | assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__ ({ if (dest_d < to_parameter_size) ; else __assert_fail ( "dest_d < to_parameter_size", "ccv_cnnp_model.c", 2475, __extension__ __PRETTY_FUNCTION__); })); | |||
2476 | if (renamer
| |||
2477 | { | |||
2478 | const char* const src_name = (src_d
data)) + (size_t)(from_compiled_data->ids.parameters)-> rsize * (size_t)(src_d))) : 0; | |||
2479 | const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data )) + (size_t)(to_compiled_data->ids.parameters)->rsize * (size_t)(dest_d))); | |||
2480 | if (!updated_name
| |||
2481 | updated_name = (char*)ccmallocmalloc(1024); | |||
2482 | const size_t src_name_len = src_name
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; }); | |||
2483 | if (src_name_len
| |||
2484 | memcpy(updated_name, src_name, src_name_len); | |||
2485 | updated_name[src_name_len] = 0; | |||
2486 | if (renamer(context, dest_name, updated_name, 1024) != 0) | |||
2487 | continue; // Skip this. | |||
2488 | if (src_name
| |||
2489 | { | |||
2490 | // Nothing changed. | |||
2491 | } else { | |||
2492 | if (!id_map
| |||
2493 | { | |||
2494 | id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id(); | |||
2495 | for (j = 0; j < from_parameter_size; j++) | |||
2496 | { | |||
2497 | int ret; | |||
2498 | const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char *)((from_compiled_data->ids.parameters)->data)) + (size_t )(from_compiled_data->ids.parameters)->rsize * (size_t) (j))), &ret); | |||
2499 | assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret != 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2499 , __extension__ __PRETTY_FUNCTION__); })); | |||
2500 | kh_val(id_map, k)((id_map)->vals[k]) = j; | |||
| ||||
2501 | } | |||
2502 | } | |||
2503 | const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name); | |||
2504 | if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip. | |||
2505 | continue; | |||
2506 | src_d = kh_val(id_map, k)((id_map)->vals[k]); | |||
2507 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2507, __extension__ __PRETTY_FUNCTION__); })); | |||
2508 | assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__ ({ if (src_d < from_parameter_size) ; else __assert_fail ( "src_d < from_parameter_size", "ccv_cnnp_model.c", 2508, __extension__ __PRETTY_FUNCTION__); })); | |||
2509 | } | |||
2510 | } | |||
2511 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2511, __extension__ __PRETTY_FUNCTION__); })); | |||
2512 | assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__ ({ if (src_d < from_parameter_size) ; else __assert_fail ( "src_d < from_parameter_size", "ccv_cnnp_model.c", 2512, __extension__ __PRETTY_FUNCTION__); })); | |||
2513 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | |||
2514 | // If the original is not init'ed. We cannot share from. | |||
2515 | if (!(from_init_v[s >> 5] & (1u << (s & 0x1f)))) | |||
2516 | continue; | |||
2517 | for (j = 0; j < parallel_count; j++) | |||
2518 | { | |||
2519 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d + j * from_parameter_size]) & ~(uintptr_t )1)); | |||
2520 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2520, __extension__ __PRETTY_FUNCTION__); })); | |||
2521 | ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]; | |||
2522 | if (dest && !((uintptr_t)dest & (uintptr_t)1)) | |||
2523 | ccv_nnc_tensor_free(dest); | |||
2524 | to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1); | |||
2525 | } | |||
2526 | // Mark this symbol as init'ed. | |||
2527 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | |||
2528 | to_init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
2529 | } | |||
2530 | ccv_array_free(to_parameter_indices); | |||
2531 | ccv_array_free(from_parameter_indices); | |||
2532 | if (id_map) | |||
2533 | kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map); | |||
2534 | if (updated_name) | |||
2535 | ccfreefree(updated_name); | |||
2536 | // Mark it as incomplete so we will call init_1. | |||
2537 | if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data)) | |||
2538 | to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1); | |||
2539 | else // Remove the flag. | |||
2540 | to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
2541 | } | |||
2542 | ||||
2543 | ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type) | |||
2544 | { | |||
2545 | if (!compiled_data->stream_map) | |||
2546 | compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map(); | |||
2547 | int ret = 0; | |||
2548 | khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret ); | |||
2549 | assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if ( ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c" , 2549, __extension__ __PRETTY_FUNCTION__); })); | |||
2550 | ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]); | |||
2551 | // If ret == 0, the key already exist, we can return directly, otherwise, create and return. | |||
2552 | if (ret != 0) | |||
2553 | { | |||
2554 | stream = ccv_nnc_stream_context_new(type); | |||
2555 | kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream; | |||
2556 | } | |||
2557 | return stream; | |||
2558 | } | |||
2559 | ||||
2560 | void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters) | |||
2561 | { | |||
2562 | ccv_array_t* to_parameter_indices; | |||
2563 | int to_param_ref; | |||
2564 | ccv_array_t* from_parameter_indices; | |||
2565 | int from_param_ref; | |||
2566 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0); | |||
2567 | // Should be exactly the same tensor. | |||
2568 | if (to_param_ref < 0 && from_param_ref < 0) | |||
2569 | { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices ->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices ->rnum == to_parameter_indices->rnum) ; else __assert_fail ("from_parameter_indices->rnum == to_parameter_indices->rnum" , "ccv_cnnp_model.c", 2569, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2570 | // To models. | |||
2571 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
2572 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2572, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2573 | // From models. | |||
2574 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | |||
2575 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2576 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
2577 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1; | |||
2578 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2578, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2579 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2579, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2580 | int i, j; | |||
2581 | ccv_nnc_tensor_t* inputs[aux_in_size + 2]; | |||
2582 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | |||
2583 | for (i = 0; i < aux_in_size; i++) | |||
2584 | inputs[i + 2] = aux_ins[i]; | |||
2585 | for (i = 0; i < aux_out_size; i++) | |||
2586 | outputs[i + 1] = aux_outs[i]; | |||
2587 | const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init. v) & ~(uintptr_t)1)); | |||
2588 | uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
2589 | for (i = 0; i < rnum; i++) | |||
2590 | { | |||
2591 | const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))); | |||
2592 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2592, __extension__ __PRETTY_FUNCTION__); })); | |||
2593 | assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters ->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data ->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2593, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2594 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | |||
2595 | // If the original is not init'ed. We cannot copy from. | |||
2596 | if (!(from_init_v[s >> 5] & (1u << (s & 0x1f)))) | |||
2597 | continue; | |||
2598 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
2599 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2599, __extension__ __PRETTY_FUNCTION__); })); | |||
2600 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2600, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2601 | if (parallel_count > 1) | |||
2602 | { | |||
2603 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
2604 | ccv_nnc_stream_signal_t* signal; | |||
2605 | if (stream_context) | |||
2606 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
2607 | for (j = 0; j < parallel_count; j++) | |||
2608 | { | |||
2609 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
2610 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
2611 | if (!dest || !src) | |||
2612 | { | |||
2613 | streams[j] = 0; | |||
2614 | continue; | |||
2615 | } | |||
2616 | // At the moment, can only handle them on the same device. | |||
2617 | assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest-> info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src-> info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else __assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)" , "ccv_cnnp_model.c", 2617, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2618 | assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >> 8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1 : 0), __extension__ ({ if ((((src->info.type) & 0xfff00 ) >> 8) == (((dest->info.type) & 0xfff00) >> 8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)" , "ccv_cnnp_model.c", 2618, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2619 | const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
2620 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8); | |||
2621 | int type = stream_type; | |||
2622 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
2623 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | |||
2624 | // Wait signal to finish. | |||
2625 | if (stream_context) | |||
2626 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
2627 | inputs[0] = outputs[0] = dest; | |||
2628 | inputs[1] = src; | |||
2629 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0); | |||
2630 | if (stream_context) | |||
2631 | { | |||
2632 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
2633 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
2634 | } | |||
2635 | streams[j] = stream_0; | |||
2636 | } | |||
2637 | // If this should be blocking, blocking it. | |||
2638 | if (!stream_context) | |||
2639 | for (j = 0; j < parallel_count; j++) | |||
2640 | if (streams[j]) | |||
2641 | ccv_nnc_stream_context_wait(streams[j]); | |||
2642 | } else { | |||
2643 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d]) & ~(uintptr_t)1)); | |||
2644 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2644, __extension__ __PRETTY_FUNCTION__); })); | |||
2645 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d]) & ~(uintptr_t)1)); | |||
2646 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2646, __extension__ __PRETTY_FUNCTION__); })); | |||
2647 | inputs[0] = outputs[0] = dest; | |||
2648 | inputs[1] = src; | |||
2649 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context); | |||
2650 | } | |||
2651 | // Mark this symbol as init'ed. | |||
2652 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | |||
2653 | to_init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
2654 | } | |||
2655 | ccv_array_free(to_parameter_indices); | |||
2656 | ccv_array_free(from_parameter_indices); | |||
2657 | } | |||
2658 | ||||
2659 | void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context) | |||
2660 | { | |||
2661 | int to_param_ref; | |||
2662 | ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref); | |||
2663 | // To models. | |||
2664 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
2665 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2665, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2666 | // Tensor has to be inited already. | |||
2667 | assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 : 0), __extension__ ({ if (!!to_compiled_data->tensors_init .v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v" , "ccv_cnnp_model.c", 2667, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2668 | assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (to_compiled_data->tensors.parameters ) ; else __assert_fail ("to_compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2668, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2669 | // From models. | |||
2670 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2671 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
2672 | const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1; | |||
2673 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2673, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2674 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2674, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2675 | int i, j; | |||
2676 | ccv_nnc_tensor_t* inputs[aux_in_size + 1]; | |||
2677 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | |||
2678 | for (i = 0; i < aux_in_size; i++) | |||
2679 | inputs[i + 1] = aux_ins[i]; | |||
2680 | for (i = 0; i < aux_out_size; i++) | |||
2681 | outputs[i + 1] = aux_outs[i]; | |||
2682 | for (i = 0; i < rnum; i++) | |||
2683 | { | |||
2684 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
2685 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2685, __extension__ __PRETTY_FUNCTION__); })); | |||
2686 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2686, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2687 | if (parallel_count > 1) | |||
2688 | { | |||
2689 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
2690 | ccv_nnc_stream_signal_t* signal; | |||
2691 | if (stream_context) | |||
2692 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
2693 | for (j = 0; j < parallel_count; j++) | |||
2694 | { | |||
2695 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
2696 | if (!dest) | |||
2697 | { | |||
2698 | streams[j] = 0; | |||
2699 | continue; | |||
2700 | } | |||
2701 | const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
2702 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8); | |||
2703 | int type = stream_type; | |||
2704 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
2705 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | |||
2706 | // Wait signal to finish. | |||
2707 | if (stream_context) | |||
2708 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
2709 | inputs[0] = outputs[0] = dest; | |||
2710 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0); | |||
2711 | if (stream_context) | |||
2712 | { | |||
2713 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
2714 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
2715 | } | |||
2716 | streams[j] = stream_0; | |||
2717 | } | |||
2718 | // If this should be blocking, blocking it. | |||
2719 | if (!stream_context) | |||
2720 | for (j = 0; j < parallel_count; j++) | |||
2721 | if (streams[j]) | |||
2722 | ccv_nnc_stream_context_wait(streams[j]); | |||
2723 | } else { | |||
2724 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d]) & ~(uintptr_t)1)); | |||
2725 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2725, __extension__ __PRETTY_FUNCTION__); })); | |||
2726 | inputs[0] = outputs[0] = dest; | |||
2727 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context); | |||
2728 | } | |||
2729 | // No need to mark this symbol as init'ed, it is already. | |||
2730 | } | |||
2731 | ccv_array_free(to_parameter_indices); | |||
2732 | } | |||
2733 | ||||
2734 | void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context) | |||
2735 | { | |||
2736 | int to_param_ref; | |||
2737 | ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref); | |||
2738 | // To models. | |||
2739 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
2740 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2740, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2741 | // Tensor has to be inited already. | |||
2742 | assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 : 0), __extension__ ({ if (!!to_compiled_data->tensors_init .v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v" , "ccv_cnnp_model.c", 2742, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2743 | ccv_nnc_tensor_t** tensor_gradients; | |||
2744 | if (to_compiled_data->backward.count > 1) | |||
2745 | tensor_gradients = to_compiled_data->tensors.accum_gradients; | |||
2746 | else | |||
2747 | tensor_gradients = to_compiled_data->tensors.gradients; | |||
2748 | assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({ if (tensor_gradients) ; else __assert_fail ("tensor_gradients" , "ccv_cnnp_model.c", 2748, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2749 | // From models. | |||
2750 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2751 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
2752 | const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1; | |||
2753 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2753, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2754 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2754, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2755 | int i, j; | |||
2756 | ccv_nnc_tensor_t* inputs[aux_in_size + 1]; | |||
2757 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | |||
2758 | for (i = 0; i < aux_in_size; i++) | |||
2759 | inputs[i + 1] = aux_ins[i]; | |||
2760 | for (i = 0; i < aux_out_size; i++) | |||
2761 | outputs[i + 1] = aux_outs[i]; | |||
2762 | for (i = 0; i < rnum; i++) | |||
2763 | { | |||
2764 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
2765 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2765, __extension__ __PRETTY_FUNCTION__); })); | |||
2766 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2766, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2767 | if (parallel_count > 1) | |||
2768 | { | |||
2769 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
2770 | ccv_nnc_stream_signal_t* signal; | |||
2771 | if (stream_context) | |||
2772 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
2773 | for (j = 0; j < parallel_count; j++) | |||
2774 | { | |||
2775 | ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size]; | |||
2776 | if (!dest) | |||
2777 | { | |||
2778 | streams[j] = 0; | |||
2779 | continue; | |||
2780 | } | |||
2781 | const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
2782 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8); | |||
2783 | int type = stream_type; | |||
2784 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
2785 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | |||
2786 | // Wait signal to finish. | |||
2787 | if (stream_context) | |||
2788 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
2789 | inputs[0] = outputs[0] = dest; | |||
2790 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0); | |||
2791 | if (stream_context) | |||
2792 | { | |||
2793 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
2794 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
2795 | } | |||
2796 | streams[j] = stream_0; | |||
2797 | } | |||
2798 | // If this should be blocking, blocking it. | |||
2799 | if (!stream_context) | |||
2800 | for (j = 0; j < parallel_count; j++) | |||
2801 | if (streams[j]) | |||
2802 | ccv_nnc_stream_context_wait(streams[j]); | |||
2803 | } else { | |||
2804 | ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d]; | |||
2805 | if (!dest) | |||
2806 | continue; | |||
2807 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2807, __extension__ __PRETTY_FUNCTION__); })); | |||
2808 | inputs[0] = outputs[0] = dest; | |||
2809 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context); | |||
2810 | } | |||
2811 | // No need to mark this symbol as init'ed, it is already. | |||
2812 | } | |||
2813 | ccv_array_free(to_parameter_indices); | |||
2814 | } | |||
2815 | ||||
2816 | ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model) | |||
2817 | { | |||
2818 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2819 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2819, __extension__ __PRETTY_FUNCTION__); })); | |||
2820 | return compiled_data->minimize.minimizer; | |||
2821 | } | |||
2822 | ||||
2823 | void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size) | |||
2824 | { | |||
2825 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2826 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2826, __extension__ __PRETTY_FUNCTION__); })); | |||
2827 | const int parameter_size = compiled_data->parameters->rnum; | |||
2828 | if (parameter_size == 0) | |||
2829 | return; | |||
2830 | if (reset) | |||
2831 | { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size == 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 && set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0" , "ccv_cnnp_model.c", 2831, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
2832 | const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
2833 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer); | |||
2834 | if (saved_aux_size > compiled_data->minimize.max_saved_aux_size) | |||
2835 | compiled_data->minimize.max_saved_aux_size = saved_aux_size; | |||
2836 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
2837 | // We update all parameters, at this point, we have one minimizer. | |||
2838 | if (set_parameters == 0 || set_parameter_size == 0) | |||
2839 | compiled_data->minimize.minimizer = minimizer; | |||
2840 | int i; | |||
2841 | if (set_parameters && set_parameter_size) | |||
2842 | { | |||
2843 | // I need to save what's the minimizer along with this. | |||
2844 | if (!compiled_data->minimize.parameters) | |||
2845 | compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0); | |||
2846 | ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t)); | |||
2847 | set_minimizer_for_parameter->minimizer = minimizer; | |||
2848 | set_minimizer_for_parameter->parameter_size = set_parameter_size; | |||
2849 | memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size); | |||
2850 | ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter); | |||
2851 | } | |||
2852 | // If reset is true, clear the parameters array. | |||
2853 | if (reset && compiled_data->minimize.parameters) | |||
2854 | { | |||
2855 | for (i = 0; i < compiled_data->minimize.parameters->rnum; i++) | |||
2856 | ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)-> data)) + (size_t)(compiled_data->minimize.parameters)-> rsize * (size_t)(i)))); | |||
2857 | ccv_array_clear(compiled_data->minimize.parameters); | |||
2858 | } | |||
2859 | if (!compiled_data->update_nodes) | |||
2860 | return; | |||
2861 | ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph; | |||
2862 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 2862, __extension__ __PRETTY_FUNCTION__); })); | |||
2863 | if (saved_aux_size > old_max_saved_aux_size) | |||
2864 | { | |||
2865 | assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->updated_parameters) ; else __assert_fail ("compiled_data->updated_parameters" , "ccv_cnnp_model.c", 2865, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2866 | // Reallocate first, move them around later. | |||
2867 | compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size); | |||
2868 | compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size); | |||
2869 | compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size); | |||
2870 | // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap. | |||
2871 | _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size); | |||
2872 | } | |||
2873 | int flag = 0; | |||
2874 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2875 | if (set_parameters && set_parameter_size) | |||
2876 | { | |||
2877 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
2878 | for (i = 0; i < set_parameter_size; i++) | |||
2879 | { | |||
2880 | const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel; | |||
2881 | assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0 ), __extension__ ({ if (set_parameters[i]->param_sel != 0) ; else __assert_fail ("set_parameters[i]->param_sel != 0" , "ccv_cnnp_model.c", 2881, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2882 | const int old_rnum = parameter_indices->rnum; | |||
2883 | ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices); | |||
2884 | const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref; | |||
2885 | assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0 ), __extension__ ({ if (set_parameters[i]->param_ref != 0) ; else __assert_fail ("set_parameters[i]->param_ref != 0" , "ccv_cnnp_model.c", 2885, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2886 | if (param_ref >= 0) | |||
2887 | { | |||
2888 | assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices-> rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum < parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum" , "ccv_cnnp_model.c", 2888, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2889 | *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(old_rnum))) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref + old_rnum))); | |||
2890 | parameter_indices->rnum = old_rnum + 1; | |||
2891 | } | |||
2892 | } | |||
2893 | // We may have duplicated indices, but that is OK, we will set it twice. | |||
2894 | for (i = 0; i < parameter_indices->rnum; i++) | |||
2895 | { | |||
2896 | const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(i))); | |||
2897 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d)) | |||
2898 | flag = 1; | |||
2899 | } | |||
2900 | ccv_array_free(parameter_indices); | |||
2901 | } else { | |||
2902 | for (i = 0; i < parameter_size; i++) | |||
2903 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i)) | |||
2904 | flag = 1; | |||
2905 | if (compiled_data->minimize.parameters) | |||
2906 | if (_ccv_cnnp_apply_parameters_with_minimizer(model)) | |||
2907 | flag = 1; | |||
2908 | } | |||
2909 | if (flag) | |||
2910 | { | |||
2911 | // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph. | |||
2912 | if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE) | |||
2913 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
2914 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | |||
2915 | } | |||
2916 | } | |||
2917 | ||||
2918 | void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params) | |||
2919 | { | |||
2920 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
2921 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2921, __extension__ __PRETTY_FUNCTION__); })); | |||
2922 | compiled_data->compile_params = compile_params; | |||
2923 | } | |||
2924 | ||||
2925 | void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size) | |||
2926 | { | |||
2927 | if (model->graph && out_size > 0) | |||
2928 | ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]); | |||
2929 | if (model->compiled_data && model->compiled_data->graph && out_size > 1) | |||
2930 | ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]); | |||
2931 | if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2) | |||
2932 | ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]); | |||
2933 | if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3) | |||
2934 | ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]); | |||
2935 | } | |||
2936 | ||||
2937 | void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context) | |||
2938 | { | |||
2939 | if (model->graph) | |||
2940 | ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context); | |||
2941 | } | |||
2942 | ||||
2943 | static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
2944 | { | |||
2945 | int i; | |||
2946 | const int parameter_size = compiled_data->parameters->rnum; | |||
2947 | ccv_array_free(compiled_data->parameters); | |||
2948 | if (compiled_data->parameter_flags) | |||
2949 | ccfreefree(compiled_data->parameter_flags); | |||
2950 | const int internal_size = compiled_data->internals->rnum; | |||
2951 | ccv_array_free(compiled_data->internals); | |||
2952 | assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum == parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data ->ids.parameters->rnum == parameter_size) ; else __assert_fail ("compiled_data->ids.parameters->rnum == parameter_size" , "ccv_cnnp_model.c", 2952, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2953 | assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size ) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals ->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size" , "ccv_cnnp_model.c", 2953, __extension__ __PRETTY_FUNCTION__ ); })); | |||
2954 | for (i = 0; i < parameter_size; i++) | |||
2955 | ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i)))); | |||
2956 | ccv_array_free(compiled_data->ids.parameters); | |||
2957 | for (i = 0; i < internal_size; i++) | |||
2958 | ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data) ) + (size_t)(compiled_data->ids.internals)->rsize * (size_t )(i)))); | |||
2959 | ccv_array_free(compiled_data->ids.internals); | |||
2960 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
2961 | if (compiled_data->tensors.parameters) | |||
2962 | { | |||
2963 | for (i = 0; i < parameter_size * parallel_count; i++) | |||
2964 | // If it is not marked as not belonging, we can free it. | |||
2965 | if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)) | |||
2966 | if (compiled_data->tensors.parameters[i]) | |||
2967 | ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]); | |||
2968 | for (i = 0; i < internal_size * parallel_count; i++) | |||
2969 | if (compiled_data->tensors.internals[i]) | |||
2970 | ccv_nnc_tensor_free(compiled_data->tensors.internals[i]); | |||
2971 | ccfreefree(compiled_data->tensors.parameters); | |||
2972 | } | |||
2973 | if (compiled_data->tensors.gradients) | |||
2974 | { | |||
2975 | for (i = 0; i < parameter_size * parallel_count; i++) | |||
2976 | { | |||
2977 | if (compiled_data->tensors.gradients[i]) | |||
2978 | ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]); | |||
2979 | if (compiled_data->tensors.accum_gradients[i]) | |||
2980 | ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]); | |||
2981 | } | |||
2982 | ccfreefree(compiled_data->tensors.gradients); | |||
2983 | } | |||
2984 | if (compiled_data->minimize.parameters) | |||
2985 | { | |||
2986 | for (i = 0; i < compiled_data->minimize.parameters->rnum; i++) | |||
2987 | ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)-> data)) + (size_t)(compiled_data->minimize.parameters)-> rsize * (size_t)(i)))); | |||
2988 | ccv_array_free(compiled_data->minimize.parameters); | |||
2989 | } | |||
2990 | if (compiled_data->rewindables) | |||
2991 | ccv_array_free(compiled_data->rewindables); | |||
2992 | if (compiled_data->tensors_init.v) | |||
2993 | ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1))); | |||
2994 | if (compiled_data->evaluate.tos) | |||
2995 | ccfreefree(compiled_data->evaluate.tos); | |||
2996 | compiled_data->evaluate.tos = 0; | |||
2997 | if (compiled_data->stream_map) | |||
2998 | { | |||
2999 | khiter_t k; | |||
3000 | for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k) | |||
3001 | { | |||
3002 | if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>> (((k)&0xfU)<<1))&3))) | |||
3003 | continue; | |||
3004 | ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]); | |||
3005 | ccv_nnc_stream_context_free(stream); | |||
3006 | } | |||
3007 | kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map); | |||
3008 | } | |||
3009 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
3010 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | |||
3011 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | |||
3012 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | |||
3013 | if (compiled_data->gradient_checkpoints) | |||
3014 | { | |||
3015 | for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++) | |||
3016 | { | |||
3017 | ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)-> data)) + (size_t)(compiled_data->gradient_checkpoints)-> rsize * (size_t)(i))); | |||
3018 | assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__ ({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs" , "ccv_cnnp_model.c", 3018, __extension__ __PRETTY_FUNCTION__ ); })); | |||
3019 | ccfreefree(checkpoint->inputs); | |||
3020 | ccv_array_free(checkpoint->tensor_symbols); | |||
3021 | } | |||
3022 | ccv_array_free(compiled_data->gradient_checkpoints); | |||
3023 | } | |||
3024 | ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc); | |||
3025 | ccfreefree(compiled_data); | |||
3026 | } | |||
3027 | ||||
3028 | void ccv_cnnp_model_free(ccv_cnnp_model_t* const model) | |||
3029 | { | |||
3030 | if (model->isa->deinit) | |||
3031 | model->isa->deinit(model); | |||
3032 | if (model->io) | |||
3033 | { | |||
3034 | int i; | |||
3035 | for (i = 0; i < model->io->rnum; i++) | |||
3036 | { | |||
3037 | ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model-> io)->rsize * (size_t)(i))); | |||
3038 | if (model_io->outgoings) | |||
3039 | ccv_array_free(model_io->outgoings); | |||
3040 | if (model_io->incomings) | |||
3041 | ccv_array_free(model_io->incomings); | |||
3042 | if (model_io->dependencies) | |||
3043 | ccv_array_free(model_io->dependencies); | |||
3044 | ccfreefree(model_io); | |||
3045 | } | |||
3046 | ccv_array_free(model->io); | |||
3047 | } | |||
3048 | if (model->parameter_indices) | |||
3049 | ccv_array_free(model->parameter_indices); | |||
3050 | if (model->inputs) | |||
3051 | ccfreefree(model->inputs); | |||
3052 | if (model->graph) | |||
3053 | ccv_nnc_symbolic_graph_free(model->graph); | |||
3054 | if (model->compiled_data) | |||
3055 | _ccv_cnnp_compiled_data_free(model, model->compiled_data); | |||
3056 | if (model->name) | |||
3057 | ccfreefree(model->name); | |||
3058 | ccfreefree(model); | |||
3059 | } | |||
3060 | ||||
3061 | void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model) | |||
3062 | { | |||
3063 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
3064 | if (!compiled_data) | |||
3065 | return; | |||
3066 | if (compiled_data->graph) | |||
3067 | ccv_nnc_graph_cancel(compiled_data->graph); | |||
3068 | if (compiled_data->apply_gradients.graph) | |||
3069 | ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph); | |||
3070 | } |