File: | nnc/ccv_cnnp_model.c |
Warning: | line 965, column 100 Array access (via field 'outgrads') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv_nnc.h" | ||||
2 | #include "ccv_nnc_easy.h" | ||||
3 | #include "ccv_nnc_internal.h" | ||||
4 | #include "ccv_internal.h" | ||||
5 | #include "_ccv_cnnp_model.h" | ||||
6 | |||||
7 | // MARK - Level-5 API | ||||
8 | |||||
9 | ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size) | ||||
10 | { | ||||
11 | assert(input_size > 0)((void) sizeof ((input_size > 0) ? 1 : 0), __extension__ ( { if (input_size > 0) ; else __assert_fail ("input_size > 0" , "ccv_cnnp_model.c", 11, __extension__ __PRETTY_FUNCTION__); })); | ||||
12 | if (!model->io) | ||||
13 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | ||||
14 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size); | ||||
15 | model_io->param_ref = 0; | ||||
16 | model_io->param_sel = 0; | ||||
17 | model_io->visit = 0; | ||||
18 | model_io->model = model; | ||||
19 | model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | ||||
20 | model_io->outgoings = 0; | ||||
21 | model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1); | ||||
22 | ccv_array_push(model->io, &model_io); | ||||
23 | int i; | ||||
24 | ccv_array_resize(model_io->incomings, input_size); | ||||
25 | memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t )(model_io->incomings)->rsize * (size_t)(0))), inputs, sizeof(ccv_cnnp_model_io_t) * input_size); | ||||
26 | for (i = 0; i < input_size; i++) | ||||
27 | { | ||||
28 | if (!inputs[i]->outgoings) | ||||
29 | inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | ||||
30 | ccv_array_push(inputs[i]->outgoings, &model_io); | ||||
31 | } | ||||
32 | return model_io; | ||||
33 | } | ||||
34 | |||||
35 | int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model) | ||||
36 | { | ||||
37 | return model->output_size; | ||||
38 | } | ||||
39 | |||||
40 | ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index) | ||||
41 | { | ||||
42 | if (!model->io) | ||||
43 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | ||||
44 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s)); | ||||
45 | model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1; | ||||
46 | model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1; | ||||
47 | model_io->visit = 0; | ||||
48 | model_io->model = model; | ||||
49 | model_io->outputs = 0; | ||||
50 | model_io->incomings = 0; | ||||
51 | model_io->outgoings = 0; | ||||
52 | ccv_array_push(model->io, &model_io); | ||||
53 | return model_io; | ||||
54 | } | ||||
55 | |||||
56 | void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context) | ||||
57 | { | ||||
58 | model->notify_hook.func = func; | ||||
59 | model->notify_hook.context = context; | ||||
60 | } | ||||
61 | |||||
// Broadcast a notification: first to the user-installed hook (if any), then to
// the model class implementation's own notify handler (if it defines one).
void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload)
{
	if (model->notify_hook.func)
		model->notify_hook.func(model, tag, payload, model->notify_hook.context);
	if (model->isa->notify)
		model->isa->notify(model, tag, payload);
}
69 | |||||
70 | static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size) | ||||
71 | { | ||||
72 | int i, j; | ||||
73 | for (i = 0; i < graph_exec_symbol_size; i++) | ||||
74 | { | ||||
75 | ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i; | ||||
76 | // Check whether this tensor symbol has any duplicate. | ||||
77 | for (j = i + 1; j < graph_exec_symbol_size;) | ||||
78 | { | ||||
79 | ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j; | ||||
80 | // If there is a same tensor symbol, remove it. | ||||
81 | if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph) | ||||
82 | { | ||||
83 | if (j + 1 < graph_exec_symbol_size) | ||||
84 | *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1]; | ||||
85 | --graph_exec_symbol_size; | ||||
86 | continue; | ||||
87 | } | ||||
88 | ++j; | ||||
89 | } | ||||
90 | } | ||||
91 | return graph_exec_symbol_size; | ||||
92 | } | ||||
93 | |||||
// Context threaded through _ccv_cnnp_add_to_array while building a model.
typedef struct {
	ccv_cnnp_model_sequence_t* sequence; // Name sequence used to compose unique symbol ids.
	char prefix; // 't' for trainable parameters, 'r' for retained internals.
	ccv_array_t* symbols; // Accumulated ccv_nnc_tensor_symbol_t entries.
	ccv_array_t* ids; // Heap-allocated id string per symbol, parallel to symbols.
} ccv_cnnp_model_add_to_array_context_t;
100 | |||||
101 | static void _ccv_cnnp_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol) | ||||
102 | { | ||||
103 | ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context; | ||||
104 | ccv_cnnp_model_t* const model = add_to_array_context->sequence->model; | ||||
105 | int i; | ||||
106 | if (!model->parameter_indices) | ||||
107 | model->parameter_indices = ccv_array_new(sizeof(int), 0, 0); | ||||
108 | for (i = 0; i < add_to_array_context->symbols->rnum; i++) | ||||
109 | { | ||||
110 | const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data )) + (size_t)(add_to_array_context->symbols)->rsize * ( size_t)(i))); | ||||
111 | if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph) | ||||
112 | { | ||||
113 | // Only add to parameter_indices if it is trainable. | ||||
114 | if (add_to_array_context->prefix == 't') | ||||
115 | ccv_array_add_unique_int(model->parameter_indices, i); | ||||
116 | // Found it, return, don't add it. | ||||
117 | return; | ||||
118 | } | ||||
119 | } | ||||
120 | // Only add to parameter_indices if it is trainable. | ||||
121 | if (add_to_array_context->prefix == 't') | ||||
122 | ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum); | ||||
123 | // This is a new one, no need to add_unique_int, it is unique. | ||||
124 | ccv_array_push(add_to_array_context->symbols, &symbol); | ||||
125 | char id[2048]; | ||||
126 | id[0] = add_to_array_context->prefix; | ||||
127 | id[1] = '-'; | ||||
128 | int total_len = 2; | ||||
129 | for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++) | ||||
130 | { | ||||
131 | const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences )->data)) + (size_t)(add_to_array_context->sequence-> sequences)->rsize * (size_t)(i))); | ||||
132 | int len; | ||||
133 | if (name->name && name->name[0] != '\0') | ||||
134 | len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence); | ||||
135 | else | ||||
136 | len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence); | ||||
137 | total_len += len; | ||||
138 | if (total_len >= 2047) | ||||
139 | break; | ||||
140 | } | ||||
141 | if (total_len < 2047) | ||||
142 | total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it); | ||||
143 | assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__ ({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048" , "ccv_cnnp_model.c", 143, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
144 | char *heap_id = (char*)ccmallocmalloc(total_len + 1); | ||||
145 | memcpy(heap_id, id, total_len + 1); | ||||
146 | ccv_array_push(add_to_array_context->ids, &heap_id); | ||||
147 | ++add_to_array_context->sequence->it; | ||||
148 | } | ||||
149 | |||||
// One-time setup of a freshly calloc'ed compiled_data.
static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size)
{
	// fits and f share the trailing symbol storage allocated with the struct
	// (see the output_size * 2 sizing in _ccv_cnnp_model_compile); f starts
	// right after the output_size fits slots.
	compiled_data->f = compiled_data->fits + output_size;
	// Initialize the xpu allocator bookkeeping: no memory pressure handler yet,
	// empty freed / allocated hash maps.
	compiled_data->xpu_alloc.mp_hdr = -1;
	compiled_data->xpu_alloc.freed = kh_init(dy_str);
	compiled_data->xpu_alloc.allocd = kh_init(dy_alloc);
}
157 | |||||
// Core compile step: materialize the model into its symbolic graph.
// Creates input symbols, runs the model's build function (collecting trainable
// parameters and retained internals along the way), simplifies the graph,
// allocates compiled_data, and — when a loss command is given — appends the
// loss computation per output. The resulting destinations become the
// evaluation targets.
static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss)
{
	assert(model->graph);
	model->inputs = ccmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size);
	int i;
	for (i = 0; i < input_size; i++)
		model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0);
	// Two collectors share one name sequence: 't' gathers trainable parameters,
	// 'r' gathers retained internal tensors.
	ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
	ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
	ccv_cnnp_model_sequence_t model_sequence = {
		.bank = kh_init(ccv_cnnp_model_name_bank)
	};
	ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
		.sequence = &model_sequence,
		.prefix = 't',
		.symbols = parameters,
		.ids = parameter_ids,
	};
	ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
	ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
	ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
		.sequence = &model_sequence,
		.prefix = 'r',
		.symbols = internals,
		.ids = internal_ids,
	};
	ccv_cnnp_model_build_data_t build_data = {
		.model_sequence = &model_sequence,
		.add_to_array = _ccv_cnnp_add_to_array,
		.parameters = parameters,
		.context = {
			.add_to_parameter = &add_to_parameter_context,
			.add_to_output = &add_to_output_context,
		},
	};
	// build_data is only alive during the build call; model->data is a loan.
	model->data = &build_data;
	ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0);
	model->data = 0;
	kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
	ccv_array_free(model_sequence.sequences);
	// Assert no parameter is alias.
	for (i = 0; i < parameters->rnum; i++)
	{
		const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i);
		const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter);
		assert(alias_to.graph == 0); // Cannot find the one alias to.
	}
	// Assert no internal is alias.
	for (i = 0; i < internals->rnum; i++)
	{
		const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i);
		const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(retained.graph, retained);
		assert(alias_to.graph == 0); // Cannot find the one alias to.
	}
	const int output_size = model->output_size;
	ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
	// Full simplification pass before loss insertion.
	ccv_nnc_symbolic_graph_simplify(model->graph,
		SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
			CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
			CCV_NNC_SIMPLIFY_OPS_FUSION,
			CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
		model->inputs, input_size,
		model->outputs, output_size,
		SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
	// The trailing storage holds fits[output_size] followed by f[output_size];
	// one symbol lives inside the struct itself, hence output_size * 2 - 1.
	ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1));
	_ccv_cnnp_compiled_data_init(compiled_data, output_size);
	const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph);
	assert(evaluate_to_size > 0);
	// Snapshot the current destinations as the evaluation targets.
	compiled_data->evaluate.tos = ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
	memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size);
	compiled_data->loss = loss;
	if (loss.cmd == CCV_NNC_NOOP)
	{
		// If no loss function provided, there is no fits.
		for (i = 0; i < output_size; i++)
		{
			compiled_data->fits[i] = NO_TENSOR_SYMBOL;
			const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]);
			if (alias_to.d < 0)
				compiled_data->f[i] = model->outputs[i];
			else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original.
				int ofs[CCV_NNC_MAX_DIM_ALLOC];
				int inc[CCV_NNC_MAX_DIM_ALLOC];
				ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc);
				int j;
				for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC; j++)
					{ assert(ofs[j] == 0); } // There is no ofs.
				compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet.
			}
		}
	} else {
		// Append the loss per output: fits[i] is the ground-truth placeholder,
		// f[i] the auto-shaped loss output.
		for (i = 0; i < output_size; i++)
		{
			const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]);
			const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0);
			compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0);
			ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit), TENSOR_SYMBOL_LIST(compiled_data->f[i]), 0);
		}
	}
	ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
	ccv_nnc_symbolic_graph_simplify(model->graph,
		SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION), // Only do Ops fusion, in this way, we can fuse the loss function.
		0, 0, // No need to provide binds at this point.
		compiled_data->f, model->output_size,
		SYMBOLIC_GRAPH_SOURCES(model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph));
	// If inputs are from GPU, stream type is GPU.
	// compiled_data takes ownership of the collected arrays.
	compiled_data->parameters = parameters;
	compiled_data->internals = internals;
	compiled_data->ids.parameters = parameter_ids;
	compiled_data->ids.internals = internal_ids;
}
269 | |||||
270 | static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) | ||||
271 | { | ||||
272 | ccv_array_t* const stack = (ccv_array_t*)context; | ||||
273 | ccv_array_push(stack, &symbol.d); | ||||
274 | } | ||||
275 | |||||
276 | static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) | ||||
277 | { | ||||
278 | const ccv_nnc_tensor_symbol_t src_symbol = { | ||||
279 | .d = src_index, | ||||
280 | .graph = src_graph | ||||
281 | }; | ||||
282 | const ccv_nnc_tensor_symbol_t dest_symbol = { | ||||
283 | .d = dest_index, | ||||
284 | .graph = dest_graph | ||||
285 | }; | ||||
286 | const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); | ||||
287 | ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params); | ||||
288 | int ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
289 | int inc[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
290 | if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc)) | ||||
291 | ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc); | ||||
292 | } | ||||
293 | |||||
294 | static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) | ||||
295 | { | ||||
296 | const ccv_nnc_tensor_symbol_t src_symbol = { | ||||
297 | .d = src_index, | ||||
298 | .graph = src_graph | ||||
299 | }; | ||||
300 | const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); | ||||
301 | const ccv_nnc_tensor_symbol_t dest_symbol = { | ||||
302 | .d = dest_index, | ||||
303 | .graph = dest_graph | ||||
304 | }; | ||||
305 | const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol); | ||||
306 | return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0; | ||||
307 | } | ||||
308 | |||||
309 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size); | ||||
310 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data); | ||||
311 | |||||
// Context for _ccv_cnnp_cmd_update_for_execs: maps exec symbols to concrete
// execs and fans updates out to data-parallel copies.
typedef struct {
	int parallel_count; // Copies 1..parallel_count-1 are updated too; 1 means no extra copies.
	ccv_nnc_symbolic_graph_t* graph; // Graph the symbols (and their copies) live in.
	ccv_nnc_graph_exec_arena_t* graph_exec_arena; // Resolves symbols to concrete execs.
} ccv_nnc_graph_exec_update_t;
317 | |||||
318 | static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint) | ||||
319 | { | ||||
320 | ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context; | ||||
321 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena; | ||||
322 | ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol); | ||||
323 | ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd); | ||||
324 | ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint); | ||||
325 | const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph; | ||||
326 | const int parallel_count = graph_exec_update->parallel_count; | ||||
327 | int i; | ||||
328 | for (i = 1; i < parallel_count; i++) | ||||
329 | { | ||||
330 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i)); | ||||
331 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) | ||||
332 | { | ||||
333 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); | ||||
334 | ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint); | ||||
335 | } | ||||
336 | } | ||||
337 | } | ||||
338 | |||||
339 | void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size) | ||||
340 | { | ||||
341 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 341, __extension__ __PRETTY_FUNCTION__); })); | ||||
342 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 342, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
343 | assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if (!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c" , 343, __extension__ __PRETTY_FUNCTION__); })); | ||||
344 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
345 | init->graph = ccv_nnc_symbolic_graph_new(); | ||||
346 | ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0); | ||||
347 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack); | ||||
348 | _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss); | ||||
349 | init->parallel_count = model->parallel_count; | ||||
350 | init->memory_compression = model->memory_compression; | ||||
351 | init->compiled_data->stream_type = model->compiled_data->stream_type; | ||||
352 | init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer; | ||||
353 | init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size; | ||||
354 | if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | ||||
355 | _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0); | ||||
356 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0); | ||||
357 | ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0); | ||||
358 | int i, j; | ||||
359 | // Verify parameters, internals and saved_aux in both graph has the same dimensionality. | ||||
360 | for (i = 0; i < compiled_data->parameters->rnum; i++) | ||||
361 | { | ||||
362 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | ||||
363 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 363, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
364 | } | ||||
365 | for (i = 0; i < compiled_data->internals->rnum; i++) | ||||
366 | { | ||||
367 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; | ||||
368 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 368, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
369 | } | ||||
370 | // Update inputs. | ||||
371 | assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size) ? 1 : 0), __extension__ ({ if (model->input_size == init-> input_size) ; else __assert_fail ("model->input_size == init->input_size" , "ccv_cnnp_model.c", 371, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
372 | for (i = 0; i < model->input_size; i++) | ||||
373 | if (model->inputs[i].d >= 0) | ||||
374 | { | ||||
375 | assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0" , "ccv_cnnp_model.c", 375, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
376 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d); | ||||
377 | } | ||||
378 | // Update outputs. | ||||
379 | assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size ) ? 1 : 0), __extension__ ({ if (model->output_size == init ->output_size) ; else __assert_fail ("model->output_size == init->output_size" , "ccv_cnnp_model.c", 379, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
380 | for (i = 0; i < model->output_size; i++) | ||||
381 | { | ||||
382 | if (model->outputs[i].d >= 0) | ||||
383 | { | ||||
384 | assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->outputs[i].d >= 0) ; else __assert_fail ( "init->outputs[i].d >= 0", "ccv_cnnp_model.c", 384, __extension__ __PRETTY_FUNCTION__); })); | ||||
385 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d); | ||||
386 | } | ||||
387 | if (model->outputs[i].d != model->compiled_data->f[i].d) | ||||
388 | { | ||||
389 | assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data ->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[ i].d != init->compiled_data->f[i].d) ; else __assert_fail ("init->outputs[i].d != init->compiled_data->f[i].d" , "ccv_cnnp_model.c", 389, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
390 | if (model->compiled_data->f[i].d >= 0) | ||||
391 | { | ||||
392 | assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->compiled_data->f[i] .d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0" , "ccv_cnnp_model.c", 392, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
393 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d); | ||||
394 | } | ||||
395 | } | ||||
396 | } | ||||
397 | // Go through the graph to set tensor on matching symbols | ||||
398 | for (i = 0; i < stack->rnum; i++) | ||||
399 | { | ||||
400 | const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize * (size_t)(i))); | ||||
401 | // If exceed range, skip. | ||||
402 | if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) || | ||||
403 | d >= ccv_nnc_graph_exec_symbol_count(model->graph)) | ||||
404 | continue; | ||||
405 | const ccv_nnc_graph_exec_symbol_t src_symbol = { | ||||
406 | .d = d, | ||||
407 | .graph = init->graph | ||||
408 | }; | ||||
409 | const ccv_nnc_graph_exec_symbol_t dest_symbol = { | ||||
410 | .d = d, | ||||
411 | .graph = model->graph | ||||
412 | }; | ||||
413 | const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol); | ||||
414 | const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol); | ||||
415 | // If the name doesn't match, skip. | ||||
416 | if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP) | ||||
417 | continue; | ||||
418 | // Now get all the inputs and outputs, if matches, set them. | ||||
419 | const int* src_inputs; | ||||
420 | int src_input_size; | ||||
421 | const int* src_outputs; | ||||
422 | int src_output_size; | ||||
423 | ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size); | ||||
424 | const int* dest_inputs; | ||||
425 | int dest_input_size; | ||||
426 | const int* dest_outputs; | ||||
427 | int dest_output_size; | ||||
428 | ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size); | ||||
429 | // We may have unmatched input / output size because this is the minimizer and it has | ||||
430 | // different saved_aux (for example, when we shrunk with CMD_NOOP). | ||||
431 | if (src_input_size != dest_input_size) | ||||
432 | continue; | ||||
433 | if (src_output_size != dest_output_size) | ||||
434 | continue; | ||||
435 | ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd); | ||||
436 | // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because | ||||
437 | // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original | ||||
438 | // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That | ||||
439 | // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as | ||||
440 | // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec | ||||
441 | // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not | ||||
442 | // a new exec symbol. | ||||
443 | for (j = 0; j < src_input_size; j++) | ||||
444 | if (src_inputs[j] >= 0) | ||||
445 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]); | ||||
446 | for (j = 0; j < src_output_size; j++) | ||||
447 | if (src_outputs[j] >= 0) | ||||
448 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]); | ||||
449 | } | ||||
450 | ccv_array_free(stack); | ||||
451 | // After this, we get all tensors in the model graph resolved through tensor_auto. | ||||
452 | ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0); | ||||
453 | // Verify symbols we get matches. | ||||
454 | const int parameter_size = compiled_data->parameters->rnum; | ||||
455 | for (i = 0; i < parameter_size; i++) | ||||
456 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->parameters)->data)) + (size_t)(compiled_data ->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if ( ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data-> parameters)->data)) + (size_t)(compiled_data->parameters )->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d" , "ccv_cnnp_model.c", 456, __extension__ __PRETTY_FUNCTION__) ; })); } | ||||
457 | const int internal_size = compiled_data->internals->rnum; | ||||
458 | for (i = 0; i < internal_size; i++) | ||||
459 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->internals)->data)) + (size_t)(compiled_data ->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->internals)-> data)) + (size_t)(init->compiled_data->internals)->rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((compiled_data->internals)->data)) + (size_t)(compiled_data->internals)->rsize * (size_t)(i ))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init ->compiled_data->internals)->data)) + (size_t)(init-> compiled_data->internals)->rsize * (size_t)(i))))->d ) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d" , "ccv_cnnp_model.c", 459, __extension__ __PRETTY_FUNCTION__) ; })); } | ||||
460 | // Go through compiled data. | ||||
461 | if (compiled_data->tensor_arena) | ||||
462 | { | ||||
463 | const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph); | ||||
464 | if (flag == 0 && compiled_data->graph_exec_arena) | ||||
465 | { | ||||
466 | ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph); | ||||
467 | // Since we will reinit, if we previously set is_test, we need to set it again. | ||||
468 | if (compiled_data->is_test) | ||||
469 | { | ||||
470 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
471 | ccv_nnc_graph_exec_update_t update = { | ||||
472 | .parallel_count = parallel_count, | ||||
473 | .graph = model->graph, | ||||
474 | .graph_exec_arena = compiled_data->graph_exec_arena, | ||||
475 | }; | ||||
476 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); | ||||
477 | } | ||||
478 | } else | ||||
479 | // Free-up tensor arena & graph exec arena. | ||||
480 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | ||||
481 | } | ||||
482 | // There are other compiled graphs, for accum and apply gradients. | ||||
483 | // However, the main conclusion is, these absorb operations shouldn't impact parameters. | ||||
484 | // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we | ||||
485 | // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot | ||||
486 | // be changed otherwise parameters' shape will be meaningless. The same goes to internals. | ||||
487 | // That is why we don't update these compiled graphs at all this point. | ||||
488 | // Free the model, we've already "absorbed" it. | ||||
489 | ccv_cnnp_model_free(init); | ||||
490 | } | ||||
491 | |||||
492 | void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss) | ||||
493 | { | ||||
494 | assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model-> input_size == 0) ? 1 : 0), __extension__ ({ if (input_size == model->input_size || model->input_size == 0) ; else __assert_fail ("input_size == model->input_size || model->input_size == 0" , "ccv_cnnp_model.c", 494, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
495 | if (model->input_size == 0) | ||||
496 | model->input_size = input_size; | ||||
497 | if (!model->graph) // The graph is not compiled yet. | ||||
498 | { | ||||
499 | model->graph = ccv_nnc_symbolic_graph_new(); | ||||
500 | _ccv_cnnp_model_compile(model, inputs, input_size, loss); | ||||
501 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 501, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
502 | int i, flag = 0; | ||||
503 | for (i = 0; !flag && i < input_size; i++) | ||||
504 | flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY); | ||||
505 | // If inputs are from GPU, stream type is GPU. | ||||
506 | model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | ||||
507 | model->compiled_data->minimize.minimizer = minimizer; | ||||
508 | model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer); | ||||
509 | } else { | ||||
510 | // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model. | ||||
511 | // And then absorb the "new model" to the old one. | ||||
512 | ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model); | ||||
513 | ccv_cnnp_model_absorb(model, init, inputs, input_size); | ||||
514 | // Reset minimizer. | ||||
515 | ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0); | ||||
516 | } | ||||
517 | } | ||||
518 | |||||
519 | ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model) | ||||
520 | { | ||||
521 | return _ccv_cnnp_model_copy(model, 0); | ||||
522 | } | ||||
523 | |||||
524 | void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size) | ||||
525 | { | ||||
526 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 526, __extension__ __PRETTY_FUNCTION__); })); | ||||
527 | assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0 ), __extension__ ({ if (output_size == model->output_size) ; else __assert_fail ("output_size == model->output_size" , "ccv_cnnp_model.c", 527, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
528 | ccv_nnc_symbolic_graph_t* const graph = model->graph; | ||||
529 | ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0); | ||||
530 | int i; | ||||
531 | for (i = 0; i < output_size; i++) | ||||
532 | { | ||||
533 | assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL" , "ccv_cnnp_model.c", 533, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
534 | outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]); | ||||
535 | } | ||||
536 | } | ||||
537 | |||||
538 | void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size) | ||||
539 | { | ||||
540 | if (workspace_size == model->workspace_size) | ||||
541 | return; | ||||
542 | model->workspace_size = workspace_size; | ||||
543 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
544 | if (compiled_data && compiled_data->graph) | ||||
545 | ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0); | ||||
546 | } | ||||
547 | |||||
548 | void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel) | ||||
549 | { | ||||
550 | if (parallel == 0) | ||||
551 | model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); | ||||
552 | else | ||||
553 | model->parallel_count = parallel; | ||||
554 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
555 | if (compiled_data) | ||||
556 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 556, __extension__ __PRETTY_FUNCTION__) ; })); } | ||||
557 | } | ||||
558 | |||||
559 | void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression) | ||||
560 | { | ||||
561 | model->memory_compression = memory_compression; | ||||
562 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
563 | if (compiled_data) | ||||
564 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 564, __extension__ __PRETTY_FUNCTION__) ; })); } | ||||
565 | } | ||||
566 | |||||
// Context passed to _ccv_cnnp_init_states_for_tensors: everything needed to
// resolve a tensor symbol to a concrete tensor, run its initializer once, and
// broadcast the result to data-parallel copies.
typedef struct {
	int parallel_count; // Number of data-parallel devices; copies 1..parallel_count-1 receive broadcasts.
	ccv_nnc_symbolic_graph_t* graph; // Symbolic graph the output symbols (and their copies) belong to.
	ccv_cnnp_compiled_data_t* compiled_data; // Holds the tensors_init bitmap of already-initialized tensors.
	ccv_nnc_tensor_arena_t* tensor_arena; // Arena used to map symbols to concrete tensors.
} ccv_nnc_tensor_init_states_t;
573 | |||||
574 | static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data) | ||||
575 | { | ||||
576 | int i; | ||||
577 | for (i = 0; i < compiled_data->parameters->rnum; i++) | ||||
578 | { | ||||
579 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | ||||
580 | if (!(compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f)))) | ||||
581 | return 1; | ||||
582 | } | ||||
583 | for (i = 0; i < compiled_data->internals->rnum; i++) | ||||
584 | { | ||||
585 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; | ||||
586 | if (!(compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f)))) | ||||
587 | return 1; | ||||
588 | } | ||||
589 | return 0; | ||||
590 | } | ||||
591 | |||||
592 | static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol) | ||||
593 | { | ||||
594 | ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context; | ||||
595 | ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena; | ||||
596 | ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol); | ||||
597 | if (!output_tensor) | ||||
598 | return; | ||||
599 | const int d = output_symbol.d; | ||||
600 | assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data-> tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states ->compiled_data->tensors_init.size) ; else __assert_fail ("d < tensor_init_states->compiled_data->tensors_init.size" , "ccv_cnnp_model.c", 600, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
601 | if (tensor_init_states->compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f))) | ||||
602 | return; | ||||
603 | tensor_init_states->compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f)); | ||||
604 | ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0); | ||||
605 | const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph; | ||||
606 | const int parallel_count = tensor_init_states->parallel_count; | ||||
607 | int i; | ||||
608 | for (i = 1; i < parallel_count; i++) | ||||
609 | { | ||||
610 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i)); | ||||
611 | if (copy) | ||||
612 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &output_tensor, 1, ©, 1, 0); | ||||
613 | } | ||||
614 | } | ||||
615 | |||||
616 | // This method can only handle cases we added new tensors and exec, never delete. This invariant is true because | ||||
617 | // we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup. | ||||
618 | static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model) | ||||
619 | { | ||||
620 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 620, __extension__ __PRETTY_FUNCTION__); })); | ||||
621 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 621, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
622 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
623 | assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__ ({ if (compiled_data->rewindables) ; else __assert_fail ( "compiled_data->rewindables", "ccv_cnnp_model.c", 623, __extension__ __PRETTY_FUNCTION__); })); | ||||
624 | int i; | ||||
625 | for (i = 0; i < compiled_data->rewindables->rnum; i++) | ||||
626 | { | ||||
627 | const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) + (size_t)(compiled_data->rewindables)->rsize * (size_t) (i))); | ||||
628 | if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC) | ||||
629 | ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec); | ||||
630 | else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR) | ||||
631 | ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor); | ||||
632 | } | ||||
633 | ccv_array_clear(compiled_data->rewindables); | ||||
634 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | ||||
635 | } | ||||
636 | |||||
637 | |||||
638 | static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name) | ||||
639 | { | ||||
640 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | ||||
641 | .type = CCV_CNNP_REWIND_TENSOR, | ||||
642 | .tensor = symbol | ||||
643 | }; | ||||
644 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | ||||
645 | ccv_array_push(rewind_symbols, &rewind_symbol); | ||||
646 | } | ||||
647 | |||||
648 | static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name) | ||||
649 | { | ||||
650 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | ||||
651 | .type = CCV_CNNP_REWIND_TENSOR, | ||||
652 | .tensor = symbol | ||||
653 | }; | ||||
654 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | ||||
655 | ccv_array_push(rewind_symbols, &rewind_symbol); | ||||
656 | } | ||||
657 | |||||
658 | static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) | ||||
659 | { | ||||
660 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | ||||
661 | .type = CCV_CNNP_REWIND_GRAPH_EXEC, | ||||
662 | .graph_exec = symbol | ||||
663 | }; | ||||
664 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | ||||
665 | ccv_array_push(rewind_symbols, &rewind_symbol); | ||||
666 | } | ||||
667 | |||||
668 | static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph) | ||||
669 | { | ||||
670 | ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol); | ||||
671 | if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0)) | ||||
672 | ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd); | ||||
673 | int i; | ||||
674 | for (i = 1; i < parallel_count; i++) | ||||
675 | { | ||||
676 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); | ||||
677 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol); | ||||
678 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) | ||||
679 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); | ||||
680 | } | ||||
681 | } | ||||
682 | |||||
683 | static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd) | ||||
684 | { | ||||
685 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 685, __extension__ __PRETTY_FUNCTION__); })); | ||||
686 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 686, __extension__ __PRETTY_FUNCTION__); })); | ||||
687 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd); | ||||
688 | int i; | ||||
689 | for (i = 1; i < parallel_count; i++) | ||||
690 | { | ||||
691 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); | ||||
692 | if (copy_symbol.graph) | ||||
693 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd); | ||||
694 | } | ||||
695 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena; | ||||
696 | if (graph_exec_arena) | ||||
697 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); | ||||
698 | // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph) | ||||
699 | ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena; | ||||
700 | if (gradient_graph_exec_arena) | ||||
701 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); | ||||
702 | } | ||||
703 | |||||
704 | static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice) | ||||
705 | { | ||||
706 | int this_parameter_flag = 0; | ||||
707 | const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]); | ||||
708 | int j, k; | ||||
709 | // For no-op, we can preserve previous saved_aux_size. | ||||
710 | if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP) | ||||
711 | { | ||||
712 | // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous | ||||
713 | // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between | ||||
714 | // noop and a minimizer. We don't want that because we do that in high-level frameworks to | ||||
715 | // make sure some model parameters don't update if we don't want them to. | ||||
716 | int old_saved_aux_size; | ||||
717 | if (old_minimizer.cmd == CCV_NNC_NOOP) | ||||
718 | { | ||||
719 | int input_size; | ||||
720 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0); | ||||
721 | if (input_size < 2) // This is not legit. | ||||
722 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); | ||||
723 | else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters. | ||||
724 | old_saved_aux_size = input_size - 2; | ||||
725 | } else | ||||
726 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); | ||||
727 | if (old_saved_aux_size != saved_aux_size) | ||||
728 | { | ||||
729 | this_parameter_flag = 1; | ||||
730 | if (saved_aux_size > old_saved_aux_size) | ||||
731 | { | ||||
732 | // Allocate new tensor symbols. | ||||
733 | const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]); | ||||
734 | for (j = old_saved_aux_size; j < saved_aux_size; j++) | ||||
735 | { | ||||
736 | saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0); | ||||
737 | saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0); | ||||
738 | for (k = 1; k < parallel_count; k++) | ||||
739 | { | ||||
740 | ccv_nnc_tensor_param_t dev_info = info; | ||||
741 | CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) & 0xfff) << 8)); | ||||
742 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); | ||||
743 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); | ||||
744 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy); | ||||
745 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy); | ||||
746 | } | ||||
747 | } | ||||
748 | } else { | ||||
749 | for (j = saved_aux_size; j < old_saved_aux_size; j++) | ||||
750 | { | ||||
751 | for (k = 1; k < parallel_count; k++) | ||||
752 | { | ||||
753 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); | ||||
754 | if (src_copy.d >= 0) | ||||
755 | { | ||||
756 | ccv_nnc_tensor_symbol_free(graph, src_copy); | ||||
757 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); | ||||
758 | } | ||||
759 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); | ||||
760 | if (dest_copy.d >= 0) | ||||
761 | { | ||||
762 | ccv_nnc_tensor_symbol_free(graph, dest_copy); | ||||
763 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); | ||||
764 | } | ||||
765 | } | ||||
766 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source); | ||||
767 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination); | ||||
768 | saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | ||||
769 | } | ||||
770 | } | ||||
771 | } | ||||
772 | } | ||||
773 | _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer); | ||||
774 | if (this_parameter_flag) | ||||
775 | { | ||||
776 | ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2]; | ||||
777 | ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1]; | ||||
778 | const int* inputs = 0; | ||||
779 | int input_size = 0; | ||||
780 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0); | ||||
781 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 781, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
782 | update_inputs[0].d = inputs[0]; | ||||
783 | update_inputs[0].graph = graph; | ||||
784 | update_inputs[1].d = inputs[1]; | ||||
785 | update_inputs[1].graph = graph; | ||||
786 | update_outputs[0] = updated_parameters[parameter_indice]; | ||||
787 | for (j = 0; j < saved_aux_size; j++) | ||||
788 | { | ||||
789 | update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source; | ||||
790 | update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination; | ||||
791 | } | ||||
792 | ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); | ||||
793 | for (k = 1; k < parallel_count; k++) | ||||
794 | { | ||||
795 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k); | ||||
796 | assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if (copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c" , 796, __extension__ __PRETTY_FUNCTION__); })); | ||||
797 | ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0); | ||||
798 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 798, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
799 | update_inputs[0].d = inputs[0]; | ||||
800 | update_inputs[0].graph = graph; | ||||
801 | update_inputs[1].d = inputs[1]; | ||||
802 | update_inputs[1].graph = graph; | ||||
803 | update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k); | ||||
804 | for (j = 0; j < saved_aux_size; j++) | ||||
805 | { | ||||
806 | update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); | ||||
807 | update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); | ||||
808 | } | ||||
809 | ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); | ||||
810 | } | ||||
811 | } | ||||
812 | return this_parameter_flag; | ||||
813 | } | ||||
814 | |||||
// A recorded "set minimizer for these parameters" request, kept so it can be
// re-applied when the graph is rebuilt.
typedef struct {
	int parameter_size; // Number of entries in the trailing parameters array.
	ccv_nnc_cmd_t minimizer; // The minimizer command to apply to those parameters.
	ccv_cnnp_model_io_t parameters[1]; // Trailing variable-length array (struct is allocated with extra space).
} ccv_cnnp_set_minimizer_for_parameter_t;
820 | |||||
821 | static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model) | ||||
822 | { | ||||
823 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
824 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 824, __extension__ __PRETTY_FUNCTION__); })); | ||||
825 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | ||||
826 | // We update all parameters, at this point, we have one minimizer. | ||||
827 | const int parameter_size = compiled_data->parameters->rnum; | ||||
828 | ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes; | ||||
829 | ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph; | ||||
830 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 830, __extension__ __PRETTY_FUNCTION__); })); | ||||
831 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
832 | ccv_array_t* const parameters = compiled_data->minimize.parameters; | ||||
833 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | ||||
834 | int i, j, flag = 0; | ||||
835 | for (i = 0; i < parameters->rnum; i++) | ||||
836 | { | ||||
837 | ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(i))); | ||||
838 | for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++) | ||||
839 | { | ||||
840 | const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel; | ||||
841 | assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_sel != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_sel != 0" , "ccv_cnnp_model.c", 841, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
842 | const int old_rnum = parameter_indices->rnum; | ||||
843 | ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices); | ||||
844 | const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref; | ||||
845 | assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_ref != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_ref != 0" , "ccv_cnnp_model.c", 845, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
846 | if (param_ref >= 0) | ||||
847 | { | ||||
848 | assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices-> rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum < parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum" , "ccv_cnnp_model.c", 848, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
849 | *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(old_rnum))) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref + old_rnum))); | ||||
850 | parameter_indices->rnum = old_rnum + 1; | ||||
851 | } | ||||
852 | } | ||||
853 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer); | ||||
854 | // We may have duplicated indices, but that is OK, we will set it twice. | ||||
855 | for (j = 0; j < parameter_indices->rnum; j++) | ||||
856 | { | ||||
857 | const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(j))); | ||||
858 | assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__ ({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size" , "ccv_cnnp_model.c", 858, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
859 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d)) | ||||
860 | flag = 1; | ||||
861 | } | ||||
862 | ccv_array_clear(parameter_indices); | ||||
863 | } | ||||
864 | ccv_array_free(parameter_indices); | ||||
865 | return flag; | ||||
866 | } | ||||
867 | |||||
// Re-stride the saved_aux map in place from a per-parameter stride of
// old_saved_aux_size to new_saved_aux_size, padding the newly exposed tail
// slots of each parameter with NO_TENSOR_SYMBOL.
// The array must already be large enough for parameter_size * new_saved_aux_size
// entries. Iteration runs from the last parameter backwards (and within each
// parameter from the highest index down) because source and destination regions
// overlap: the destination i * new_saved_aux_size + j is always at or beyond the
// source i * old_saved_aux_size + j, so a backward copy never clobbers unread data.
static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size)
{
	if (new_saved_aux_size == old_saved_aux_size)
		return; // Already at the requested stride, nothing to move.
	assert(new_saved_aux_size > old_saved_aux_size);
	int i, j;
	for (i = parameter_size - 1; i >= 0; i--)
	{
		// Fill the new tail slots for parameter i with the empty symbol.
		for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--)
			saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL;
		// Move the existing entries to their new, wider-strided positions.
		for (j = old_saved_aux_size - 1; j >= 0; j--)
			saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j];
	}
}
882 | |||||
883 | static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model) | ||||
884 | { | ||||
885 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
886 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 886, __extension__ __PRETTY_FUNCTION__); })); | ||||
887 | if (!compiled_data->rewindables) | ||||
888 | compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0); | ||||
889 | ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables); | ||||
890 | ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables); | ||||
891 | ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables); | ||||
892 | } | ||||
893 | |||||
894 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size) | ||||
895 | { | ||||
896 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
897 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 897, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
| |||||
898 | assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 898, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
899 | const int evaluate_to_size = compiled_data->evaluate.to_size; | ||||
900 | assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__ ({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0" , "ccv_cnnp_model.c", 900, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
901 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
902 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); | ||||
903 | compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count); | ||||
904 | int i, j; | ||||
905 | const int output_size = model->output_size; | ||||
906 | assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size * parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count" , "ccv_cnnp_model.c", 906, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
907 | if (fits
| ||||
908 | for (i = 0; i < output_size; i++) | ||||
909 | ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info); | ||||
910 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | ||||
911 | const int parameter_size = compiled_data->parameters->rnum; | ||||
912 | compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size); | ||||
913 | compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size); | ||||
914 | compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size); | ||||
915 | int parameter_size_maybe_more = parameter_size; | ||||
916 | compiled_data->disable_outgrad = disable_outgrad; | ||||
917 | int outgrad_size; | ||||
918 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0) | ||||
919 | outgrad_size = 0; | ||||
920 | else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs. | ||||
921 | outgrad_size = model->input_size; | ||||
922 | else { | ||||
923 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 923, __extension__ __PRETTY_FUNCTION__) ; })); // If it is disable all, gradient mode won't be this. | ||||
924 | outgrad_size = 0; | ||||
925 | for (i = 0; i < model->input_size; i++) | ||||
926 | if (!(disable_outgrad & ((uint64_t)1 << i))) | ||||
927 | ++outgrad_size; | ||||
928 | } | ||||
929 | compiled_data->outgrad_size = outgrad_size; | ||||
930 | parameter_size_maybe_more += outgrad_size; | ||||
931 | compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count); | ||||
932 | compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0; | ||||
933 | compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more); | ||||
934 | compiled_data->backward.to_size = parameter_size_maybe_more; | ||||
935 | if (gradient_mode
| ||||
936 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | ||||
937 | else if (disable_outgrad
| ||||
938 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | ||||
939 | else { // Compute minimize with gradients including selected inputs. | ||||
940 | assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__ ({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0" , "ccv_cnnp_model.c", 940, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
941 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 941, __extension__ __PRETTY_FUNCTION__) ; })); // If it is disable all, gradient mode won't be this. | ||||
942 | assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__ ({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0" , "ccv_cnnp_model.c", 942, __extension__ __PRETTY_FUNCTION__) ; })); | ||||
943 | ccv_nnc_tensor_symbol_t outgrads[outgrad_size]; | ||||
944 | j = 0; | ||||
945 | for (i = 0; i < model->input_size; i++) | ||||
946 | if (!(disable_outgrad & ((uint64_t)1 << i))) | ||||
947 | outgrads[j++] = model->inputs[i]; | ||||
948 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | ||||
949 | } | ||||
950 | _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size); | ||||
951 | if (compiled_data->minimize.parameters) | ||||
952 | _ccv_cnnp_apply_parameters_with_minimizer(model); | ||||
953 | for (i = 0; i < output_size; i++) | ||||
954 | { | ||||
955 | const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); | ||||
956 | // Init this to 1 so we can backprop. | ||||
957 | ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES); | ||||
958 | } | ||||
959 | for (i = 0; i < parameter_size_maybe_more; i++) | ||||
960 | compiled_data->backward.tos[i] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]); | ||||
961 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS); | ||||
962 | ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size); | ||||
963 | for (i = 0; i < parameter_size_maybe_more - parameter_size; i++) | ||||
964 | { | ||||
965 | const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]); | ||||
| |||||
966 | const int* tos; | ||||
967 | int to_size; | ||||
968 | ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size); | ||||
969 | if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes. | ||||
970 | { | ||||
971 | const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph); | ||||
972 | int flag = 0; | ||||
973 | for (j = i - 1; !flag && j >= 0; j--) | ||||
974 | flag = (destinations[j + parameter_size].d == outgrad.d); | ||||
975 | if (!flag) // Only if we cannot find it, we add it. | ||||
976 | ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad); | ||||
977 | } | ||||
978 | } | ||||
979 | if (parallel_count > 1) | ||||
980 | { | ||||
981 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, | ||||
982 | 0, 0, | ||||
983 | compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */, | ||||
984 | compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */, | ||||
985 | 0, 0, 0, | ||||
986 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, | ||||
987 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | ||||
988 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | ||||
989 | for (i = 0; i < evaluate_to_size; i++) | ||||
990 | for (j = 1; j < parallel_count; j++) | ||||
991 | { | ||||
992 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); | ||||
993 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | ||||
994 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; | ||||
995 | } | ||||
996 | for (i = 0; i < parameter_size_maybe_more; i++) | ||||
997 | for (j = 1; j < parallel_count; j++) | ||||
998 | { | ||||
999 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j); | ||||
1000 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | ||||
1001 | compiled_data->backward.tos[compiled_data->backward.to_size++] = copy; | ||||
1002 | } | ||||
1003 | } | ||||
1004 | // Only use memory compression if we are in gradient parameter mode. | ||||
1005 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES && model->memory_compression) | ||||
1006 | ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | ||||
1007 | compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size); | ||||
1008 | compiled_data->gradient_mode = gradient_mode; | ||||
1009 | } | ||||
1010 | |||||
1011 | void ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | ||||
1012 | { | ||||
1013 | assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (!compiled_data->tensors.parameters ) ; else __assert_fail ("!compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 1013, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1014 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1015 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1016 | const int internal_size = compiled_data->internals->rnum; | ||||
1017 | compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph); | ||||
1018 | compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t)); | ||||
1019 | compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)ccmallocmalloc((sizeof(ccv_nnc_tensor_t*) * parameter_size + sizeof(ccv_nnc_tensor_t*) * internal_size) * parallel_count); | ||||
1020 | compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count; | ||||
1021 | int i, j; | ||||
1022 | for (i = 0; i < parameter_size; i++) | ||||
1023 | { | ||||
1024 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | ||||
1025 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); | ||||
1026 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | ||||
1027 | compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0); | ||||
1028 | for (j = 1; j < parallel_count; j++) | ||||
1029 | { | ||||
1030 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | ||||
1031 | compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | ||||
1032 | } | ||||
1033 | } | ||||
1034 | for (i = 0; i < internal_size; i++) | ||||
1035 | { | ||||
1036 | const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ); | ||||
1037 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained); | ||||
1038 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | ||||
1039 | compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0); | ||||
1040 | for (j = 1; j < parallel_count; j++) | ||||
1041 | { | ||||
1042 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | ||||
1043 | compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0); | ||||
1044 | } | ||||
1045 | } | ||||
1046 | } | ||||
1047 | |||||
1048 | static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) | ||||
1049 | { | ||||
1050 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1050, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1051 | int i, j; | ||||
1052 | for (i = 0; i < tensor_size; i++) | ||||
1053 | { | ||||
1054 | if (!tensors[i]) | ||||
1055 | continue; | ||||
1056 | const int d = tensor_symbols[i].d; | ||||
1057 | if (!(tensors_init[d >> 5] & (1u << (d & 0x1f)))) | ||||
1058 | continue; | ||||
1059 | for (j = 1; j < parallel_count; j++) | ||||
1060 | if (tensors[i + j * tensor_size]) | ||||
1061 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &tensors[i], 1, &tensors[i + j * tensor_size], 1, 0); | ||||
1062 | } | ||||
1063 | } | ||||
1064 | |||||
1065 | static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count) | ||||
1066 | { | ||||
1067 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1067, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1068 | int i, j; | ||||
1069 | for (i = 0; i < tensor_size; i++) | ||||
1070 | { | ||||
1071 | const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | ||||
1072 | for (j = 1; j < parallel_count; j++) | ||||
1073 | { | ||||
1074 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | ||||
1075 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; | ||||
1076 | if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1077 | { // We shouldn't allocate this, free it up. | ||||
1078 | ccv_nnc_tensor_free(tensors[i + j * tensor_size]); | ||||
1079 | tensors[i + j * tensor_size] = 0; | ||||
1080 | } | ||||
1081 | } | ||||
1082 | } | ||||
1083 | } | ||||
1084 | |||||
1085 | static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds) | ||||
1086 | { | ||||
1087 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1087, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1088 | int i, j; | ||||
1089 | for (i = 0; i < tensor_size; i++) | ||||
1090 | { | ||||
1091 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | ||||
1092 | if (graph) | ||||
1093 | { | ||||
1094 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); | ||||
1095 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1096 | tensor_symbol = alias_to; | ||||
1097 | } | ||||
1098 | ccv_nnc_tensor_t* const tensor = tensors[i]; | ||||
1099 | if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1100 | { | ||||
1101 | const ccv_nnc_tensor_bind_t retained_bind = { | ||||
1102 | .symbol = tensor_symbol, | ||||
1103 | .tensor = tensor | ||||
1104 | }; | ||||
1105 | ccv_array_push(tensor_binds, &retained_bind); | ||||
1106 | } | ||||
1107 | for (j = 1; j < parallel_count; j++) | ||||
1108 | { | ||||
1109 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | ||||
1110 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; | ||||
1111 | if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1112 | { | ||||
1113 | const ccv_nnc_tensor_bind_t bind = { | ||||
1114 | .symbol = copy, | ||||
1115 | .tensor = tensors[i + j * tensor_size] | ||||
1116 | }; | ||||
1117 | ccv_array_push(tensor_binds, &bind); | ||||
1118 | } | ||||
1119 | } | ||||
1120 | } | ||||
1121 | } | ||||
1122 | |||||
1123 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data) | ||||
1124 | { | ||||
1125 | if (compiled_data->graph) | ||||
1126 | ccv_nnc_graph_free(compiled_data->graph); | ||||
1127 | compiled_data->graph = 0; | ||||
1128 | compiled_data->is_test = 0; | ||||
1129 | if (compiled_data->tensor_arena) | ||||
1130 | ccv_nnc_tensor_arena_free(compiled_data->tensor_arena); | ||||
1131 | compiled_data->tensor_arena = 0; | ||||
1132 | if (compiled_data->graph_exec_arena) | ||||
1133 | ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena); | ||||
1134 | compiled_data->graph_exec_arena = 0; | ||||
1135 | if (compiled_data->backward.from_ops) | ||||
1136 | ccfreefree(compiled_data->backward.from_ops); | ||||
1137 | compiled_data->backward.from_ops = 0; | ||||
1138 | if (compiled_data->evaluate.schedule) | ||||
1139 | ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule); | ||||
1140 | compiled_data->evaluate.schedule = 0; | ||||
1141 | if (compiled_data->backward.schedule) | ||||
1142 | ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule); | ||||
1143 | compiled_data->backward.schedule = 0; | ||||
1144 | } | ||||
1145 | |||||
1146 | static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data) | ||||
1147 | { | ||||
1148 | if (compiled_data->gradients) | ||||
1149 | ccfreefree(compiled_data->gradients); | ||||
1150 | compiled_data->gradients = 0; | ||||
1151 | if (compiled_data->updated_parameters) | ||||
1152 | ccfreefree(compiled_data->updated_parameters); | ||||
1153 | compiled_data->updated_parameters = 0; | ||||
1154 | compiled_data->update_nodes = 0; | ||||
1155 | compiled_data->saved_aux = 0; | ||||
1156 | } | ||||
1157 | |||||
1158 | static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data) | ||||
1159 | { | ||||
1160 | if (compiled_data->backward.gradients) | ||||
1161 | ccfreefree(compiled_data->backward.gradients); | ||||
1162 | compiled_data->backward.gradients = 0; | ||||
1163 | if (compiled_data->backward.accum) | ||||
1164 | ccv_nnc_graph_free(compiled_data->backward.accum); | ||||
1165 | compiled_data->backward.accum = 0; | ||||
1166 | if (compiled_data->backward.tensor_arena) | ||||
1167 | ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena); | ||||
1168 | compiled_data->backward.tensor_arena = 0; | ||||
1169 | if (compiled_data->backward.graph_exec_arena) | ||||
1170 | ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena); | ||||
1171 | compiled_data->backward.graph_exec_arena = 0; | ||||
1172 | } | ||||
1173 | |||||
1174 | static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data) | ||||
1175 | { | ||||
1176 | if (compiled_data->apply_gradients.graph) | ||||
1177 | ccv_nnc_graph_free(compiled_data->apply_gradients.graph); | ||||
1178 | compiled_data->apply_gradients.graph = 0; | ||||
1179 | if (compiled_data->apply_gradients.tensor_arena) | ||||
1180 | ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena); | ||||
1181 | compiled_data->apply_gradients.tensor_arena = 0; | ||||
1182 | if (compiled_data->apply_gradients.graph_exec_arena) | ||||
1183 | ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena); | ||||
1184 | compiled_data->apply_gradients.graph_exec_arena = 0; | ||||
1185 | } | ||||
1186 | |||||
1187 | // Compile the graph to run ccv_cnnp_model_fit | ||||
1188 | static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | ||||
1189 | { | ||||
1190 | int i, j; | ||||
1191 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1192 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE" , "ccv_cnnp_model.c", 1192, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1193 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE; | ||||
1194 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1195 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1195, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1196 | assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__ ({ if (!fits || output_size == fit_size) ; else __assert_fail ("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1196 , __extension__ __PRETTY_FUNCTION__); })); | ||||
1197 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1197, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1198 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | ||||
1199 | { | ||||
1200 | _ccv_cnnp_model_set_rewindables(model); | ||||
1201 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); | ||||
1202 | } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) { | ||||
1203 | _ccv_cnnp_model_rewind_graph(model); | ||||
1204 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | ||||
1205 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; | ||||
1206 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); | ||||
1207 | } | ||||
1208 | const int tensors_init = !!compiled_data->tensors_init.v; | ||||
1209 | if (!tensors_init) | ||||
1210 | ccv_cnnp_model_tensors_init(model, compiled_data); | ||||
1211 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | ||||
1212 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1212, __extension__ __PRETTY_FUNCTION__); })); | ||||
1213 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1213, __extension__ __PRETTY_FUNCTION__); })); | ||||
1214 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1214 , __extension__ __PRETTY_FUNCTION__); })); | ||||
1215 | const int input_size_per_p = input_size / parallel_count; | ||||
1216 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | ||||
1217 | const int output_size_per_p = output_size / parallel_count; | ||||
1218 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | ||||
1219 | const int fit_size_per_p = fit_size / parallel_count; | ||||
1220 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds); | ||||
1221 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1222 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | ||||
1223 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | ||||
1224 | const int internal_size = compiled_data->internals->rnum; | ||||
1225 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | ||||
1226 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | ||||
1227 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | ||||
1228 | ccv_array_free(tensor_binds); | ||||
1229 | if (tensors_init && parallel_count > 1) | ||||
1230 | _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | ||||
1231 | // If tensor is not init'ed, we need to init states first. | ||||
1232 | if (_ccv_cnnp_any_to_init(compiled_data)) | ||||
1233 | { | ||||
1234 | ccv_nnc_tensor_init_states_t tensor_init_states = { | ||||
1235 | .parallel_count = parallel_count, | ||||
1236 | .graph = model->graph, | ||||
1237 | .compiled_data = compiled_data, | ||||
1238 | .tensor_arena = compiled_data->tensor_arena | ||||
1239 | }; | ||||
1240 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | ||||
1241 | } | ||||
1242 | compiled_data->is_test = 0; | ||||
1243 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer); | ||||
1244 | // No need to set because it is default to training mode. | ||||
1245 | // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); | ||||
1246 | for (i = 0; i < saved_aux_size * parameter_size; i++) | ||||
1247 | { | ||||
1248 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source); | ||||
1249 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); | ||||
1250 | for (j = 1; j < parallel_count; j++) | ||||
1251 | { | ||||
1252 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); | ||||
1253 | if (copy) | ||||
1254 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, ©, 1, 0); | ||||
1255 | } | ||||
1256 | } | ||||
1257 | const int evaluate_to_size = compiled_data->evaluate.to_size; | ||||
1258 | compiled_data->evaluate.to_op_size = 0; | ||||
1259 | for (i = 0; i < evaluate_to_size; i++) | ||||
1260 | { | ||||
1261 | ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); | ||||
1262 | if (to.graph) | ||||
1263 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to; | ||||
1264 | } | ||||
1265 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type); | ||||
1266 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | ||||
1267 | } | ||||
1268 | |||||
1269 | ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model) | ||||
1270 | { | ||||
1271 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1272 | if (!compiled_data || !compiled_data->graph) | ||||
1273 | return 0; | ||||
1274 | return ccv_nnc_graph_default_stream(compiled_data->graph); | ||||
1275 | } | ||||
1276 | |||||
1277 | uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model) | ||||
1278 | { | ||||
1279 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1280 | if (!compiled_data || !compiled_data->tensor_arena) | ||||
1281 | return 0; | ||||
1282 | return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena); | ||||
1283 | } | ||||
1284 | |||||
1285 | static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) | ||||
1286 | { | ||||
1287 | int i, j; | ||||
1288 | for (i = 0; i < tensor_size; i++) | ||||
1289 | { | ||||
1290 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | ||||
1291 | if (graph) | ||||
1292 | { | ||||
1293 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); | ||||
1294 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1295 | tensor_symbol = alias_to; | ||||
1296 | } | ||||
1297 | ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]); | ||||
1298 | for (j = 1; j < parallel_count; j++) | ||||
1299 | { | ||||
1300 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | ||||
1301 | if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1302 | ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]); | ||||
1303 | } | ||||
1304 | } | ||||
1305 | } | ||||
1306 | |||||
1307 | void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | ||||
1308 | { | ||||
1309 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1310 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1310, __extension__ __PRETTY_FUNCTION__); })); | ||||
1311 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1312 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1312, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1313 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1313, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1314 | assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size) ; else __assert_fail ("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1314 , __extension__ __PRETTY_FUNCTION__); })); | ||||
1315 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1315, __extension__ __PRETTY_FUNCTION__); })); | ||||
1316 | if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) | ||||
1317 | { | ||||
1318 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | ||||
1319 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | ||||
1320 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | ||||
1321 | // Compile the symbolic graph down only when needed. | ||||
1322 | _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size); | ||||
1323 | } else { | ||||
1324 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1324, __extension__ __PRETTY_FUNCTION__); })); | ||||
1325 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1325, __extension__ __PRETTY_FUNCTION__); })); | ||||
1326 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1326 , __extension__ __PRETTY_FUNCTION__); })); | ||||
1327 | const int input_size_per_p = input_size / parallel_count; | ||||
1328 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); | ||||
1329 | const int output_size_per_p = output_size / parallel_count; | ||||
1330 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); | ||||
1331 | const int fit_size_per_p = fit_size / parallel_count; | ||||
1332 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count); | ||||
1333 | } | ||||
1334 | if (compiled_data->is_test) | ||||
1335 | { | ||||
1336 | compiled_data->is_test = 0; | ||||
1337 | ccv_nnc_graph_exec_update_t update = { | ||||
1338 | .parallel_count = parallel_count, | ||||
1339 | .graph = model->graph, | ||||
1340 | .graph_exec_arena = compiled_data->graph_exec_arena, | ||||
1341 | }; | ||||
1342 | ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); | ||||
1343 | } | ||||
1344 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); | ||||
1345 | } | ||||
1346 | |||||
1347 | // Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD). | ||||
1348 | static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | ||||
1349 | { | ||||
1350 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1351 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD; | ||||
1352 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1353 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1353, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1354 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1354, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1355 | // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather, | ||||
1356 | // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel. | ||||
1357 | if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | ||||
1358 | { | ||||
1359 | const int evaluate_to_size = compiled_data->evaluate.to_size; | ||||
1360 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); | ||||
1361 | _ccv_cnnp_model_set_rewindables(model); | ||||
1362 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, | ||||
1363 | 0, 0, | ||||
1364 | 0, 0, 0, | ||||
1365 | 0, 0, 0, | ||||
1366 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, | ||||
1367 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | ||||
1368 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | ||||
1369 | int i, j; | ||||
1370 | for (i = 0; i < evaluate_to_size; i++) | ||||
1371 | for (j = 1; j < parallel_count; j++) | ||||
1372 | { | ||||
1373 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); | ||||
1374 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | ||||
1375 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; | ||||
1376 | } | ||||
1377 | } | ||||
1378 | const int tensors_init = !!compiled_data->tensors_init.v; | ||||
1379 | if (!tensors_init) | ||||
1380 | ccv_cnnp_model_tensors_init(model, compiled_data); | ||||
1381 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | ||||
1382 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1382, __extension__ __PRETTY_FUNCTION__); })); | ||||
1383 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1383, __extension__ __PRETTY_FUNCTION__); })); | ||||
1384 | const int input_size_per_p = input_size / parallel_count; | ||||
1385 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | ||||
1386 | const int output_size_per_p = output_size / parallel_count; | ||||
1387 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | ||||
1388 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1389 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | ||||
1390 | const int internal_size = compiled_data->internals->rnum; | ||||
1391 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | ||||
1392 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | ||||
1393 | // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation. | ||||
1394 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | ||||
1395 | ccv_array_free(tensor_binds); | ||||
1396 | // If tensor is not init'ed, we need to init states first. | ||||
1397 | if (tensors_init && parallel_count > 1) | ||||
1398 | _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | ||||
1399 | if (_ccv_cnnp_any_to_init(compiled_data)) | ||||
1400 | { | ||||
1401 | ccv_nnc_tensor_init_states_t tensor_init_states = { | ||||
1402 | .parallel_count = parallel_count, | ||||
1403 | .graph = model->graph, | ||||
1404 | .compiled_data = compiled_data, | ||||
1405 | .tensor_arena = compiled_data->tensor_arena | ||||
1406 | }; | ||||
1407 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | ||||
1408 | } | ||||
1409 | compiled_data->is_test = 1; | ||||
1410 | ccv_nnc_graph_exec_update_t update = { | ||||
1411 | .parallel_count = parallel_count, | ||||
1412 | .graph = model->graph, | ||||
1413 | .graph_exec_arena = compiled_data->graph_exec_arena, | ||||
1414 | }; | ||||
1415 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); | ||||
1416 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type); | ||||
1417 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | ||||
1418 | } | ||||
1419 | |||||
1420 | static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | ||||
1421 | { | ||||
1422 | assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0 ), __extension__ ({ if (!compiled_data->tensors.gradients) ; else __assert_fail ("!compiled_data->tensors.gradients" , "ccv_cnnp_model.c", 1422, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1423 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1424 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1425 | compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count); | ||||
1426 | compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count; | ||||
1427 | int i, j; | ||||
1428 | for (i = 0; i < parameter_size; i++) | ||||
1429 | { | ||||
1430 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | ||||
1431 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); | ||||
1432 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | ||||
1433 | compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0); | ||||
1434 | compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it. | ||||
1435 | for (j = 1; j < parallel_count; j++) | ||||
1436 | { | ||||
1437 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | ||||
1438 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | ||||
1439 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0; | ||||
1440 | } | ||||
1441 | } | ||||
1442 | } | ||||
1443 | |||||
1444 | static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size) | ||||
1445 | { | ||||
1446 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL) | ||||
1447 | return 1; | ||||
1448 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) | ||||
1449 | return 0; | ||||
1450 | int i; | ||||
1451 | for (i = 0; i < input_size; i++) | ||||
1452 | if (!(disable_outgrad & ((uint64_t)1 << i))) | ||||
1453 | return 0; | ||||
1454 | return 1; | ||||
1455 | } | ||||
1456 | |||||
1457 | // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). | ||||
1458 | // Particularly, this method compiles the evaluation and backprop graph (the main graph). | ||||
1459 | static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | ||||
1460 | { | ||||
1461 | int i, j; | ||||
1462 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1463 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; | ||||
1464 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data ->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data-> gradient_mode != target_gradient_mode) ; else __assert_fail ( "!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode" , "ccv_cnnp_model.c", 1464, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1465 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE; | ||||
1466 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1467 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1467, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1468 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1468, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1469 | // There shouldn't be a loss function if we evaluate with multistage jit. | ||||
1470 | assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ? 1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP ) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP" , "ccv_cnnp_model.c", 1470, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1471 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | ||||
1472 | { | ||||
1473 | _ccv_cnnp_model_set_rewindables(model); | ||||
1474 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. | ||||
1475 | } else if (compiled_data->gradient_mode != target_gradient_mode) { | ||||
1476 | _ccv_cnnp_model_rewind_graph(model); | ||||
1477 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | ||||
1478 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; | ||||
1479 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. | ||||
1480 | } | ||||
1481 | const int tensors_init = !!compiled_data->tensors_init.v; | ||||
1482 | if (!tensors_init) | ||||
1483 | ccv_cnnp_model_tensors_init(model, compiled_data); | ||||
1484 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | ||||
1485 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1485, __extension__ __PRETTY_FUNCTION__); })); | ||||
1486 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1486, __extension__ __PRETTY_FUNCTION__); })); | ||||
1487 | const int input_size_per_p = input_size / parallel_count; | ||||
1488 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | ||||
1489 | const int output_size_per_p = output_size / parallel_count; | ||||
1490 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | ||||
1491 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1492 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | ||||
1493 | const int internal_size = compiled_data->internals->rnum; | ||||
1494 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | ||||
1495 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | ||||
1496 | if (!compiled_data->tensors.gradients) | ||||
1497 | _ccv_cnnp_model_gradient_tensors_init(model, compiled_data); | ||||
1498 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); | ||||
1499 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | ||||
1500 | ccv_array_free(tensor_binds); | ||||
1501 | if (tensors_init && parallel_count > 1) | ||||
1502 | _ccv_cnnp_model_copy_tensors(compiled_data->tensors_init.v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | ||||
1503 | // If tensor is not init'ed, we need to init states first. | ||||
1504 | if (_ccv_cnnp_any_to_init(compiled_data)) | ||||
1505 | { | ||||
1506 | ccv_nnc_tensor_init_states_t tensor_init_states = { | ||||
1507 | .parallel_count = parallel_count, | ||||
1508 | .graph = model->graph, | ||||
1509 | .compiled_data = compiled_data, | ||||
1510 | .tensor_arena = compiled_data->tensor_arena | ||||
1511 | }; | ||||
1512 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | ||||
1513 | } | ||||
1514 | compiled_data->is_test = is_test; | ||||
1515 | ccv_nnc_graph_exec_update_t update = { | ||||
1516 | .parallel_count = parallel_count, | ||||
1517 | .graph = model->graph, | ||||
1518 | .graph_exec_arena = compiled_data->graph_exec_arena, | ||||
1519 | }; | ||||
1520 | ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update); | ||||
1521 | const int evaluate_to_size = compiled_data->evaluate.to_size; | ||||
1522 | compiled_data->evaluate.to_op_size = 0; | ||||
1523 | ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0); | ||||
1524 | for (i = 0; i < evaluate_to_size; i++) | ||||
1525 | { | ||||
1526 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); | ||||
1527 | if (to_op.graph) | ||||
1528 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op; | ||||
1529 | const int* tos; | ||||
1530 | int to_size; | ||||
1531 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size); | ||||
1532 | for (j = 0; j < to_size; j++) | ||||
1533 | { | ||||
1534 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ | ||||
1535 | .d = tos[j], | ||||
1536 | .graph = model->graph | ||||
1537 | }); | ||||
1538 | if (to_op.graph) | ||||
1539 | ccv_array_add_unique_int(backward_from, to_op.d); | ||||
1540 | } | ||||
1541 | } | ||||
1542 | assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__ ({ if (backward_from->rnum > 0) ; else __assert_fail ( "backward_from->rnum > 0", "ccv_cnnp_model.c", 1542, __extension__ __PRETTY_FUNCTION__); })); | ||||
1543 | compiled_data->backward.from_op_size = backward_from->rnum; | ||||
1544 | compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum); | ||||
1545 | for (i = 0; i < backward_from->rnum; i++) | ||||
1546 | compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){ | ||||
1547 | .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from )->rsize * (size_t)(i))), | ||||
1548 | .graph = compiled_data->graph, | ||||
1549 | }; | ||||
1550 | ccv_array_free(backward_from); | ||||
1551 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type); | ||||
1552 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | ||||
1553 | } | ||||
1554 | |||||
1555 | void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | ||||
1556 | { | ||||
1557 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1558 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1558, __extension__ __PRETTY_FUNCTION__); })); | ||||
1559 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1560 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1560, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1561 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1561, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1562 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1562, __extension__ __PRETTY_FUNCTION__); })); | ||||
1563 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; | ||||
1564 | const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad)); | ||||
1565 | if (!compiled_data->graph || mode_mismatch) | ||||
1566 | { | ||||
1567 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | ||||
1568 | if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad. | ||||
1569 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | ||||
1570 | if (params.requires_grad) | ||||
1571 | _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size); | ||||
1572 | else | ||||
1573 | _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size); | ||||
1574 | } else { | ||||
1575 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena); | ||||
1576 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1576, __extension__ __PRETTY_FUNCTION__); })); | ||||
1577 | const int input_size_per_p = input_size / parallel_count; | ||||
1578 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); | ||||
1579 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1579, __extension__ __PRETTY_FUNCTION__); })); | ||||
1580 | const int output_size_per_p = output_size / parallel_count; | ||||
1581 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); | ||||
1582 | } | ||||
1583 | if (compiled_data->is_test != params.is_test) | ||||
1584 | { | ||||
1585 | compiled_data->is_test = params.is_test; | ||||
1586 | ccv_nnc_graph_exec_update_t update = { | ||||
1587 | .parallel_count = parallel_count, | ||||
1588 | .graph = model->graph, | ||||
1589 | .graph_exec_arena = compiled_data->graph_exec_arena, | ||||
1590 | }; | ||||
1591 | ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update); | ||||
1592 | } | ||||
1593 | if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD) | ||||
1594 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); | ||||
1595 | else { | ||||
1596 | if (!compiled_data->evaluate.schedule) | ||||
1597 | compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size); | ||||
1598 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context); | ||||
1599 | } | ||||
1600 | } | ||||
1601 | |||||
1602 | // Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). | ||||
1603 | // Particularly, this method compiles the accumulator graph. | ||||
1604 | static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model) | ||||
1605 | { | ||||
1606 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1607 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1607, __extension__ __PRETTY_FUNCTION__); })); | ||||
1608 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1608, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1609 | ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new(); | ||||
1610 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1611 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1612 | int i, j; | ||||
1613 | compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3); | ||||
1614 | compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count; | ||||
1615 | compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count; | ||||
1616 | for (i = 0; i < parameter_size; i++) | ||||
1617 | for (j = 0; j < parallel_count; j++) | ||||
1618 | { | ||||
1619 | const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info; | ||||
1620 | // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them. | ||||
1621 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size]; | ||||
1622 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | ||||
1623 | ccv_nnc_tensor_symbol_t inputs[2]; | ||||
1624 | inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | ||||
1625 | inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | ||||
1626 | ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | ||||
1627 | ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0); | ||||
1628 | } | ||||
1629 | ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | ||||
1630 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | ||||
1631 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); | ||||
1632 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds); | ||||
1633 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); | ||||
1634 | ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size (accum), SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size (accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena); | ||||
1635 | ccv_nnc_symbolic_graph_free(accum); | ||||
1636 | ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type); | ||||
1637 | ccv_array_free(tensor_binds); | ||||
1638 | } | ||||
1639 | |||||
1640 | void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | ||||
1641 | { | ||||
1642 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1643 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1643, __extension__ __PRETTY_FUNCTION__); })); | ||||
1644 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1644, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1645 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1646 | assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model-> output_size * parallel_count) ? 1 : 0), __extension__ ({ if ( ingrad_size == 0 || ingrad_size == model->output_size * parallel_count ) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1646, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1647 | if (outgrad_size > 0) | ||||
1648 | { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size * parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size == compiled_data->outgrad_size * parallel_count) ; else __assert_fail ("outgrad_size == compiled_data->outgrad_size * parallel_count" , "ccv_cnnp_model.c", 1648, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1649 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1649, __extension__ __PRETTY_FUNCTION__); })); | ||||
1650 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 1650, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1651 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1652 | // If we need to accumulate the gradients now, do jit on accumulator. | ||||
1653 | if (compiled_data->backward.count > 0) | ||||
1654 | { | ||||
1655 | if (!compiled_data->backward.accum) | ||||
1656 | _ccv_cnnp_model_multistage_jit_1(model); | ||||
1657 | else if (compiled_data->backward.count == 1) { | ||||
1658 | // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly). | ||||
1659 | int i; | ||||
1660 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena); | ||||
1661 | for (i = 0; i < parameter_size * parallel_count; i++) | ||||
1662 | { | ||||
1663 | ccv_nnc_tensor_t* tensor; | ||||
1664 | CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), ( compiled_data->tensors.accum_gradients[i]) = (compiled_data ->tensors.gradients[i]), (compiled_data->tensors.gradients [i]) = (tensor)); | ||||
1665 | } | ||||
1666 | // Do rebind in case we messed up the binding (we switch accum_gradients and gradients). | ||||
1667 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1); | ||||
1668 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); | ||||
1669 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); | ||||
1670 | } | ||||
1671 | } | ||||
1672 | const int ingrad_size_per_p = model->output_size; | ||||
1673 | const int outgrad_size_per_p = compiled_data->outgrad_size; | ||||
1674 | int i, j; | ||||
1675 | for (i = 0; i < ingrad_size_per_p; i++) | ||||
1676 | { | ||||
1677 | const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); | ||||
1678 | if (!ingrad_size || !ingrads || ingrads[i] == 0) | ||||
1679 | { | ||||
1680 | // Set it to 1 if it is not specified. | ||||
1681 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad); | ||||
1682 | if (ingrad_tensor) | ||||
1683 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); | ||||
1684 | for (j = 1; j < parallel_count; j++) | ||||
1685 | { | ||||
1686 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j)); | ||||
1687 | if (ingrad_tensor) | ||||
1688 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); | ||||
1689 | } | ||||
1690 | } else { | ||||
1691 | // Make sure the length matches, in case it is an alias. | ||||
1692 | assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model-> graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count (ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params (model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))" , "ccv_cnnp_model.c", 1692, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1693 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]); | ||||
1694 | for (j = 1; j < parallel_count; j++) | ||||
1695 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]); | ||||
1696 | } | ||||
1697 | } | ||||
1698 | if (outgrad_size > 0) | ||||
1699 | { | ||||
1700 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\"" , "ccv_cnnp_model.c", 1700, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1701 | for (i = 0; i < outgrad_size_per_p; i++) | ||||
1702 | if (outgrads[i]) | ||||
1703 | { | ||||
1704 | const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i]; | ||||
1705 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]); | ||||
1706 | for (j = 1; j < parallel_count; j++) | ||||
1707 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]); | ||||
1708 | } | ||||
1709 | } else { | ||||
1710 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 1711, __extension__ __PRETTY_FUNCTION__ ); })) | ||||
1711 | compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 1711, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1712 | } | ||||
1713 | // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients. | ||||
1714 | // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these | ||||
1715 | // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching. | ||||
1716 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); | ||||
1717 | if (!compiled_data->backward.schedule) | ||||
1718 | compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0); | ||||
1719 | // Run the backward pass. | ||||
1720 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context); | ||||
1721 | // If we need to run accumulation round, do that now. | ||||
1722 | if (compiled_data->backward.count > 0) | ||||
1723 | ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context); | ||||
1724 | // Update the count, this determines whether we need to accumulate or not. | ||||
1725 | ++compiled_data->backward.count; | ||||
1726 | } | ||||
1727 | |||||
1728 | // Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE). | ||||
1729 | // Particularly, this method compiles the parameter update graph. | ||||
1730 | static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model) | ||||
1731 | { | ||||
1732 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1733 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1733, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1734 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1735 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1736 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | ||||
1737 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | ||||
1738 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | ||||
1739 | // Bind accumulated gradients. | ||||
1740 | if (compiled_data->backward.count > 1) | ||||
1741 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds); | ||||
1742 | else | ||||
1743 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); | ||||
1744 | ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0); | ||||
1745 | int i, j; | ||||
1746 | for (i = 0; i < compiled_data->backward.to_size; i++) | ||||
1747 | { | ||||
1748 | const int* tos; | ||||
1749 | int to_size; | ||||
1750 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size); | ||||
1751 | for (j = 0; j < to_size; j++) | ||||
1752 | { | ||||
1753 | // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply | ||||
1754 | // gradients graph. | ||||
1755 | const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ | ||||
1756 | .d = tos[j], | ||||
1757 | .graph = model->graph, | ||||
1758 | }); | ||||
1759 | if (!exec.graph) | ||||
1760 | ccv_array_add_unique_int(apply_gradients_from, tos[j]); | ||||
1761 | } | ||||
1762 | } | ||||
1763 | const int from_size = apply_gradients_from->rnum; | ||||
1764 | ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size); | ||||
1765 | for (i = 0; i < from_size; i++) | ||||
1766 | froms[i] = (ccv_nnc_graph_exec_symbol_t){ | ||||
1767 | .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t )(apply_gradients_from)->rsize * (size_t)(i))), | ||||
1768 | .graph = model->graph | ||||
1769 | }; | ||||
1770 | ccv_array_free(apply_gradients_from); | ||||
1771 | // It can only ends with updates on the parameters. | ||||
1772 | ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0); | ||||
1773 | for (i = 0; i < parameter_size; i++) | ||||
1774 | { | ||||
1775 | ccv_array_push(tos, &compiled_data->update_nodes[i]); | ||||
1776 | for (j = 1; j < parallel_count; j++) | ||||
1777 | { | ||||
1778 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j); | ||||
1779 | ccv_array_push(tos, ©); | ||||
1780 | } | ||||
1781 | } | ||||
1782 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize * (size_t)(0))), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena); | ||||
1783 | ccv_array_free(tos); | ||||
1784 | ccv_array_free(tensor_binds); | ||||
1785 | ccfreefree(froms); | ||||
1786 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | ||||
1787 | for (i = 0; i < max_saved_aux_size * parameter_size; i++) | ||||
1788 | { | ||||
1789 | // Skip on no tensor. | ||||
1790 | if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1791 | continue; | ||||
1792 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source); | ||||
1793 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); | ||||
1794 | for (j = 1; j < parallel_count; j++) | ||||
1795 | { | ||||
1796 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); | ||||
1797 | if (copy) | ||||
1798 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, ©, 1, 0); | ||||
1799 | } | ||||
1800 | } | ||||
1801 | ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type); | ||||
1802 | } | ||||
1803 | |||||
1804 | void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context) | ||||
1805 | { | ||||
1806 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1807 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1807, __extension__ __PRETTY_FUNCTION__); })); | ||||
1808 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1808, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1809 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1810 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1810, __extension__ __PRETTY_FUNCTION__); })); | ||||
1811 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 1811, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1812 | // Skip if there is no backward pass. | ||||
1813 | if (compiled_data->backward.count <= 0) | ||||
1814 | return; | ||||
1815 | // Skip if there is no parameters. | ||||
1816 | if (compiled_data->parameters->rnum == 0) | ||||
1817 | { | ||||
1818 | compiled_data->backward.count = 0; | ||||
1819 | return; | ||||
1820 | } | ||||
1821 | if (!compiled_data->apply_gradients.graph) | ||||
1822 | _ccv_cnnp_model_multistage_jit_2(model); | ||||
1823 | else { | ||||
1824 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1825 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena); | ||||
1826 | // Change to bind accum_gradients if we do gradient accumulation (run backward more than once). | ||||
1827 | if (compiled_data->backward.count > 1) | ||||
1828 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count); | ||||
1829 | else | ||||
1830 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); | ||||
1831 | } | ||||
1832 | ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context); | ||||
1833 | // Reset backward count to 0. | ||||
1834 | compiled_data->backward.count = 0; | ||||
1835 | } | ||||
1836 | |||||
1837 | void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor) | ||||
1838 | { | ||||
1839 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1840 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | ||||
1841 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1842 | const int tensors_init = !!compiled_data->tensors_init.v; | ||||
1843 | if (!tensors_init) | ||||
1844 | ccv_cnnp_model_tensors_init(model, compiled_data); | ||||
1845 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | ||||
1846 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | ||||
1847 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | ||||
1848 | if (param_ref < 0) | ||||
1849 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 1849 , __extension__ __PRETTY_FUNCTION__); })); } | ||||
1850 | else | ||||
1851 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 1851, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1852 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | ||||
1853 | ccv_array_free(parameter_indices); | ||||
1854 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1855 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 1855 , __extension__ __PRETTY_FUNCTION__); })); | ||||
1856 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 1856, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1857 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1858 | ccv_nnc_tensor_t* const dest = compiled_data->tensors.parameters[d]; | ||||
1859 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 1859, __extension__ __PRETTY_FUNCTION__); })); | ||||
1860 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 ), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | ||||
1861 | int i; | ||||
1862 | for (i = 1; i < parallel_count; i++) | ||||
1863 | { | ||||
1864 | ccv_nnc_tensor_t* const copy_tensor = compiled_data->tensors.parameters[d + i * parameter_size]; | ||||
1865 | if (copy_tensor) | ||||
1866 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | ||||
1867 | } | ||||
1868 | // Mark this symbol as init'ed. | ||||
1869 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( d))))->d; | ||||
1870 | compiled_data->tensors_init.v[s >> 5] |= (1u << (s & 0x1f)); | ||||
1871 | } | ||||
1872 | |||||
1873 | void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor) | ||||
1874 | { | ||||
1875 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1876 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | ||||
1877 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 1877, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1878 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 1878, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1879 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | ||||
1880 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | ||||
1881 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | ||||
1882 | if (param_ref < 0) | ||||
1883 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 1883 , __extension__ __PRETTY_FUNCTION__); })); } | ||||
1884 | else | ||||
1885 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 1885, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1886 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | ||||
1887 | ccv_array_free(parameter_indices); | ||||
1888 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1889 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 1889 , __extension__ __PRETTY_FUNCTION__); })); | ||||
1890 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 1890, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1891 | // We don't need to consider parallel_count, every parameter on each device is identical. | ||||
1892 | ccv_nnc_tensor_t* const src = compiled_data->tensors.parameters[d]; | ||||
1893 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 1893, __extension__ __PRETTY_FUNCTION__); })); | ||||
1894 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | ||||
1895 | } | ||||
1896 | |||||
1897 | ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter) | ||||
1898 | { | ||||
1899 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
1900 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | ||||
1901 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 1901, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1902 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 1902, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1903 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | ||||
1904 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | ||||
1905 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | ||||
1906 | if (param_ref < 0) | ||||
1907 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 1907 , __extension__ __PRETTY_FUNCTION__); })); } | ||||
1908 | else | ||||
1909 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 1909, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1910 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | ||||
1911 | ccv_array_free(parameter_indices); | ||||
1912 | const int parameter_size = compiled_data->parameters->rnum; | ||||
1913 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 1913 , __extension__ __PRETTY_FUNCTION__); })); | ||||
1914 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 1914, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1915 | // We don't need to consider parallel_count, every parameter on each device is identical. | ||||
1916 | ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[d]; | ||||
1917 | assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor ) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 1917, __extension__ __PRETTY_FUNCTION__); })); | ||||
1918 | return tensor->info; | ||||
1919 | } | ||||
1920 | |||||
1921 | static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref) | ||||
1922 | { | ||||
1923 | const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel; | ||||
1924 | assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameters->param_sel != 0) ; else __assert_fail ( "parameters->param_sel != 0", "ccv_cnnp_model.c", 1924, __extension__ __PRETTY_FUNCTION__); })); | ||||
1925 | ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0); | ||||
1926 | ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices); | ||||
1927 | *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref; | ||||
1928 | return to_parameter_indices; | ||||
1929 | } | ||||
1930 | |||||
1931 | static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref) | ||||
1932 | { | ||||
1933 | // If the model is not compiled yet. Compile them now. | ||||
1934 | if (!model->graph) | ||||
1935 | { | ||||
1936 | model->graph = ccv_nnc_symbolic_graph_new(); | ||||
1937 | assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__ ({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data" , "ccv_cnnp_model.c", 1937, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1938 | const int input_size = from_model->input_size; | ||||
1939 | ccv_nnc_tensor_param_t input_params[input_size]; | ||||
1940 | int i; | ||||
1941 | for (i = 0; i < input_size; i++) | ||||
1942 | input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]); | ||||
1943 | _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss); | ||||
1944 | model->parallel_count = from_model->parallel_count; | ||||
1945 | model->memory_compression = from_model->memory_compression; | ||||
1946 | model->compiled_data->stream_type = from_model->compiled_data->stream_type; | ||||
1947 | model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer; | ||||
1948 | model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size; | ||||
1949 | } | ||||
1950 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | ||||
1951 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 1951, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1952 | const int to_tensors_init = !!to_compiled_data->tensors_init.v; | ||||
1953 | if (!to_tensors_init) | ||||
1954 | ccv_cnnp_model_tensors_init(model, to_compiled_data); | ||||
1955 | assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (to_compiled_data->tensors.parameters ) ; else __assert_fail ("to_compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 1955, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1956 | *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref); | ||||
1957 | *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref); | ||||
1958 | if (*from_param_ref < 0 && *param_ref >= 0) | ||||
1959 | { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1 : 0), __extension__ ({ if ((*from_parameter_indices)->rnum == 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1" , "ccv_cnnp_model.c", 1959, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1960 | else if (*from_param_ref >= 0) | ||||
1961 | { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices )->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref < (*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum" , "ccv_cnnp_model.c", 1961, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1962 | if (*param_ref < 0 && *from_param_ref >= 0) | ||||
1963 | { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0) , __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else __assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c" , 1963, __extension__ __PRETTY_FUNCTION__); })); } | ||||
1964 | else if (*param_ref >= 0) | ||||
1965 | { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum ) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices )->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum" , "ccv_cnnp_model.c", 1965, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1966 | // Should be exactly the same tensor. | ||||
1967 | if (*param_ref < 0 && *from_param_ref < 0) | ||||
1968 | { assert((*from_parameter_indices)->rnum == (*parameter_indices)->rnum)((void) sizeof (((*from_parameter_indices)->rnum == (*parameter_indices )->rnum) ? 1 : 0), __extension__ ({ if ((*from_parameter_indices )->rnum == (*parameter_indices)->rnum) ; else __assert_fail ("(*from_parameter_indices)->rnum == (*parameter_indices)->rnum" , "ccv_cnnp_model.c", 1968, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
1969 | } | ||||
1970 | |||||
1971 | void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters) | ||||
1972 | { | ||||
1973 | ccv_array_t* to_parameter_indices; | ||||
1974 | int to_param_ref; | ||||
1975 | ccv_array_t* from_parameter_indices; | ||||
1976 | int from_param_ref; | ||||
1977 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref); | ||||
1978 | // To models. | ||||
1979 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | ||||
1980 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 1980, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1981 | // From models. | ||||
1982 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | ||||
1983 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
1984 | const int to_parameter_size = to_compiled_data->parameters->rnum; | ||||
1985 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1; | ||||
1986 | int i, j; | ||||
1987 | for (i = 0; i < rnum; i++) | ||||
1988 | { | ||||
1989 | const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))); | ||||
1990 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 1990, __extension__ __PRETTY_FUNCTION__); })); | ||||
1991 | assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters ->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data ->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 1991, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1992 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | ||||
1993 | // If the original is not init'ed. We cannot copy from. | ||||
1994 | if (!(from_compiled_data->tensors_init.v[s >> 5] & (1u << (s & 0x1f)))) | ||||
1995 | continue; | ||||
1996 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | ||||
1997 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 1997, __extension__ __PRETTY_FUNCTION__); })); | ||||
1998 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 1998, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1999 | ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d]; | ||||
2000 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2000, __extension__ __PRETTY_FUNCTION__); })); | ||||
2001 | ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d]; | ||||
2002 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2002, __extension__ __PRETTY_FUNCTION__); })); | ||||
2003 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | ||||
2004 | for (j = 1; j < parallel_count; j++) | ||||
2005 | { | ||||
2006 | ccv_nnc_tensor_t* const copy_tensor = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]; | ||||
2007 | if (copy_tensor) | ||||
2008 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | ||||
2009 | } | ||||
2010 | // Mark this symbol as init'ed. | ||||
2011 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | ||||
2012 | to_compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f)); | ||||
2013 | } | ||||
2014 | ccv_array_free(to_parameter_indices); | ||||
2015 | ccv_array_free(from_parameter_indices); | ||||
2016 | } | ||||
2017 | |||||
2018 | ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type) | ||||
2019 | { | ||||
2020 | if (!compiled_data->stream_map) | ||||
2021 | compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map(); | ||||
2022 | int ret = 0; | ||||
2023 | khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret ); | ||||
2024 | assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if ( ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c" , 2024, __extension__ __PRETTY_FUNCTION__); })); | ||||
2025 | ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]); | ||||
2026 | // If ret == 0, the key already exist, we can return directly, otherwise, create and return. | ||||
2027 | if (ret != 0) | ||||
2028 | { | ||||
2029 | stream = ccv_nnc_stream_context_new(type); | ||||
2030 | kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream; | ||||
2031 | } | ||||
2032 | return stream; | ||||
2033 | } | ||||
2034 | |||||
2035 | void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters) | ||||
2036 | { | ||||
2037 | ccv_array_t* to_parameter_indices; | ||||
2038 | int to_param_ref; | ||||
2039 | ccv_array_t* from_parameter_indices; | ||||
2040 | int from_param_ref; | ||||
2041 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref); | ||||
2042 | // To models. | ||||
2043 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | ||||
2044 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2044, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2045 | // From models. | ||||
2046 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | ||||
2047 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
2048 | const int to_parameter_size = to_compiled_data->parameters->rnum; | ||||
2049 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1; | ||||
2050 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2050, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2051 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2051, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2052 | int i, j; | ||||
2053 | ccv_nnc_tensor_t* inputs[aux_in_size + 2]; | ||||
2054 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | ||||
2055 | for (i = 0; i < aux_in_size; i++) | ||||
2056 | inputs[i + 2] = aux_ins[i]; | ||||
2057 | for (i = 0; i < aux_out_size; i++) | ||||
2058 | outputs[i + 1] = aux_outs[i]; | ||||
2059 | for (i = 0; i < rnum; i++) | ||||
2060 | { | ||||
2061 | const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))); | ||||
2062 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2062, __extension__ __PRETTY_FUNCTION__); })); | ||||
2063 | assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters ->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data ->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2063, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2064 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | ||||
2065 | // If the original is not init'ed. We cannot copy from. | ||||
2066 | if (!(from_compiled_data->tensors_init.v[s >> 5] & (1u << (s & 0x1f)))) | ||||
2067 | continue; | ||||
2068 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | ||||
2069 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2069, __extension__ __PRETTY_FUNCTION__); })); | ||||
2070 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2070, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2071 | if (parallel_count > 1) | ||||
2072 | { | ||||
2073 | ccv_nnc_stream_context_t* streams[parallel_count]; | ||||
2074 | ccv_nnc_stream_signal_t* signal; | ||||
2075 | if (stream_context) | ||||
2076 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | ||||
2077 | for (j = 0; j < parallel_count; j++) | ||||
2078 | { | ||||
2079 | ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d + j * to_parameter_size]; | ||||
2080 | ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]; | ||||
2081 | if (!dest || !src) | ||||
2082 | { | ||||
2083 | streams[j] = 0; | ||||
2084 | continue; | ||||
2085 | } | ||||
2086 | // At the moment, can only handle them on the same device. | ||||
2087 | assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest-> info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src-> info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else __assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)" , "ccv_cnnp_model.c", 2087, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2088 | assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >> 8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1 : 0), __extension__ ({ if ((((src->info.type) & 0xfff00 ) >> 8) == (((dest->info.type) & 0xfff00) >> 8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)" , "ccv_cnnp_model.c", 2088, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2089 | const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | ||||
2090 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8); | ||||
2091 | int type = stream_type; | ||||
2092 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | ||||
2093 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | ||||
2094 | // Wait signal to finish. | ||||
2095 | if (stream_context) | ||||
2096 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | ||||
2097 | inputs[0] = outputs[0] = dest; | ||||
2098 | inputs[1] = src; | ||||
2099 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0); | ||||
2100 | if (stream_context) | ||||
2101 | { | ||||
2102 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | ||||
2103 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | ||||
2104 | } | ||||
2105 | streams[j] = stream_0; | ||||
2106 | } | ||||
2107 | // If this should be blocking, blocking it. | ||||
2108 | if (!stream_context) | ||||
2109 | for (j = 0; j < parallel_count; j++) | ||||
2110 | if (streams[j]) | ||||
2111 | ccv_nnc_stream_context_wait(streams[j]); | ||||
2112 | } else { | ||||
2113 | ccv_nnc_tensor_t* const src = from_compiled_data->tensors.parameters[src_d]; | ||||
2114 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2114, __extension__ __PRETTY_FUNCTION__); })); | ||||
2115 | ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d]; | ||||
2116 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2116, __extension__ __PRETTY_FUNCTION__); })); | ||||
2117 | inputs[0] = outputs[0] = dest; | ||||
2118 | inputs[1] = src; | ||||
2119 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context); | ||||
2120 | } | ||||
2121 | // Mark this symbol as init'ed. | ||||
2122 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | ||||
2123 | to_compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f)); | ||||
2124 | } | ||||
2125 | ccv_array_free(to_parameter_indices); | ||||
2126 | ccv_array_free(from_parameter_indices); | ||||
2127 | } | ||||
2128 | |||||
2129 | void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context) | ||||
2130 | { | ||||
2131 | int to_param_ref; | ||||
2132 | ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref); | ||||
2133 | // To models. | ||||
2134 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | ||||
2135 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2135, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2136 | // Tensor has to be inited already. | ||||
2137 | assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 : 0), __extension__ ({ if (!!to_compiled_data->tensors_init .v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v" , "ccv_cnnp_model.c", 2137, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2138 | assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (to_compiled_data->tensors.parameters ) ; else __assert_fail ("to_compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2138, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2139 | // From models. | ||||
2140 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
2141 | const int to_parameter_size = to_compiled_data->parameters->rnum; | ||||
2142 | const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1; | ||||
2143 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2143, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2144 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2144, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2145 | int i, j; | ||||
2146 | ccv_nnc_tensor_t* inputs[aux_in_size + 1]; | ||||
2147 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | ||||
2148 | for (i = 0; i < aux_in_size; i++) | ||||
2149 | inputs[i + 1] = aux_ins[i]; | ||||
2150 | for (i = 0; i < aux_out_size; i++) | ||||
2151 | outputs[i + 1] = aux_outs[i]; | ||||
2152 | for (i = 0; i < rnum; i++) | ||||
2153 | { | ||||
2154 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | ||||
2155 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2155, __extension__ __PRETTY_FUNCTION__); })); | ||||
2156 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2156, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2157 | if (parallel_count > 1) | ||||
2158 | { | ||||
2159 | ccv_nnc_stream_context_t* streams[parallel_count]; | ||||
2160 | ccv_nnc_stream_signal_t* signal; | ||||
2161 | if (stream_context) | ||||
2162 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | ||||
2163 | for (j = 0; j < parallel_count; j++) | ||||
2164 | { | ||||
2165 | ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]; | ||||
2166 | if (!dest) | ||||
2167 | { | ||||
2168 | streams[j] = 0; | ||||
2169 | continue; | ||||
2170 | } | ||||
2171 | const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | ||||
2172 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8); | ||||
2173 | int type = stream_type; | ||||
2174 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | ||||
2175 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | ||||
2176 | // Wait signal to finish. | ||||
2177 | if (stream_context) | ||||
2178 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | ||||
2179 | inputs[0] = outputs[0] = dest; | ||||
2180 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0); | ||||
2181 | if (stream_context) | ||||
2182 | { | ||||
2183 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | ||||
2184 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | ||||
2185 | } | ||||
2186 | streams[j] = stream_0; | ||||
2187 | } | ||||
2188 | // If this should be blocking, blocking it. | ||||
2189 | if (!stream_context) | ||||
2190 | for (j = 0; j < parallel_count; j++) | ||||
2191 | if (streams[j]) | ||||
2192 | ccv_nnc_stream_context_wait(streams[j]); | ||||
2193 | } else { | ||||
2194 | ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d]; | ||||
2195 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2195, __extension__ __PRETTY_FUNCTION__); })); | ||||
2196 | inputs[0] = outputs[0] = dest; | ||||
2197 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context); | ||||
2198 | } | ||||
2199 | // No need to mark this symbol as init'ed, it is already. | ||||
2200 | } | ||||
2201 | ccv_array_free(to_parameter_indices); | ||||
2202 | } | ||||
2203 | |||||
2204 | void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context) | ||||
2205 | { | ||||
2206 | int to_param_ref; | ||||
2207 | ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref); | ||||
2208 | // To models. | ||||
2209 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | ||||
2210 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2210, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2211 | // Tensor has to be inited already. | ||||
2212 | assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 : 0), __extension__ ({ if (!!to_compiled_data->tensors_init .v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v" , "ccv_cnnp_model.c", 2212, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2213 | ccv_nnc_tensor_t** tensor_gradients; | ||||
2214 | if (to_compiled_data->backward.count > 1) | ||||
2215 | tensor_gradients = to_compiled_data->tensors.accum_gradients; | ||||
2216 | else | ||||
2217 | tensor_gradients = to_compiled_data->tensors.gradients; | ||||
2218 | assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({ if (tensor_gradients) ; else __assert_fail ("tensor_gradients" , "ccv_cnnp_model.c", 2218, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2219 | // From models. | ||||
2220 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
2221 | const int to_parameter_size = to_compiled_data->parameters->rnum; | ||||
2222 | const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1; | ||||
2223 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2223, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2224 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2224, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2225 | int i, j; | ||||
2226 | ccv_nnc_tensor_t* inputs[aux_in_size + 1]; | ||||
2227 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | ||||
2228 | for (i = 0; i < aux_in_size; i++) | ||||
2229 | inputs[i + 1] = aux_ins[i]; | ||||
2230 | for (i = 0; i < aux_out_size; i++) | ||||
2231 | outputs[i + 1] = aux_outs[i]; | ||||
2232 | for (i = 0; i < rnum; i++) | ||||
2233 | { | ||||
2234 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | ||||
2235 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2235, __extension__ __PRETTY_FUNCTION__); })); | ||||
2236 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2236, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2237 | if (parallel_count > 1) | ||||
2238 | { | ||||
2239 | ccv_nnc_stream_context_t* streams[parallel_count]; | ||||
2240 | ccv_nnc_stream_signal_t* signal; | ||||
2241 | if (stream_context) | ||||
2242 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | ||||
2243 | for (j = 0; j < parallel_count; j++) | ||||
2244 | { | ||||
2245 | ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size]; | ||||
2246 | if (!dest) | ||||
2247 | { | ||||
2248 | streams[j] = 0; | ||||
2249 | continue; | ||||
2250 | } | ||||
2251 | const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | ||||
2252 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8); | ||||
2253 | int type = stream_type; | ||||
2254 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | ||||
2255 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | ||||
2256 | // Wait signal to finish. | ||||
2257 | if (stream_context) | ||||
2258 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | ||||
2259 | inputs[0] = outputs[0] = dest; | ||||
2260 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0); | ||||
2261 | if (stream_context) | ||||
2262 | { | ||||
2263 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | ||||
2264 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | ||||
2265 | } | ||||
2266 | streams[j] = stream_0; | ||||
2267 | } | ||||
2268 | // If this should be blocking, blocking it. | ||||
2269 | if (!stream_context) | ||||
2270 | for (j = 0; j < parallel_count; j++) | ||||
2271 | if (streams[j]) | ||||
2272 | ccv_nnc_stream_context_wait(streams[j]); | ||||
2273 | } else { | ||||
2274 | ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d]; | ||||
2275 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2275, __extension__ __PRETTY_FUNCTION__); })); | ||||
2276 | inputs[0] = outputs[0] = dest; | ||||
2277 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context); | ||||
2278 | } | ||||
2279 | // No need to mark this symbol as init'ed, it is already. | ||||
2280 | } | ||||
2281 | ccv_array_free(to_parameter_indices); | ||||
2282 | } | ||||
2283 | |||||
2284 | ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model) | ||||
2285 | { | ||||
2286 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
2287 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2287, __extension__ __PRETTY_FUNCTION__); })); | ||||
2288 | return compiled_data->minimize.minimizer; | ||||
2289 | } | ||||
2290 | |||||
2291 | void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size) | ||||
2292 | { | ||||
2293 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
2294 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2294, __extension__ __PRETTY_FUNCTION__); })); | ||||
2295 | const int parameter_size = compiled_data->parameters->rnum; | ||||
2296 | if (parameter_size == 0) | ||||
2297 | return; | ||||
2298 | if (reset) | ||||
2299 | { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size == 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 && set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0" , "ccv_cnnp_model.c", 2299, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
2300 | const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | ||||
2301 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer); | ||||
2302 | if (saved_aux_size > compiled_data->minimize.max_saved_aux_size) | ||||
2303 | compiled_data->minimize.max_saved_aux_size = saved_aux_size; | ||||
2304 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | ||||
2305 | // We update all parameters, at this point, we have one minimizer. | ||||
2306 | if (set_parameters == 0 || set_parameter_size == 0) | ||||
2307 | compiled_data->minimize.minimizer = minimizer; | ||||
2308 | int i; | ||||
2309 | if (set_parameters && set_parameter_size) | ||||
2310 | { | ||||
2311 | // I need to save what's the minimizer along with this. | ||||
2312 | if (!compiled_data->minimize.parameters) | ||||
2313 | compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0); | ||||
2314 | ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t)); | ||||
2315 | set_minimizer_for_parameter->minimizer = minimizer; | ||||
2316 | set_minimizer_for_parameter->parameter_size = set_parameter_size; | ||||
2317 | memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size); | ||||
2318 | ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter); | ||||
2319 | } | ||||
2320 | // If reset is true, clear the parameters array. | ||||
2321 | if (reset && compiled_data->minimize.parameters) | ||||
2322 | { | ||||
2323 | for (i = 0; i < compiled_data->minimize.parameters->rnum; i++) | ||||
2324 | ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)-> data)) + (size_t)(compiled_data->minimize.parameters)-> rsize * (size_t)(i)))); | ||||
2325 | ccv_array_clear(compiled_data->minimize.parameters); | ||||
2326 | } | ||||
2327 | if (!compiled_data->update_nodes) | ||||
2328 | return; | ||||
2329 | ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph; | ||||
2330 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 2330, __extension__ __PRETTY_FUNCTION__); })); | ||||
2331 | if (saved_aux_size > old_max_saved_aux_size) | ||||
2332 | { | ||||
2333 | assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->updated_parameters) ; else __assert_fail ("compiled_data->updated_parameters" , "ccv_cnnp_model.c", 2333, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2334 | // Reallocate first, move them around later. | ||||
2335 | compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size); | ||||
2336 | compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size); | ||||
2337 | compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size); | ||||
2338 | // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap. | ||||
2339 | _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size); | ||||
2340 | } | ||||
2341 | int flag = 0; | ||||
2342 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
2343 | if (set_parameters && set_parameter_size) | ||||
2344 | { | ||||
2345 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | ||||
2346 | for (i = 0; i < set_parameter_size; i++) | ||||
2347 | { | ||||
2348 | const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel; | ||||
2349 | assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0 ), __extension__ ({ if (set_parameters[i]->param_sel != 0) ; else __assert_fail ("set_parameters[i]->param_sel != 0" , "ccv_cnnp_model.c", 2349, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2350 | const int old_rnum = parameter_indices->rnum; | ||||
2351 | ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices); | ||||
2352 | const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref; | ||||
2353 | assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0 ), __extension__ ({ if (set_parameters[i]->param_ref != 0) ; else __assert_fail ("set_parameters[i]->param_ref != 0" , "ccv_cnnp_model.c", 2353, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2354 | if (param_ref >= 0) | ||||
2355 | { | ||||
2356 | assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices-> rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum < parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum" , "ccv_cnnp_model.c", 2356, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2357 | *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(old_rnum))) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref + old_rnum))); | ||||
2358 | parameter_indices->rnum = old_rnum + 1; | ||||
2359 | } | ||||
2360 | } | ||||
2361 | // We may have duplicated indices, but that is OK, we will set it twice. | ||||
2362 | for (i = 0; i < parameter_indices->rnum; i++) | ||||
2363 | { | ||||
2364 | const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(i))); | ||||
2365 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d)) | ||||
2366 | flag = 1; | ||||
2367 | } | ||||
2368 | ccv_array_free(parameter_indices); | ||||
2369 | } else { | ||||
2370 | for (i = 0; i < parameter_size; i++) | ||||
2371 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i)) | ||||
2372 | flag = 1; | ||||
2373 | if (compiled_data->minimize.parameters) | ||||
2374 | if (_ccv_cnnp_apply_parameters_with_minimizer(model)) | ||||
2375 | flag = 1; | ||||
2376 | } | ||||
2377 | if (flag) | ||||
2378 | { | ||||
2379 | // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph. | ||||
2380 | if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE) | ||||
2381 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | ||||
2382 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | ||||
2383 | } | ||||
2384 | } | ||||
2385 | |||||
2386 | void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params) | ||||
2387 | { | ||||
2388 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | ||||
2389 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2389, __extension__ __PRETTY_FUNCTION__); })); | ||||
2390 | compiled_data->compile_params = compile_params; | ||||
2391 | } | ||||
2392 | |||||
2393 | void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size) | ||||
2394 | { | ||||
2395 | if (model->graph && out_size > 0) | ||||
2396 | ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]); | ||||
2397 | if (model->compiled_data && model->compiled_data->graph && out_size > 1) | ||||
2398 | ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]); | ||||
2399 | if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2) | ||||
2400 | ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]); | ||||
2401 | if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3) | ||||
2402 | ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]); | ||||
2403 | } | ||||
2404 | |||||
2405 | static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | ||||
2406 | { | ||||
2407 | int i; | ||||
2408 | const int parameter_size = compiled_data->parameters->rnum; | ||||
2409 | ccv_array_free(compiled_data->parameters); | ||||
2410 | const int internal_size = compiled_data->internals->rnum; | ||||
2411 | ccv_array_free(compiled_data->internals); | ||||
2412 | assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum == parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data ->ids.parameters->rnum == parameter_size) ; else __assert_fail ("compiled_data->ids.parameters->rnum == parameter_size" , "ccv_cnnp_model.c", 2412, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2413 | assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size ) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals ->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size" , "ccv_cnnp_model.c", 2413, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
2414 | for (i = 0; i < parameter_size; i++) | ||||
2415 | ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i)))); | ||||
2416 | ccv_array_free(compiled_data->ids.parameters); | ||||
2417 | for (i = 0; i < internal_size; i++) | ||||
2418 | ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data) ) + (size_t)(compiled_data->ids.internals)->rsize * (size_t )(i)))); | ||||
2419 | ccv_array_free(compiled_data->ids.internals); | ||||
2420 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | ||||
2421 | if (compiled_data->tensors.parameters) | ||||
2422 | { | ||||
2423 | for (i = 0; i < parameter_size * parallel_count; i++) | ||||
2424 | ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]); | ||||
2425 | for (i = 0; i < internal_size * parallel_count; i++) | ||||
2426 | if (compiled_data->tensors.internals[i]) | ||||
2427 | ccv_nnc_tensor_free(compiled_data->tensors.internals[i]); | ||||
2428 | ccfreefree(compiled_data->tensors.parameters); | ||||
2429 | } | ||||
2430 | if (compiled_data->tensors.gradients) | ||||
2431 | { | ||||
2432 | for (i = 0; i < parameter_size * parallel_count; i++) | ||||
2433 | { | ||||
2434 | ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]); | ||||
2435 | if (compiled_data->tensors.accum_gradients[i]) | ||||
2436 | ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]); | ||||
2437 | } | ||||
2438 | ccfreefree(compiled_data->tensors.gradients); | ||||
2439 | } | ||||
2440 | if (compiled_data->minimize.parameters) | ||||
2441 | { | ||||
2442 | for (i = 0; i < compiled_data->minimize.parameters->rnum; i++) | ||||
2443 | ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)-> data)) + (size_t)(compiled_data->minimize.parameters)-> rsize * (size_t)(i)))); | ||||
2444 | ccv_array_free(compiled_data->minimize.parameters); | ||||
2445 | } | ||||
2446 | if (compiled_data->rewindables) | ||||
2447 | ccv_array_free(compiled_data->rewindables); | ||||
2448 | if (compiled_data->tensors_init.v) | ||||
2449 | ccfreefree(compiled_data->tensors_init.v); | ||||
2450 | if (compiled_data->evaluate.tos) | ||||
2451 | ccfreefree(compiled_data->evaluate.tos); | ||||
2452 | compiled_data->evaluate.tos = 0; | ||||
2453 | if (compiled_data->stream_map) | ||||
2454 | { | ||||
2455 | khiter_t k; | ||||
2456 | for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k) | ||||
2457 | { | ||||
2458 | if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>> (((k)&0xfU)<<1))&3))) | ||||
2459 | continue; | ||||
2460 | ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]); | ||||
2461 | ccv_nnc_stream_context_free(stream); | ||||
2462 | } | ||||
2463 | kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map); | ||||
2464 | } | ||||
2465 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | ||||
2466 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | ||||
2467 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | ||||
2468 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | ||||
2469 | ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc); | ||||
2470 | ccfreefree(compiled_data); | ||||
2471 | } | ||||
2472 | |||||
2473 | void ccv_cnnp_model_free(ccv_cnnp_model_t* const model) | ||||
2474 | { | ||||
2475 | if (model->isa->deinit) | ||||
2476 | model->isa->deinit(model); | ||||
2477 | if (model->io) | ||||
2478 | { | ||||
2479 | int i; | ||||
2480 | for (i = 0; i < model->io->rnum; i++) | ||||
2481 | { | ||||
2482 | ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model-> io)->rsize * (size_t)(i))); | ||||
2483 | if (model_io->outgoings) | ||||
2484 | ccv_array_free(model_io->outgoings); | ||||
2485 | if (model_io->incomings) | ||||
2486 | ccv_array_free(model_io->incomings); | ||||
2487 | ccfreefree(model_io); | ||||
2488 | } | ||||
2489 | ccv_array_free(model->io); | ||||
2490 | } | ||||
2491 | if (model->parameter_indices) | ||||
2492 | ccv_array_free(model->parameter_indices); | ||||
2493 | if (model->inputs) | ||||
2494 | ccfreefree(model->inputs); | ||||
2495 | if (model->graph) | ||||
2496 | ccv_nnc_symbolic_graph_free(model->graph); | ||||
2497 | if (model->compiled_data) | ||||
2498 | _ccv_cnnp_compiled_data_free(model, model->compiled_data); | ||||
2499 | if (model->name) | ||||
2500 | ccfreefree(model->name); | ||||
2501 | ccfreefree(model); | ||||
2502 | } |