/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_dynamic_graph_minimize.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #include "ccv_nnc_easy.h" |
5 | | #include "ccv_internal.h" |
6 | | #include "_ccv_nnc_dynamic_graph.h" |
7 | | |
8 | | // MARK - Level-4.5 API |
9 | | |
/**
 * Run one minimization step on the dynamic graph tape: differentiate the given
 * losses w.r.t. the parameters, apply the minimizer command to produce updated
 * parameter values, and execute the resulting graph immediately.
 *
 * @param dynamic_graph   The dynamic graph that holds the tape of recorded ops.
 * @param minimizer       The optimizer command (e.g. SGD / Adam) applied per parameter.
 * @param losses          Loss tensor variables; each must already have been produced
 *                        by at least one executed op (asserted via bind->sources).
 * @param loss_size       Number of losses (> 0).
 * @param dloss_optionals Optional gradients of the losses. When 0, the loss
 *                        gradients are initialized to 1 with a SET_FORWARD(1) op.
 * @param parameters      In/out parameter variables; each is exchanged for a fresh
 *                        variable that receives the updated value.
 * @param parameter_size  Number of parameters (> 0).
 * @param saved_aux       Optimizer auxiliary state variables (momentum etc.); size is
 *                        parameter_size * ccv_nnc_minimizer_saved_aux_size(minimizer).
 * @param parallel        Data-parallel copy count; > 1 inserts allreduce over gradients.
 * @param stream_context  If non-0, the graph is scheduled and run asynchronously on it.
 */
void ccv_nnc_dynamic_graph_minimize(ccv_nnc_dynamic_graph_t* const dynamic_graph, const ccv_nnc_cmd_t minimizer, const ccv_nnc_tensor_variable_t* const losses, const int loss_size, const ccv_nnc_tensor_variable_t* const dloss_optionals, ccv_nnc_tensor_variable_t* const parameters, const int parameter_size, ccv_nnc_tensor_variable_t* const saved_aux, const int parallel, ccv_nnc_stream_context_t* const stream_context)
{
	assert(parameter_size > 0);
	assert(loss_size > 0);
	int d, i, j, k;
	int losses_source_size = 0;
	// Both f_variable and tensor_variable should be, at least, executed. Otherwise we cannot differentiate.
	for (i = 0; i < loss_size; i++)
	{
		assert(losses[i]->symbol.d >= 0);
		const ccv_nnc_tensor_variable_graph_bind_t* const loss_symbol_extra = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(dynamic_graph->binds, losses[i]->symbol.d);
		assert(loss_symbol_extra->sources && loss_symbol_extra->sources->rnum > 0);
		losses_source_size += loss_symbol_extra->sources->rnum;
	}
	for (i = 0; i < parameter_size; i++)
	{
		assert(parameters[i]->symbol.d >= 0);
		assert(((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(dynamic_graph->binds, parameters[i]->symbol.d))->destinations &&
			((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(dynamic_graph->binds, parameters[i]->symbol.d))->destinations->rnum > 0);
	}
	const int exec_symbol_info_size = ccv_nnc_graph_exec_symbol_count(dynamic_graph->tape);
	ccv_array_t* const sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 1, 0);
	// Lazily create / resize the reusable workspace: two int buffers of
	// exec_symbol_info_size each, followed by a visited bitmap of one bit per exec symbol.
	if (!dynamic_graph->ws)
		dynamic_graph->ws = ccv_array_new(sizeof(int), exec_symbol_info_size * 2 + ((exec_symbol_info_size + 31) >> 5), 0);
	ccv_array_t* const ws = dynamic_graph->ws;
	ccv_array_resize(ws, exec_symbol_info_size * 2 + ((exec_symbol_info_size + 31) >> 5));
	// set visited to all 0.
	memset((uint32_t*)ccv_array_get(ws, exec_symbol_info_size * 2), 0, sizeof(uint32_t) * ((exec_symbol_info_size + 31) >> 5));
	// Seed `sources` with the execs that wrote each parameter, keeping only the
	// ones that are prior to (i.e. not dominated by) anything already collected.
	for (i = 0; i < parameter_size; i++)
	{
		ccv_array_t* const destinations = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(dynamic_graph->binds, parameters[i]->symbol.d))->destinations;
		for (j = 0; j < destinations->rnum; j++)
			ccv_nnc_insert_if_prior_to_any(dynamic_graph->tape,
				*(int*)ccv_array_get(destinations, j),
				sources, (uint32_t*)ccv_array_get(ws, exec_symbol_info_size * 2),
				(int*)ccv_array_get(ws, 0), (int*)ccv_array_get(ws, exec_symbol_info_size));
	}
	// Collect the unique execs that produced the losses as the graph destinations.
	ccv_array_t* const destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), losses_source_size, 0);
	for (i = 0; i < loss_size; i++)
	{
		const ccv_nnc_tensor_variable_graph_bind_t* const loss_symbol_extra = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(dynamic_graph->binds, losses[i]->symbol.d);
		for (j = 0; j < loss_symbol_extra->sources->rnum; j++)
		{
			const int symbol_d = *(int*)ccv_array_get(loss_symbol_extra->sources, j);
			int flag = 0;
			// Linear de-dup; destinations is expected to stay small.
			for (k = 0; !flag && k < destinations->rnum; k++)
				flag = (symbol_d == ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(destinations, k))->d);
			if (!flag)
			{
				const ccv_nnc_graph_exec_symbol_t symbol = {
					.d = symbol_d,
					.graph = dynamic_graph->tape
				};
				ccv_array_push(destinations, &symbol);
			}
		}
	}
	// Go over sources, because destinations will get removed all the time, thus, the index is not accurate.
	if (destinations->rnum > 1)
		for (i = 0; i < destinations->rnum; i++)
		{
			memset((uint32_t*)ccv_array_get(ws, exec_symbol_info_size * 2), 0, sizeof(uint32_t) * ((exec_symbol_info_size + 31) >> 5));
			// Drop any destination that is an ancestor of another destination, so the
			// remaining set is a proper frontier.
			ccv_nnc_remove_if_prior_to_any(dynamic_graph->tape,
				((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(destinations, i))->d,
				destinations, (uint32_t*)ccv_array_get(ws, exec_symbol_info_size * 2),
				(int*)ccv_array_get(ws, 0), (int*)ccv_array_get(ws, exec_symbol_info_size));
		}
	ccv_nnc_tensor_symbol_t loss_symbols[loss_size];
	for (i = 0; i < loss_size; i++)
		loss_symbols[i] = losses[i]->symbol;
	ccv_nnc_tensor_symbol_t parameter_symbols[parameter_size];
	for (i = 0; i < parameter_size; i++)
		parameter_symbols[i] = parameters[i]->symbol;
	// Record every symbol created from here on so the tape can be restored afterwards.
	ccv_array_t* const symbol_stack = ccv_array_new(sizeof(ccv_nnc_tape_symbol_t), 1, 0);
	ccv_nnc_tensor_symbol_new_hook(dynamic_graph->tape, ccv_nnc_dynamic_graph_push_backward_tensor_symbol, symbol_stack, 0);
	ccv_nnc_tensor_symbol_alias_new_hook(dynamic_graph->tape, ccv_nnc_dynamic_graph_push_backward_tensor_symbol_alias, symbol_stack, 0);
	ccv_nnc_graph_exec_symbol_new_hook(dynamic_graph->tape, ccv_nnc_dynamic_graph_push_backward_graph_exec_symbol, symbol_stack, 0);
	ccv_nnc_tensor_symbol_t updated_parameter_symbols[parameter_size];
	const int saved_aux_size = parameter_size * ccv_nnc_minimizer_saved_aux_size(minimizer);
	ccv_nnc_tensor_symbol_map_t saved_aux_symbols[saved_aux_size];
	ccv_nnc_graph_exec_symbol_t update_exec_symbols[parameter_size];
	// Build backward pass + parameter update execs on the tape.
	ccv_nnc_symbolic_graph_minimize(dynamic_graph->tape, minimizer,
		loss_symbols, loss_size, parameter_symbols, parameter_size, 0, 0,
		(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, 0), sources->rnum,
		(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(destinations, 0), destinations->rnum,
		0, updated_parameter_symbols, saved_aux_symbols, update_exec_symbols);
	const int parallel_count = ccv_max(parallel, 1);
	if (parallel_count > 1)
	{
		// Data-parallel case: parameters are laid out as parallel_count copies of
		// per_parameter_size parameters. For each logical parameter, allreduce the
		// gradients across copies and feed the reduced result to each update exec.
		const int per_parameter_size = parameter_size / parallel_count;
		assert((parameter_size % parallel_count) == 0);
		ccv_nnc_tensor_symbol_t* const allreduce_inputs = parallel_count > 1 ? (ccv_nnc_tensor_symbol_t*)alloca(sizeof(ccv_nnc_tensor_symbol_t) * parallel_count * 2) : 0;
		ccv_nnc_tensor_symbol_t* const allreduce_outputs = allreduce_inputs ? allreduce_inputs + parallel_count : 0;
		for (i = 0; i < per_parameter_size; i++)
		{
			for (j = 0; j < parallel_count; j++)
			{
				const int idx = i + j * per_parameter_size;
				assert(parameters[idx]->symbol.d >= 0);
				const ccv_nnc_tensor_param_t info = parameters[i + j * per_parameter_size]->info;
				const ccv_nnc_tensor_symbol_t gradient = ccv_nnc_tensor_symbol_for_backward(dynamic_graph->tape, parameters[idx]->symbol);
				allreduce_inputs[j] = gradient;
				allreduce_outputs[j] = ccv_nnc_tensor_symbol_new(dynamic_graph->tape, info, 0);
			}
			const ccv_nnc_graph_exec_symbol_t allreduce = ccv_nnc_graph_exec_symbol_new(dynamic_graph->tape, CMD_COMM_ALLREDUCE_FORWARD(), allreduce_inputs, parallel_count, allreduce_outputs, parallel_count, 0);
			for (j = 0; j < parallel_count; j++)
			{
				const int idx = i + j * per_parameter_size;
				const ccv_nnc_tensor_symbol_t gradient = ccv_nnc_tensor_symbol_for_backward(dynamic_graph->tape, parameters[idx]->symbol);
				const ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_for_backward(dynamic_graph->tape, gradient);
				// Rewire: gradient-producing exec -> allreduce -> update exec, and make
				// the update consume the allreduced output instead of the raw gradient.
				ccv_nnc_graph_exec_symbol_disjoin(dynamic_graph->tape, graph_exec, update_exec_symbols[idx]);
				ccv_nnc_graph_exec_symbol_concat(dynamic_graph->tape, graph_exec, allreduce);
				ccv_nnc_graph_exec_symbol_replace_io(dynamic_graph->tape, update_exec_symbols[idx], allreduce_inputs[j], allreduce_outputs[j]);
				ccv_nnc_graph_exec_symbol_concat(dynamic_graph->tape, allreduce, update_exec_symbols[idx]);
			}
		}
	}
	// Stop recording new symbols; everything needed has been created.
	ccv_nnc_tensor_symbol_new_hook(dynamic_graph->tape, 0, 0, 0);
	ccv_nnc_tensor_symbol_alias_new_hook(dynamic_graph->tape, 0, 0, 0);
	ccv_nnc_graph_exec_symbol_new_hook(dynamic_graph->tape, 0, 0, 0);
	// Bind generated tensors.
	ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), dynamic_graph->vars->rnum + 2, 0);
	for (i = 0; i < dynamic_graph->vars->rnum; i++)
	{
		ccv_nnc_tensor_variable_t var = *(ccv_nnc_tensor_variable_t*)ccv_array_get(dynamic_graph->vars, i);
		if (var && var->tensor_view && var->symbol.d >= 0)
		{
			ccv_nnc_tensor_bind_t bind = {
				.symbol = var->symbol,
				.tensor = (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(var->tensor_view)
			};
			ccv_array_push(tensor_binds, &bind);
		}
	}
	// Also bind tensors whose variables were freed but are still referenced by the tape.
	for (i = 0; i < dynamic_graph->binds->rnum; i++)
	{
		ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(dynamic_graph->binds, i);
		if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && bind->tensor_view)
		{
			ccv_nnc_tensor_bind_t b = {
				.symbol = {
					.d = i,
					.graph = dynamic_graph->tape,
				},
				.tensor = (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(bind->tensor_view)
			};
			ccv_array_push(tensor_binds, &b);
		}
	}
	// Compiled graph comes from the dloss.
	ccv_array_clear(sources);
	ccv_nnc_tensor_symbol_t dloss_symbols[loss_size];
	for (i = 0; i < loss_size; i++)
	{
		dloss_symbols[i] = ccv_nnc_tensor_symbol_for_backward(dynamic_graph->tape, losses[i]->symbol);
		assert(dloss_symbols[i].d >= 0);
	}
	// Repopulate `sources` with the backward execs that consume a dloss symbol
	// (directly or through an alias): these start the compiled graph.
	for (d = 0; d < destinations->rnum; d++)
	{
		const ccv_nnc_graph_exec_symbol_t* const destination = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(destinations, d);
		const int* outgoings; int outgoing_size;
		ccv_nnc_graph_exec_symbol_to(dynamic_graph->tape, *destination, &outgoings, &outgoing_size);
		for (i = 0; i < outgoing_size; i++)
		{
			const int exec_idx = outgoings[i];
			const int* inputs; int input_size;
			ccv_nnc_graph_exec_symbol_io(dynamic_graph->tape, (ccv_nnc_graph_exec_symbol_t){
				.d = exec_idx,
				.graph = dynamic_graph->tape
			}, &inputs, &input_size, 0, 0);
			for (j = 0; j < input_size; j++)
			{
				const int input = inputs[j];
				const int alias_ref = input >= 0 ? ccv_nnc_tensor_symbol_alias_to(dynamic_graph->tape, (ccv_nnc_tensor_symbol_t){
					.d = input,
					.graph = dynamic_graph->tape
				}).d : CCV_NNC_NO_TENSOR_SYMBOL; // This could be CCV_NNC_NO_TENSOR_SYMBOL, which is negative.
				// alias_ref is either exists, or -1.
				int flag = 0;
				for (k = 0; !flag && k < loss_size; k++)
					flag = (dloss_symbols[k].d == input || dloss_symbols[k].d == alias_ref);
				if (flag)
				{
					flag = 0;
					for (k = 0; !flag && k < sources->rnum; k++)
						flag = (exec_idx == ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, k))->d);
					if (!flag)
					{
						const ccv_nnc_graph_exec_symbol_t source = {
							.d = exec_idx,
							.graph = dynamic_graph->tape
						};
						ccv_array_push(sources, &source);
					}
					break;
				}
			}
		}
	}
	ccv_array_free(destinations);
	int freeable_size = 0;
	ccv_nnc_tensor_variable_t freeables[parameter_size + saved_aux_size];
	// Bind dt tensor.
	// Each parameter is swapped for a fresh variable; the old one is queued in
	// `freeables` and only freed after the graph has run (see the end of this function).
	for (i = 0; i < parameter_size; i++)
	{
		const ccv_nnc_tensor_symbol_t symbol = updated_parameter_symbols[i];
		if (parameters[i]->symbol.d >= 0)
			freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(dynamic_graph, parameters[i]);
		ccv_nnc_tensor_t* tensor = ccv_nnc_tensor_from_variable(dynamic_graph, parameters[i], stream_context);
		const ccv_nnc_tensor_bind_t dt_bind = {
			.symbol = symbol,
			.tensor = tensor
		};
		ccv_array_push(tensor_binds, &dt_bind);
	}
	// Bind each saved-aux tensor to both sides of its symbol map (read old state,
	// write new state into the same storage).
	for (i = 0; i < saved_aux_size; i++)
	{
		const ccv_nnc_tensor_symbol_map_t symbol_map = saved_aux_symbols[i];
		if (saved_aux[i]->symbol.d >= 0)
			freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(dynamic_graph, saved_aux[i]);
		ccv_nnc_tensor_t* tensor = ccv_nnc_tensor_from_variable(dynamic_graph, saved_aux[i], stream_context);
		ccv_nnc_tensor_bind_t aux_bind = {
			.symbol = symbol_map.source,
			.tensor = tensor
		};
		ccv_array_push(tensor_binds, &aux_bind);
		aux_bind.symbol = symbol_map.destination;
		ccv_array_push(tensor_binds, &aux_bind);
	}
	ccv_nnc_dy_xpu_alloc_t xpu_alloc = {
		.xpu_alloc = &dynamic_graph->xpu_alloc,
		.stream = stream_context
	};
	ccv_nnc_symbolic_graph_compile_param_t compile_params = {
		.allocator = {
			.isa = &ccv_nnc_dy_allocator_isa,
			.context = {
				.alloc = &xpu_alloc,
				.free = &dynamic_graph->xpu_alloc,
			}
		}
	};
	ccv_nnc_graph_t* graph = 0;
	ccv_nnc_tensor_arena_t* tensor_arena = 0;
	ccv_nnc_graph_exec_arena_t* exec_arena = 0;
	if (dloss_optionals)
	{
		// If provided df variable, no need to set to all ones.
		for (i = 0; i < loss_size; i++)
		{
			const ccv_nnc_tensor_bind_t df_bind = {
				.symbol = dloss_symbols[i],
				.tensor = ccv_nnc_tensor_from_variable(dynamic_graph, dloss_optionals[i], stream_context)
			};
			ccv_array_push(tensor_binds, &df_bind);
		}
		ccv_nnc_symbolic_graph_compile(dynamic_graph->tape, compile_params,
			(ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum,
			0, 0,
			(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, 0), sources->rnum,
			update_exec_symbols, parameter_size,
			&graph, &tensor_arena, &exec_arena);
		ccv_array_free(sources);
	} else {
		// No df provided: loss gradients default to 1. First try to drop each dloss
		// symbol from source execs whose command treats an absent input as 1.
		int max_input_size = 1;
		int max_output_size = 1;
		for (i = 0; i < sources->rnum; i++)
		{
			const ccv_nnc_graph_exec_symbol_t source = *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, i);
			int input_size; int output_size;
			ccv_nnc_graph_exec_symbol_io(dynamic_graph->tape, source, 0, &input_size, 0, &output_size);
			max_input_size = ccv_max(input_size, max_input_size);
			max_output_size = ccv_max(output_size, max_output_size);
		}
		const int max_input_bitmask_size = ((max_input_size + 63) >> 6);
		const int max_output_bitmask_size = ((max_output_size + 63) >> 6);
		ccv_nnc_tensor_symbol_t input_symbols[max_input_size];
		ccv_nnc_tensor_symbol_t output_symbols[max_output_size];
		uint64_t input_bitmasks[max_input_bitmask_size];
		uint64_t output_bitmasks[max_output_bitmask_size];
		// Remove these if it is not needed by the cmd, for example, if absence assumed to be 1.
		for (i = 0; i < loss_size; i++)
		{
			if (!dloss_symbols[i].graph) // Skip.
				continue;
			int no_set = 0; // If we cannot find the df_symbols in all sources, we cannot predict whether it is used or not.
			for (j = 0; j < sources->rnum; j++)
			{
				const ccv_nnc_graph_exec_symbol_t source = *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, j);
				const int* inputs; int input_size;
				const int* outputs; int output_size;
				ccv_nnc_graph_exec_symbol_io(dynamic_graph->tape, source, &inputs, &input_size, &outputs, &output_size);
				const ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(dynamic_graph->tape, source);
				int flag = 0;
				for (k = 0; !flag && k < input_size; k++)
				{
					const int alias_ref = inputs[k] >= 0 ? ccv_nnc_tensor_symbol_alias_to(dynamic_graph->tape, (ccv_nnc_tensor_symbol_t){
						.d = inputs[k],
						.graph = dynamic_graph->tape
					}).d : CCV_NNC_NO_TENSOR_SYMBOL; // This could be CCV_NNC_NO_TENSOR_SYMBOL, which is negative.
					flag = (dloss_symbols[i].d == inputs[k] || dloss_symbols[i].d == alias_ref);
				}
				if (flag)
				{
					no_set = 1;
					// Now, check to see if we can remove this symbol from this source.
					memset(input_bitmasks, 0, sizeof(uint64_t) * ccv_max(1, ((input_size + 63) >> 6)));
					memset(output_bitmasks, 0, sizeof(uint64_t) * ccv_max(1, ((output_size + 63) >> 6)));
					for (k = 0; k < input_size; k++)
						if (inputs[k] >= 0)
						{
							const int alias_ref = inputs[k] >= 0 ? ccv_nnc_tensor_symbol_alias_to(dynamic_graph->tape, (ccv_nnc_tensor_symbol_t){
								.d = inputs[k],
								.graph = dynamic_graph->tape
							}).d : CCV_NNC_NO_TENSOR_SYMBOL; // This could be CCV_NNC_NO_TENSOR_SYMBOL, which is negative.
							if (dloss_symbols[i].d != inputs[k] && dloss_symbols[i].d != alias_ref)
								input_bitmasks[k >> 6] |= ((uint64_t)1 << (k & 63));
						}
					for (k = 0; k < output_size; k++)
						if (outputs[k] >= 0)
							output_bitmasks[k >> 6] |= ((uint64_t)1 << (k & 63));
					// If the command doesn't accept this input pattern with the dloss
					// absent, we cannot elide it.
					if (!ccv_nnc_cmd_bitmask(cmd, input_size, output_size, input_bitmasks, (input_size + 63) >> 6, output_bitmasks, (output_size + 63) >> 6))
						no_set = 0;
				}
			}
			if (no_set) // Remove this flag from all sources and continue.
			{
				for (j = 0; j < sources->rnum; j++)
				{
					const ccv_nnc_graph_exec_symbol_t source = *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, j);
					const int* inputs; int input_size;
					const int* outputs; int output_size;
					ccv_nnc_graph_exec_symbol_io(dynamic_graph->tape, source, &inputs, &input_size, &outputs, &output_size);
					int flag = 0;
					for (k = 0; !flag && k < input_size; k++)
					{
						const int alias_ref = inputs[k] >= 0 ? ccv_nnc_tensor_symbol_alias_to(dynamic_graph->tape, (ccv_nnc_tensor_symbol_t){
							.d = inputs[k],
							.graph = dynamic_graph->tape
						}).d : CCV_NNC_NO_TENSOR_SYMBOL; // This could be CCV_NNC_NO_TENSOR_SYMBOL, which is negative.
						flag = (dloss_symbols[i].d == inputs[k] || dloss_symbols[i].d == alias_ref);
					}
					if (flag)
					{
						// Rewrite this exec's inputs with the dloss symbol replaced by
						// "no symbol"; outputs are copied through unchanged.
						for (k = 0; k < input_size; k++)
							if (inputs[k] >= 0)
							{
								const int alias_ref = inputs[k] >= 0 ? ccv_nnc_tensor_symbol_alias_to(dynamic_graph->tape, (ccv_nnc_tensor_symbol_t){
									.d = inputs[k],
									.graph = dynamic_graph->tape
								}).d : CCV_NNC_NO_TENSOR_SYMBOL; // This could be CCV_NNC_NO_TENSOR_SYMBOL, which is negative.
								const int no_symbol = (dloss_symbols[i].d == inputs[k] || dloss_symbols[i].d == alias_ref);
								input_symbols[k] = (ccv_nnc_tensor_symbol_t){
									.d = no_symbol ? CCV_NNC_NO_TENSOR_SYMBOL : inputs[k],
									.graph = no_symbol ? 0 : dynamic_graph->tape,
								};
							} else {
								input_symbols[k] = (ccv_nnc_tensor_symbol_t){
									.d = inputs[k],
									.graph = inputs[k] != CCV_NNC_NO_TENSOR_SYMBOL ? dynamic_graph->tape : 0,
								};
							}
						for (k = 0; k < output_size; k++)
							output_symbols[k] = (ccv_nnc_tensor_symbol_t){
								.d = outputs[k],
								.graph = outputs[k] != CCV_NNC_NO_TENSOR_SYMBOL ? dynamic_graph->tape : 0,
							};
						ccv_nnc_graph_exec_symbol_set_io(dynamic_graph->tape, source, input_symbols, input_size, output_symbols, output_size);
					}
				}
				// Mark as elided so the set-to-one aggregation below skips it.
				dloss_symbols[i].graph = 0;
			}
		}
		// Aggregate them into one set command.
		// Remaining dloss symbols are grouped by identical tensor type and each
		// group is initialized with a single SET_FORWARD(1) exec.
		ccv_nnc_tensor_symbol_t dloss_symbols_0[loss_size];
		ccv_nnc_graph_exec_symbol_t set_ones[loss_size];
		int set_one_size = 0;
		for (i = 0; i < loss_size;)
			if (!dloss_symbols[i].graph) // Skip.
				++i;
			else {
				dloss_symbols_0[0] = dloss_symbols[i];
				k = 1;
				int idx = loss_size;
				const ccv_nnc_tensor_param_t params_0 = ccv_nnc_tensor_symbol_params(dynamic_graph->tape, dloss_symbols_0[0]);
				for (j = i + 1; j < loss_size; j++)
					if (dloss_symbols[j].graph)
					{
						const ccv_nnc_tensor_param_t params_j = ccv_nnc_tensor_symbol_params(dynamic_graph->tape, dloss_symbols[j]);
						if (params_j.type != params_0.type)
						{
							// Different type: remember the first mismatch so the outer
							// loop resumes there for the next group.
							if (idx == loss_size)
								idx = j;
						} else {
							dloss_symbols_0[k++] = dloss_symbols[j];
							assert(dloss_symbols[j].graph == dynamic_graph->tape);
							dloss_symbols[j].graph = 0;
						}
					}
				i = idx;
				set_ones[set_one_size] = ccv_nnc_graph_exec_symbol_new(dynamic_graph->tape, CMD_SET_FORWARD(1), 0, 0, dloss_symbols_0, k, 0);
				for (j = 0; j < sources->rnum; j++)
					ccv_nnc_graph_exec_symbol_concat(dynamic_graph->tape, set_ones[set_one_size], *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, j));
				++set_one_size;
			}
		// Reset it back.
		for (i = 0; i < loss_size; i++)
			dloss_symbols[i].graph = dynamic_graph->tape;
		if (set_one_size > 0)
		{
			// The set-to-one execs are the new graph sources.
			ccv_nnc_symbolic_graph_compile(dynamic_graph->tape, compile_params,
				(ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum,
				0, 0,
				set_ones, set_one_size,
				update_exec_symbols, parameter_size,
				&graph, &tensor_arena, &exec_arena);
		} else {
			// Every dloss was elided; compile from the original backward sources.
			ccv_nnc_symbolic_graph_compile(dynamic_graph->tape, compile_params,
				(ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0), tensor_binds->rnum,
				0, 0,
				(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources, 0), sources->rnum,
				update_exec_symbols, parameter_size,
				&graph, &tensor_arena, &exec_arena);
		}
		ccv_array_free(sources);
		for (i = 0; i < set_one_size; i++)
			ccv_nnc_graph_exec_symbol_free(dynamic_graph->tape, set_ones[i]);
	}
	ccv_array_free(tensor_binds);
	// Remove newly added symbols to restore the graph.
	for (i = 0; i < symbol_stack->rnum; i++)
	{
		const ccv_nnc_tape_symbol_t* const symbol = (ccv_nnc_tape_symbol_t*)ccv_array_get(symbol_stack, i);
		if (symbol->type == CCV_NNC_SYMBOL_TENSOR || symbol->type == CCV_NNC_SYMBOL_TENSOR_ALIAS)
			ccv_nnc_tensor_symbol_free(dynamic_graph->tape, (ccv_nnc_tensor_symbol_t){
				.d = symbol->d,
				.graph = dynamic_graph->tape
			});
		else if (symbol->type == CCV_NNC_SYMBOL_GRAPH_EXEC)
			ccv_nnc_graph_exec_symbol_free(dynamic_graph->tape, (ccv_nnc_graph_exec_symbol_t){
				.d = symbol->d,
				.graph = dynamic_graph->tape
			});
	}
	ccv_array_free(symbol_stack);
	if (stream_context)
	{
		// Async path: schedule on the caller's stream; the graph/arena artifact is
		// freed by a stream callback once execution completes.
		ccv_nnc_graph_set_default_static_schedule(graph, ccv_nnc_stream_context_type(stream_context), dynamic_graph->max_stream_count);
		ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, stream_context);
		ccv_nnc_tensor_arena_buffer_free(tensor_arena);
		ccv_nnc_compilation_artifact_t* const artifact = ccv_nnc_compilation_artifact_new(graph, tensor_arena, exec_arena);
		ccv_nnc_stream_context_add_callback(stream_context, (ccv_nnc_callback_f)ccv_nnc_compilation_artifact_free, artifact);
	} else {
		if (parallel > 1)
		{
			// Multi-device without a caller stream: run on the graph's default stream
			// (GPU if any parameter lives in GPU memory) and wait for completion.
			int flag = 0;
			for (i = 0; !flag && i < parameter_size; i++)
				flag = (CCV_TENSOR_GET_MEMORY(parameters[i]->info.type) == CCV_TENSOR_GPU_MEMORY);
			const int stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
			ccv_nnc_graph_set_default_static_schedule(graph, stream_type, dynamic_graph->max_stream_count);
			ccv_nnc_stream_context_t* const default_stream = ccv_nnc_graph_default_stream(graph);
			ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, default_stream);
			ccv_nnc_stream_context_wait(default_stream);
		} else
			ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
		ccv_nnc_graph_free(graph);
		ccv_nnc_tensor_arena_free(tensor_arena);
		ccv_nnc_graph_exec_arena_free(exec_arena);
	}
	// Now, able to free some of the reused outputs. This need to be the last step otherwise some of the exec symbols
	// above may be freed by this operation.
	for (i = 0; i < freeable_size; i++)
		ccv_nnc_tensor_variable_free(dynamic_graph, freeables[i]);
}