/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_dynamic_graph.c
Line | Count | Source |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #include "ccv_nnc_easy.h" |
5 | | #include "ccv_internal.h" |
6 | | #include "_ccv_nnc_dynamic_graph.h" |
7 | | |
8 | | // MARK - Level-4 API |
9 | | |
10 | | ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void) |
11 | 53 | { |
12 | 53 | ccv_nnc_dynamic_graph_t* graph = ccmalloc(sizeof(ccv_nnc_dynamic_graph_t)); |
13 | 53 | graph->no_grad = 0; |
14 | 53 | graph->reuse_var = -1; |
15 | 53 | graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0); |
16 | 53 | graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0); |
17 | 53 | graph->tape = ccv_nnc_symbolic_graph_new(); |
18 | 53 | graph->xpu_alloc.mp_hdr = -1; |
19 | 53 | graph->xpu_alloc.freed = kh_init(dy_str); |
20 | 53 | graph->xpu_alloc.allocd = kh_init(dy_alloc); |
21 | | // These may not be used as frequently; init as needed.
22 | 53 | graph->stateful_execs = 0; |
23 | 53 | graph->reuse_stateful_exec = -1; |
24 | 53 | graph->stream_map = 0; |
25 | 53 | graph->ws = 0; |
26 | 53 | return graph; |
27 | 53 | } |
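A minimal lifecycle sketch for the constructor above (editor's illustration, not part of the coverage listing; it uses only the public functions that appear in this file):

    #include "ccv_nnc.h"

    int main(void)
    {
        ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
        // ... create tensor variables and execute commands against the graph ...
        ccv_nnc_dynamic_graph_free(graph); // Releases remaining variables, binds, the tape, and cached streams.
        return 0;
    }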
28 | | |
29 | | static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing) |
30 | 32.3k | { |
31 | 32.3k | const int index = tensor_variable->index; |
32 | 32.3k | if (tensor_variable->tensor_view) |
33 | 14.6k | { |
34 | 14.6k | if (tensor_variable->destructor_hook.func) |
35 | 4 | tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context); |
36 | 14.6k | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view)) |
37 | 14.2k | { |
38 | 14.2k | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)) |
39 | 7 | ccv_nnc_tensor_view_free(tensor_variable->tensor_view); |
40 | 14.2k | else { |
41 | 14.2k | if (!tensor_variable->alias_index_ref && // Return this memory to the graph. |
42 | 14.2k | CCV_TENSOR_GET_MEMORY(tensor_variable->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY && tensor_variable->tensor_view->data.u8)
43 | 419 | ccv_nnc_xpu_free(&graph->xpu_alloc, tensor_variable->tensor_view->data.u8); |
44 | 14.2k | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); |
45 | 14.2k | } |
46 | 14.2k | } |
47 | 14.6k | } |
48 | 32.3k | ccfree(tensor_variable); |
49 | 32.3k | if (zeroing) |
50 | 32.1k | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index) = 0; |
51 | 32.3k | int i; |
52 | 64.4k | for (i = graph->vars->rnum - 1; i >= 0; i--)
53 | 64.2k | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) != 0) |
54 | 32.1k | { |
55 | 32.1k | graph->vars->rnum = i + 1; |
56 | 32.1k | break; |
57 | 32.1k | } |
58 | 32.3k | if (index < graph->vars->rnum && |
59 | 32.3k | (index < graph->reuse_var || graph->reuse_var < 0))
60 | 11.9k | graph->reuse_var = index; |
61 | 20.4k | else if (graph->reuse_var >= graph->vars->rnum) |
62 | 5.62k | graph->reuse_var = -1; |
63 | 32.3k | } |
64 | | |
65 | | static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing) |
66 | 27.7k | { |
67 | 27.7k | bind->index = CCV_NNC_TENSOR_NO_VARIABLE; |
68 | 27.7k | if (bind->sources) |
69 | 15.3k | ccv_array_free(bind->sources); |
70 | 27.7k | if (bind->destinations) |
71 | 23.0k | ccv_array_free(bind->destinations); |
72 | 27.7k | if (bind->tensor_view) |
73 | 17.6k | { |
74 | 17.6k | if (bind->destructor_hook.func) |
75 | 3 | bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context); |
76 | 17.6k | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view)) |
77 | 17.4k | { |
78 | 17.4k | if (CCV_IS_TENSOR_VIEW(bind->tensor_view)) |
79 | 1 | ccv_nnc_tensor_view_free(bind->tensor_view); |
80 | 17.4k | else { |
81 | 17.4k | if (!bind->alias_ref && // Return this memory to the graph. |
82 | 17.4k | CCV_TENSOR_GET_MEMORY(bind->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY && bind->tensor_view->data.u8)
83 | 38 | ccv_nnc_xpu_free(&graph->xpu_alloc, bind->tensor_view->data.u8); |
84 | 17.4k | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view); |
85 | 17.4k | } |
86 | 17.4k | } |
87 | 17.6k | } |
88 | 27.7k | if (zeroing) |
89 | 27.4k | { |
90 | 27.4k | bind->sources = 0; |
91 | 27.4k | bind->destinations = 0; |
92 | 27.4k | bind->tensor_view = 0; |
93 | 27.4k | bind->destructor_hook.func = 0; |
94 | 27.4k | bind->destructor_hook.context = 0; |
95 | 27.4k | } |
96 | 27.7k | } |
97 | | |
98 | | void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph) |
99 | 53 | { |
100 | 53 | int i; |
101 | 333 | for (i = 0; i < graph->vars->rnum; i++)
102 | 280 | { |
103 | 280 | ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i); |
104 | 280 | if (tensor_variable) |
105 | 227 | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0); |
106 | 280 | } |
107 | 53 | ccv_array_free(graph->vars); |
108 | 398 | for (i = 0; i < graph->binds->rnum; i++)
109 | 345 | _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i), 0); |
110 | 53 | ccv_array_free(graph->binds); |
111 | 53 | ccv_nnc_symbolic_graph_free(graph->tape); |
112 | 53 | if (graph->ws) |
113 | 36 | ccv_array_free(graph->ws); |
114 | 53 | if (graph->stateful_execs) |
115 | 12 | { |
116 | 33 | for (i = 0; i < graph->stateful_execs->rnum; i++)
117 | 21 | { |
118 | 21 | ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i); |
119 | 21 | if (stateful_exec) |
120 | 8 | ccfree(stateful_exec); |
121 | 21 | } |
122 | 12 | ccv_array_free(graph->stateful_execs); |
123 | 12 | } |
124 | 53 | if (graph->stream_map) |
125 | 10 | { |
126 | 10 | khiter_t k; |
127 | 58 | for (k = kh_begin(graph->stream_map); k != kh_end(graph->stream_map); ++k)
128 | 48 | { |
129 | 48 | if (!kh_exist(graph->stream_map, k)) |
130 | 25 | continue; |
131 | 23 | ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k); |
132 | 23 | ccv_nnc_stream_context_free(stream); |
133 | 23 | } |
134 | 10 | kh_destroy(stream_map, graph->stream_map); |
135 | 10 | } |
136 | 53 | ccv_nnc_xpu_alloc_destroy(&graph->xpu_alloc); |
137 | 53 | ccfree(graph); |
138 | 53 | } |
139 | | |
140 | | void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor) |
141 | 655 | { |
142 | 655 | assert(!tensor_variable->alias_index_ref); |
143 | 655 | if (tensor_variable->tensor_view && !CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
144 | 0 | { |
145 | 0 | assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)); |
146 | 0 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); |
147 | 0 | } |
148 | 655 | tensor_variable->info = tensor->info; |
149 | 655 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1); |
150 | 655 | } |
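The (uintptr_t)tensor | 1 above tags the pointer's low bit so an externally owned tensor can be told apart from graph-allocated ones; CCV_NNC_IS_EXTERN_TENSOR_VIEW presumably tests that bit. A generic sketch of the low-bit tagging idiom (macro names hypothetical, not the library's own):

    #include <stdint.h>
    // Works because heap pointers are at least 2-byte aligned, so bit 0 is free.
    #define TAG_EXTERN(ptr) ((void*)((uintptr_t)(ptr) | 1))             // Mark as externally owned.
    #define IS_EXTERN(ptr)  (((uintptr_t)(ptr)) & 1)                    // Test the tag bit.
    #define UNTAG(ptr)      ((void*)((uintptr_t)(ptr) & ~(uintptr_t)1)) // Recover the real pointer.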
151 | | |
152 | | void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context) |
153 | 7 | { |
154 | 7 | tensor_variable->destructor_hook.func = func; |
155 | 7 | tensor_variable->destructor_hook.context = context; |
156 | 7 | } |
157 | | |
158 | | inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info) |
159 | 31.3k | { |
160 | 31.3k | tensor_variable->alias_index_ref = 0; |
161 | 31.3k | tensor_variable->alias_off = 0; |
162 | 31.3k | tensor_variable->destructor_hook.func = 0; |
163 | 31.3k | tensor_variable->destructor_hook.context = 0; |
164 | 31.3k | tensor_variable->info = info; |
165 | 31.3k | tensor_variable->symbol = NO_TENSOR_SYMBOL; |
166 | 31.3k | tensor_variable->tensor_view = 0; |
167 | 31.3k | if (graph->reuse_var >= 0) |
168 | 802 | { |
169 | 802 | const int reuse_var = graph->reuse_var; |
170 | 802 | assert(reuse_var < graph->vars->rnum); |
171 | 802 | tensor_variable->index = reuse_var; |
172 | 802 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = tensor_variable; |
173 | 802 | int i; |
174 | 802 | graph->reuse_var = -1; |
175 | 1.43k | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
176 | 631 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0) |
177 | 591 | graph->reuse_var = i; |
178 | 30.5k | } else { |
179 | 30.5k | tensor_variable->index = graph->vars->rnum; |
180 | 30.5k | ccv_array_push(graph->vars, &tensor_variable); |
181 | 30.5k | } |
182 | 31.3k | } |
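reuse_var caches the lowest free slot in graph->vars; once consumed, the loop above rescans to the right for the next hole. A condensed sketch of that invariant over a plain pointer array (editor's illustration only):

    // Consume the cached lowest free slot, then recache the next hole to the right.
    static int take_slot(void** const slots, const int count, int* const reuse)
    {
        const int idx = *reuse; // Precondition: 0 <= idx < count and slots[idx] == 0.
        *reuse = -1;
        int i;
        for (i = idx + 1; i < count; i++)
            if (slots[i] == 0)
            {
                *reuse = i;
                break;
            }
        return idx;
    }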
183 | | |
184 | | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) |
185 | 31.2k | { |
186 | 31.2k | ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); |
187 | 31.2k | tensor_variable->type = CCV_NNC_TENSOR_VARIABLE; |
188 | 31.2k | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); |
189 | 31.2k | return tensor_variable; |
190 | 31.2k | } |
191 | | |
192 | | ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) |
193 | 37 | { |
194 | 37 | ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); |
195 | 37 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; |
196 | 37 | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); |
197 | 37 | return tensor_variable; |
198 | 37 | } |
199 | | |
200 | | int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) |
201 | 0 | { |
202 | 0 | return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT; |
203 | 0 | } |
204 | | |
205 | | ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) |
206 | 0 | { |
207 | 0 | return tensor_variable->info; |
208 | 0 | } |
209 | | |
210 | | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info) |
211 | 1.04k | { |
212 | 1.04k | ccv_nnc_tensor_variable_t variable_alias = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); |
213 | 1.04k | variable_alias->type = tensor_variable->type; |
214 | | // If the tensor variable is an alias itself, we point directly to its original. |
215 | 1.04k | if (tensor_variable->alias_index_ref) |
216 | 1 | { |
217 | 1 | variable_alias->alias_index_ref = tensor_variable->alias_index_ref; |
218 | | // The tensor variable needs to be fully specified if we are aliasing an alias.
219 | 1 | assert(!ccv_nnc_is_tensor_auto(tensor_variable->info)); |
220 | 1 | int i; |
221 | 1 | int no_stride = 1; |
222 | 2 | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC; i++)
223 | 1 | no_stride = (tensor_variable->stride[i] == 0); |
224 | 1 | int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC]; |
225 | 1 | int* to_stride; |
226 | 1 | if (no_stride) |
227 | 0 | { |
228 | 0 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, stride_from_dim); |
229 | 0 | to_stride = stride_from_dim; |
230 | 0 | } else |
231 | 1 | to_stride = tensor_variable->stride; |
232 | | // If we provide stride, or reshape to a different size, assert the tensor variable itself is contiguous (otherwise we cannot satisfy the reshape requirements). |
233 | 1 | const int different_dim = ccv_nnc_tensor_nd(info.dim) != ccv_nnc_tensor_nd(tensor_variable->info.dim); |
234 | 1 | if (different_dim || (stride[0] != 0 && memcmp(stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) != 0))
235 | 1 | { assert(ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, to_stride)); }
236 | | // Need to compute the alias offset, that is, the alias offset of the tensor variable plus its ofs.
237 | 1 | const off_t off = ccv_nnc_tensor_view_offset(tensor_variable->info.datatype, to_stride, tensor_variable->ofs); |
238 | 1 | variable_alias->alias_off = tensor_variable->alias_off + off; |
239 | | // If we don't provide stride, copy the stride from the previous variable.
240 | 1 | if (stride[0] == 0) |
241 | 0 | { |
242 | 0 | if (different_dim) |
243 | 0 | ccv_nnc_tensor_get_stride(info.dim, variable_alias->stride); |
244 | 0 | else |
245 | 0 | memcpy(variable_alias->stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
246 | 0 | } else |
247 | 1 | memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
248 | 1.04k | } else { |
249 | 1.04k | variable_alias->alias_index_ref = tensor_variable->index + 1; |
250 | 1.04k | variable_alias->alias_off = 0; |
251 | 1.04k | memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
252 | 1.04k | } |
253 | 1.04k | variable_alias->info = info; |
254 | 1.04k | variable_alias->symbol = NO_TENSOR_SYMBOL; |
255 | 1.04k | variable_alias->destructor_hook.func = 0; |
256 | 1.04k | variable_alias->destructor_hook.context = 0; |
257 | 1.04k | variable_alias->tensor_view = 0; |
258 | 1.04k | memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
259 | 1.04k | if (graph->reuse_var >= 0) |
260 | 0 | { |
261 | 0 | const int reuse_var = graph->reuse_var; |
262 | 0 | assert(reuse_var < graph->vars->rnum); |
263 | 0 | variable_alias->index = reuse_var; |
264 | 0 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = variable_alias; |
265 | 0 | int i; |
266 | 0 | graph->reuse_var = -1; |
267 | 0 | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++) |
268 | 0 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0) |
269 | 0 | graph->reuse_var = i; |
270 | 1.04k | } else { |
271 | 1.04k | variable_alias->index = graph->vars->rnum; |
272 | 1.04k | ccv_array_push(graph->vars, &variable_alias); |
273 | 1.04k | } |
274 | 1.04k | return variable_alias; |
275 | 1.04k | } |
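A usage sketch for the alias constructor above (editor's illustration assuming an existing graph and a 2x3 variable var; CPU_TENSOR_NHWC comes from ccv_nnc_easy.h; all-zero ofs / stride mean "start at the origin" and "derive a packed stride from dim"):

    int ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};    // Offset of the view into the original.
    int stride[CCV_NNC_MAX_DIM_ALLOC] = {0}; // All zeros: inherit a packed stride.
    // View the 2x3 variable's first row as a 1x3 alias; no data is copied.
    ccv_nnc_tensor_variable_t const row = ccv_nnc_tensor_variable_alias_new(graph, var, ofs, stride, CPU_TENSOR_NHWC(32F, 1, 3));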
276 | | |
277 | | int ccv_nnc_tensor_variable_alias_params(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, int ofs[CCV_NNC_MAX_DIM_ALLOC], int stride[CCV_NNC_MAX_DIM_ALLOC]) |
278 | 0 | { |
279 | 0 | if (!tensor_variable->alias_index_ref) |
280 | 0 | return -1; |
281 | 0 | if (ofs) |
282 | 0 | memcpy(ofs, tensor_variable->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
283 | 0 | if (stride) |
284 | 0 | memcpy(stride, tensor_variable->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
285 | 0 | return 0; |
286 | 0 | } |
287 | | |
288 | | ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context) |
289 | 69.1k | { |
290 | 69.1k | if (tensor_variable->tensor_view) |
291 | 37.4k | { |
292 | 37.4k | if (tensor_variable->alias_index_ref) |
293 | 1.03k | { |
294 | 1.03k | const int alias_index = tensor_variable->alias_index_ref - 1; |
295 | 1.03k | assert(alias_index >= 0); |
296 | 1.03k | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index); |
297 | 1.03k | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)) |
298 | 12 | { |
299 | 12 | ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view; |
300 | | // We cannot have an alias with a custom-set tensor; otherwise the pointer update is invalid.
301 | 12 | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
302 | | // Update the tensor_view pointer every time we access it, because the underlying variable it aliases may have changed.
303 | 12 | ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tv->off + tensor_variable->alias_off, &tv->data, &tv->dataof); |
304 | 1.02k | } else { |
305 | 1.02k | ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view; |
306 | | // We cannot have an alias with a custom-set tensor; otherwise the pointer update is invalid.
307 | 1.02k | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
308 | | // Update the tensor_view pointer every time we access it, because the underlying variable it aliases may have changed.
309 | 1.02k | ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->alias_off, &tv->data, &tv->dataof); |
310 | 1.02k | } |
311 | 1.03k | } |
312 | 37.4k | return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view); |
313 | 37.4k | } |
314 | 31.7k | if (!tensor_variable->alias_index_ref) |
315 | 30.6k | { |
316 | | // If we haven't allocated tensor_variable, we cannot allocate it now (no shape specified), so return 0.
317 | 30.6k | if (ccv_nnc_is_tensor_auto(tensor_variable->info)) |
318 | 0 | return 0; |
319 | 30.6k | void* ptr = 0; |
320 | 30.6k | const size_t data_size = ccv_nnc_tensor_data_size(tensor_variable->info); |
321 | 30.6k | if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type) == CCV_TENSOR_GPU_MEMORY && data_size > 0)
322 | 457 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type), stream_context, data_size); |
323 | 30.6k | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0); |
324 | 30.6k | if (tensor_variable->info.dim[0] > 0) |
325 | 30.6k | { assert(tensor_variable->tensor_view->data.u8); } |
326 | 30.6k | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; |
327 | 30.6k | } |
328 | 1.04k | const int alias_index = tensor_variable->alias_index_ref - 1; |
329 | 1.04k | assert(alias_index >= 0); |
330 | 1.04k | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index); |
331 | 1.04k | assert(!variable_to->alias_index_ref); |
332 | 1.04k | if (!variable_to->tensor_view) |
333 | 3 | { |
334 | | // If we haven't allocated variable_to, we cannot allocate it now (no shape specified), so return 0.
335 | 3 | if (ccv_nnc_is_tensor_auto(variable_to->info)) |
336 | 0 | return 0; |
337 | 3 | void* ptr = 0; |
338 | 3 | assert(variable_to->info.type == tensor_variable->info.type); |
339 | 3 | const size_t data_size = ccv_nnc_tensor_data_size(variable_to->info); |
340 | 3 | if (CCV_TENSOR_GET_MEMORY(variable_to->info.type) == CCV_TENSOR_GPU_MEMORY && data_size > 0)
341 | 0 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type), stream_context, data_size); |
342 | 3 | variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0); |
343 | 3 | assert(variable_to->tensor_view->data.u8); |
344 | 3 | } |
345 | 1.04k | int i; |
346 | 1.04k | int no_ofs = 1; |
347 | 13.5k | for (i = 0; no_ofs && i < CCV_NNC_MAX_DIM_ALLOC; i++)
348 | 12.4k | no_ofs = (tensor_variable->ofs[i] == 0); |
349 | 1.04k | int no_stride = 1; |
350 | 2.46k | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC; i++)
351 | 1.42k | no_stride = (tensor_variable->stride[i] == 0); |
352 | 1.04k | int stride_is_packed = no_stride; |
353 | 1.04k | if (!no_stride) // We have a stride; now check whether it is packed.
354 | 1.01k | stride_is_packed = ccv_nnc_is_tensor_stride_packed(tensor_variable->stride, tensor_variable->info.dim); |
355 | 1.04k | assert(CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info)); |
356 | | // Allow a vector type to be a normal tensor rather than a tensor view. We cannot have any offset though.
357 | 1.04k | if (no_ofs && !stride_is_packed)
358 | 3 | stride_is_packed = ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, tensor_variable->stride);
359 | 1.04k | if (no_ofs && stride_is_packed)
360 | 1.03k | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->info, 0); |
361 | 8 | else { |
362 | 8 | if (no_stride) |
363 | 1 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride); |
364 | 8 | tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view), tensor_variable->info, tensor_variable->ofs, tensor_variable->stride); |
365 | 8 | } |
366 | 1.04k | if (tensor_variable->alias_off) |
367 | 1 | ccv_nnc_tensor_data_add(tensor_variable->tensor_view->info, tensor_variable->alias_off, &tensor_variable->tensor_view->data, &tensor_variable->tensor_view->dataof); |
368 | 1.04k | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; |
369 | 1.04k | } |
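Note the lazy materialization above: a variable carries only its parameters until first accessed, at which point CPU storage comes from ccv_nnc_tensor_new and GPU storage from the graph's xpu_alloc pool. A usage sketch (editor's illustration assuming an existing graph; the tensor-parameter macro is from ccv_nnc_easy.h):

    ccv_nnc_tensor_variable_t const a = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 2));
    // No storage exists yet; this call allocates and returns the backing tensor.
    ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_variable(graph, a, 0);
    tensor->data.f32[0] = 1;
    tensor->data.f32[1] = 2;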
370 | | |
371 | | static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol) |
372 | 27.6k | { |
373 | 27.6k | if (symbol.d >= graph->binds->rnum) |
374 | 345 | { |
375 | 345 | const int rnum = graph->binds->rnum; |
376 | 345 | ccv_array_resize(graph->binds, symbol.d + 1); |
377 | 345 | int i; |
378 | 690 | for (i = rnum; i < graph->binds->rnum; i++345 ) |
379 | 345 | ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i))->index = CCV_NNC_TENSOR_NO_VARIABLE; |
380 | 345 | } |
381 | 27.6k | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d); |
382 | 27.6k | bind->type = tensor_variable->type; |
383 | 27.6k | bind->index = tensor_variable->index; |
384 | 27.6k | if (tensor_variable->alias_index_ref) |
385 | 1.04k | { |
386 | 1.04k | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ |
387 | 1.04k | .d = symbol.d, |
388 | 1.04k | .graph = graph->tape |
389 | 1.04k | }); |
390 | 1.04k | assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum); |
391 | 1.04k | bind->alias_ref = alias_to.d + 1; |
392 | 1.04k | } else |
393 | 26.5k | bind->alias_ref = 0; |
394 | 27.6k | if (bind->sources) |
395 | 0 | ccv_array_free(bind->sources); |
396 | 27.6k | bind->sources = 0; |
397 | 27.6k | if (bind->destinations) |
398 | 0 | ccv_array_free(bind->destinations); |
399 | 27.6k | bind->destinations = 0; |
400 | 27.6k | bind->destructor_hook.func = 0; |
401 | 27.6k | bind->destructor_hook.context = 0; |
402 | 27.6k | bind->tensor_view = 0; |
403 | 27.6k | } |
404 | | |
405 | | static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) |
406 | 44.5k | { |
407 | 44.5k | if (tensor_variable->symbol.d >= 0) |
408 | 16.9k | return tensor_variable->symbol; |
409 | 27.6k | if (!tensor_variable->alias_index_ref) |
410 | 26.5k | { |
411 | 26.5k | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0); |
412 | 26.5k | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); |
413 | 26.5k | return symbol; |
414 | 26.5k | } |
415 | 1.04k | const int alias_index = tensor_variable->alias_index_ref - 1; |
416 | 1.04k | assert(alias_index >= 0); |
417 | 1.04k | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index); |
418 | 1.04k | assert(!variable_to->alias_index_ref); |
419 | 1.04k | int no_stride = 1; |
420 | 1.04k | int i; |
421 | 2.43k | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC; i++)
422 | 1.39k | no_stride = (tensor_variable->stride[i] == 0); |
423 | 1.04k | if (no_stride) |
424 | 32 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride); |
425 | 1.04k | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, tensor_variable->stride, tensor_variable->info, 0); |
426 | 1.04k | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); |
427 | 1.04k | return symbol; |
428 | 1.04k | } |
429 | | |
430 | | // Return the tensor variable that is old (the provided tensor variable will have a new setting). |
431 | | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable) |
432 | 6.28k | { |
433 | 6.28k | struct ccv_nnc_tensor_variable_s x = *tensor_variable; |
434 | 6.28k | ccv_nnc_tensor_variable_t new_variable; |
435 | | // Need to handle alias. |
436 | 6.28k | if (x.alias_index_ref) |
437 | 0 | new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1), x.ofs, x.stride, x.info); |
438 | 6.28k | else |
439 | 6.28k | new_variable = ccv_nnc_tensor_variable_new(graph, x.info); |
440 | 6.28k | *tensor_variable = *new_variable; |
441 | 6.28k | *new_variable = x; |
442 | | // The index should be the same though. |
443 | 6.28k | const int index = new_variable->index; |
444 | 6.28k | new_variable->index = tensor_variable->index; |
445 | 6.28k | if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) |
446 | 2.84k | { |
447 | 2.84k | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d); |
448 | 2.84k | bind->index = new_variable->index; |
449 | 2.84k | } |
450 | 6.28k | tensor_variable->index = index; |
451 | 6.28k | return new_variable; |
452 | 6.28k | } |
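This exchange is the SSA refresh used later in ccv_nnc_dynamic_graph_exec_ret: when an output variable already has recorded sources, its current contents (and symbol) move to a fresh record, and the caller's handle becomes a brand-new version of the variable. A usage sketch (editor's illustration):

    // old takes over var's previous contents and symbol; var is reset for a new write.
    ccv_nnc_tensor_variable_t const old = ccv_nnc_tensor_variable_exchange_new(graph, var);
    // ... produce the new value of var ...
    ccv_nnc_tensor_variable_free(graph, old); // Drop the stale version when done.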
453 | | |
454 | | void ccv_nnc_dynamic_graph_set_max_concurrency(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int max_stream_count) |
455 | 0 | { |
456 | 0 | dynamic_graph->max_stream_count = max_stream_count; |
457 | 0 | } |
458 | | |
459 | | int ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad) |
460 | 9 | { |
461 | 9 | if (dynamic_graph->no_grad == no_grad) |
462 | 0 | return -1; |
463 | 9 | dynamic_graph->no_grad = no_grad; |
464 | 9 | return 0; |
465 | 9 | } |
466 | | |
467 | | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type) |
468 | 72 | { |
469 | 72 | if (!graph->stream_map) |
470 | 10 | graph->stream_map = kh_init(stream_map); |
471 | 72 | int ret = 0; |
472 | 72 | khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret); |
473 | 72 | assert(ret >= 0); |
474 | 72 | ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k); |
475 | | // If ret == 0, the key already exists and we can return directly; otherwise, create and return.
476 | 72 | if (ret != 0) |
477 | 23 | { |
478 | 23 | stream = ccv_nnc_stream_context_new(type); |
479 | 23 | kh_val(graph->stream_map, k) = stream; |
480 | 23 | } |
481 | 72 | return stream; |
482 | 72 | } |
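kh_put reports through ret whether the key was freshly inserted (nonzero) or already present (zero), which is what lets the function above lazily create one stream per type. A condensed get-or-create sketch of the khash idiom over the same stream_map type (editor's illustration):

    static ccv_nnc_stream_context_t* get_or_create(khash_t(stream_map)* const map, const int type)
    {
        int ret = 0;
        khiter_t k = kh_put(stream_map, map, type, &ret);
        if (ret != 0) // Fresh bucket: its value slot is uninitialized until we fill it.
            kh_val(map, k) = ccv_nnc_stream_context_new(type);
        return kh_val(map, k); // Valid either way.
    }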
483 | | |
484 | | typedef struct { |
485 | | ccv_nnc_dynamic_graph_t* graph; |
486 | | int stream_type; |
487 | | } ccv_nnc_dynamic_graph_neighbor_context_discovery_t; |
488 | | |
489 | | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context) |
490 | 0 | { |
491 | 0 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context; |
492 | 0 | int type = discovery->stream_type; |
493 | 0 | CCV_STREAM_SET_DEVICE_ID(type, device_id); |
494 | 0 | return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type); |
495 | 0 | } |
496 | | |
497 | | void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs) |
498 | 15.3k | { |
499 | 15.3k | int i, j; |
500 | 43.5k | for (i = 0; i < input_size; i++)
501 | 28.1k | if (inputs[i] && !inputs[i]->alias_index_ref)
502 | 27.1k | { assert(inputs[i]->tensor_view); } |
503 | 15.3k | ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)]; |
504 | 43.5k | for (i = 0; i < input_size; i++)
505 | 28.1k | input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context) : 0;
506 | 15.3k | ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)]; |
507 | 43.5k | for (i = 0; i < input_size; i++)
508 | 28.1k | input_symbols[i] = inputs[i] ? _ccv_nnc_tensor_symbol_from_variable(graph, inputs[i]) : NO_TENSOR_SYMBOL;
509 | 15.3k | ccv_array_t* input_sources[ccv_max(1, input_size)]; |
510 | 15.3k | ccv_array_t* input_alias_sources[ccv_max(1, input_size)]; |
511 | 43.5k | for (i = 0; i < input_size; i++)
512 | 28.1k | {
513 | 28.1k | input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i].d))->sources : 0;
514 | 28.1k | if (inputs[i] && inputs[i]->alias_index_ref)
515 | 1.03k | { |
516 | 1.03k | const int alias_index_ref = inputs[i]->alias_index_ref - 1; |
517 | 1.03k | assert(alias_index_ref >= 0); |
518 | 1.03k | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref); |
519 | 1.03k | input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d))->sources; |
520 | 1.03k | } else |
521 | 27.1k | input_alias_sources[i] = 0; |
522 | 28.1k | } |
523 | 15.3k | const int parallel_count = ccv_max(1, parallel); |
524 | 15.3k | assert(input_size % parallel_count == 0); |
525 | 15.3k | const int per_input_size = input_size / parallel_count; |
526 | 15.3k | assert(output_size % parallel_count == 0); |
527 | 15.3k | const int per_output_size = output_size / parallel_count; |
528 | 15.3k | int output_auto = 0; |
529 | 30.9k | for (i = 0; !output_auto && i < output_size; i++)
530 | 15.6k | output_auto = outputs[i] ? ccv_nnc_is_tensor_auto(outputs[i]->info) : 0;
531 | | // One extra step: infer the parameters for the outputs.
532 | 15.3k | if (output_auto) |
533 | 14.6k | { |
534 | 14.6k | ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)]; |
535 | 14.6k | ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)]; |
536 | 29.4k | for (i = 0; i < parallel_count; i++)
537 | 14.7k | {
538 | 41.9k | for (j = 0; j < per_input_size; j++)
539 | 27.2k | input_params[j] = inputs[j + i * per_input_size] ? inputs[j + i * per_input_size]->info : ccv_nnc_tensor_auto;
540 | 29.6k | for (j = 0; j < per_output_size; j++)
541 | 14.9k | output_params[j] = outputs[j + i * per_output_size] ? outputs[j + i * per_output_size]->info : ccv_nnc_tensor_auto;
542 | 14.7k | ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size);
543 | 29.6k | for (j = 0; j < per_output_size; j++)
544 | 14.9k | if (outputs[j + i * per_output_size]) |
545 | 14.7k | outputs[j + i * per_output_size]->info = output_params[j]; |
546 | 14.7k | } |
547 | 14.6k | } |
548 | 15.3k | int freeable_size = 0; |
549 | 15.3k | ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)]; |
550 | | // Refresh the symbol if it is bound to an existing exec. Otherwise we cannot keep the SSA guarantee.
551 | 31.0k | for (i = 0; i < output_size; i++)
552 | 15.6k | { |
553 | | // First, go over to see whether there is an enforced in-place requirement.
554 | 15.6k | int enforce_idx = -1; |
555 | 44.7k | for (j = 0; enforce_idx < 0 && j < input_size; j++)
556 | 29.1k | if (inputs[j] && ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size))
557 | 2 | enforce_idx = j; |
558 | 15.6k | if (enforce_idx >= 0) |
559 | 2 | { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL); } |
560 | | // We don't allow or check "allow inplace" yet; that logic would be at odds with the backward logic.
561 | 15.6k | if (outputs[i] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
562 | 417 | { |
563 | 417 | const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d); |
564 | 417 | if (enforce_idx >= 0) |
565 | 2 | { assert(!bind->destinations || bind->destinations->rnum == 0); } |
566 | 417 | if (bind->sources && bind->sources->rnum > 0)
567 | 415 | { |
568 | 415 | const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]); |
569 | | // If this is an enforced in-place output, make sure the tensor view is taken over by the output.
570 | 415 | if (enforce_idx >= 0) |
571 | 0 | { |
572 | 0 | outputs[i]->destructor_hook = old_var->destructor_hook; |
573 | 0 | outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output. |
574 | 0 | old_var->tensor_view = 0; |
575 | 0 | } |
576 | 415 | } |
577 | 417 | } |
578 | 15.6k | } |
579 | 15.3k | ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)]; |
580 | 15.3k | if (parallel_count > 1) |
581 | 23 | { |
582 | 23 | const int max_device_id_size = per_input_size + per_output_size; |
583 | 23 | assert(max_device_id_size > 0); |
584 | 23 | int device_ids[max_device_id_size]; |
585 | 23 | ccv_nnc_stream_context_t* streams[parallel_count]; |
586 | 23 | ccv_nnc_stream_signal_t* signal; |
587 | 23 | if (stream_context) |
588 | 14 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); |
589 | 97 | for (i = 0; i < parallel_count; i++)
590 | 74 | { |
591 | 74 | int flag = 0; |
592 | 148 | for (j = 0; !flag && j < per_input_size; j++)
593 | 74 | if (input_tensors[i * per_input_size + j]) |
594 | 74 | flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type) == CCV_TENSOR_GPU_MEMORY); |
595 | 156 | for (j = 0; j < per_output_size; j++)
596 | 82 | {
597 | 82 | output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context) : 0;
598 | 82 | if (output_tensors[j] && !flag)
599 | 4 | flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type) == CCV_TENSOR_GPU_MEMORY);
600 | 82 | }
601 | 74 | const int stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
602 | 74 | const int tensor_type = flag ? CCV_TENSOR_GPU_MEMORY : CCV_TENSOR_CPU_MEMORY;
603 | 74 | const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size); |
604 | 74 | ccv_nnc_stream_context_t* stream_0 = 0; |
605 | 146 | for (j = 0; j < device_id_size; j++)
606 | 72 | { |
607 | 72 | int type = stream_type; |
608 | 72 | CCV_STREAM_SET_DEVICE_ID(type, device_ids[j]); |
609 | 72 | ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type); |
610 | 72 | if (!stream_0) |
611 | 72 | stream_0 = stream; |
612 | 72 | } |
613 | | // Wait for the signal to finish.
614 | 74 | if (stream_context) |
615 | 44 | { |
616 | 44 | if (stream_0) |
617 | 42 | ccv_nnc_stream_context_wait_signal(stream_0, signal); |
618 | 2 | else |
619 | 2 | ccv_nnc_stream_context_wait(stream_context); |
620 | 44 | } |
621 | 74 | if (stream_0) |
622 | 72 | { |
623 | 72 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = { |
624 | 72 | .graph = graph, |
625 | 72 | .stream_type = stream_type |
626 | 72 | }; |
627 | 72 | ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery); |
628 | 72 | } |
629 | 74 | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size); |
630 | 74 | int k; |
631 | 204 | for (k = 0; k < per_input_size; k++)
632 | 130 | {
633 | 130 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[k + i * per_input_size]->info.type) : -1));
634 | 130 | if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)) |
635 | 0 | ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]); |
636 | 130 | PRINT(CCV_CLI_INFO, "\n"); |
637 | 130 | } |
638 | 156 | for (k = 0; k < per_output_size; k++)
639 | 82 | {
640 | 82 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1));
641 | 82 | if (output_tensors[k] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
642 | 0 | ccv_nnc_print_tensor_shape(output_tensors[k]); |
643 | 82 | PRINT(CCV_CLI_INFO, "\n"); |
644 | 82 | } |
645 | 74 | const int status = ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0); |
646 | 74 | if (status != 0) |
647 | 0 | PRINT(CCV_CLI_INFO, "Invalid Status: %d\n", status); |
648 | 74 | if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)) |
649 | 0 | { |
650 | 0 | for (k = 0; k < per_output_size; k++) |
651 | 0 | { |
652 | 0 | PRINT(CCV_CLI_VERBOSE, "POST: |<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1)); |
653 | 0 | if (output_tensors[k]) |
654 | 0 | ccv_nnc_print_tensor_info(output_tensors[k]); |
655 | 0 | PRINT(CCV_CLI_VERBOSE, "\n"); |
656 | 0 | } |
657 | 0 | } |
658 | 74 | if (stream_context && stream_0)
659 | 42 | { |
660 | 42 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); |
661 | 42 | ccv_nnc_stream_context_wait_signal(stream_context, signal); |
662 | 42 | } |
663 | 74 | streams[i] = stream_0; |
664 | 74 | } |
665 | 23 | if (!stream_context) |
666 | 39 | for (i = 0; i < parallel_count; i++)
667 | 30 | if (streams[i]) |
668 | 30 | ccv_nnc_stream_context_wait(streams[i]); |
669 | 15.3k | } else { |
670 | 30.9k | for (i = 0; i < per_output_size; i++)
671 | 15.5k | output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context) : 0;
672 | 15.3k | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size); |
673 | 43.3k | for (i = 0; i < per_input_size; i++)
674 | 28.0k | {
675 | 28.0k | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[i]->info.type) : -1));
676 | 28.0k | if (input_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
677 | 0 | ccv_nnc_print_tensor_info(input_tensors[i]); |
678 | 28.0k | PRINT(CCV_CLI_INFO, "\n"); |
679 | 28.0k | } |
680 | 15.3k | ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context); |
681 | 30.9k | for (i = 0; i < per_output_size; i++)
682 | 15.5k | {
683 | 15.5k | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[i]->info.type) : -1));
684 | 15.5k | if (output_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
685 | 0 | ccv_nnc_print_tensor_info(output_tensors[i]); |
686 | 15.5k | PRINT(CCV_CLI_INFO, "\n"); |
687 | 15.5k | } |
688 | 15.3k | } |
689 | 15.3k | int inputs_are_constants = 1; |
690 | 30.7k | for (i = 0; inputs_are_constants && i < input_size; i++)
691 | 15.3k | if (inputs[i] && inputs[i]->type != CCV_NNC_TENSOR_CONSTANT)
692 | 15.3k | inputs_are_constants = 0; |
693 | 15.3k | if (input_size > 0 && !inputs_are_constants && !graph->no_grad) // No need to record the execution if there is no input or we disabled gradient computation.
694 | 15.3k | { |
695 | 15.3k | ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)]; |
696 | 30.8k | for (i = 0; i < output_size; i++)
697 | 15.5k | if (outputs[i]) |
698 | 15.3k | { |
699 | 15.3k | assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT); |
700 | 15.3k | output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]); |
701 | 15.3k | } else |
702 | 207 | output_symbols[i] = NO_TENSOR_SYMBOL; |
703 | 15.3k | int t; |
704 | 30.6k | for (t = 0; t < parallel_count; t++)
705 | 15.3k | { |
706 | 15.3k | ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0); |
707 | 15.3k | if (graph_execs) |
708 | 2.40k | graph_execs[t] = graph_exec; |
709 | | // This needs to be done before we set the new sources on the outputs. |
710 | 43.4k | for (i = 0; i < per_input_size; i++)
711 | 28.0k | { |
712 | 28.0k | ccv_array_t* const input_source = input_sources[i + t * per_input_size]; |
713 | 28.0k | if (input_source) |
714 | 28.1k | for (j = 0; j < input_source->rnum; j++)
715 | 14.0k | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ |
716 | 14.0k | .d = *(int*)ccv_array_get(input_source, j), |
717 | 14.0k | .graph = graph->tape |
718 | 14.0k | }, graph_exec); |
719 | 28.0k | ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size]; |
720 | 28.0k | if (input_alias_source) |
721 | 2.02k | for (j = 0; j < input_alias_source->rnum; j++)
722 | 1.01k | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ |
723 | 1.01k | .d = *(int*)ccv_array_get(input_alias_source, j), |
724 | 1.01k | .graph = graph->tape |
725 | 1.01k | }, graph_exec); |
726 | 28.0k | } |
727 | 43.4k | for (i = 0; i < per_input_size; i++)
728 | 28.0k | { |
729 | 28.0k | ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size]; |
730 | 28.0k | if (!input || input->type == CCV_NNC_TENSOR_CONSTANT)
731 | 236 | continue; |
732 | 27.8k | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d); |
733 | 27.8k | if (!bind->destinations) |
734 | 22.0k | bind->destinations = ccv_array_new(sizeof(int), 1, 0); |
735 | 27.8k | ccv_array_add_unique_int(bind->destinations, graph_exec.d); |
736 | 27.8k | if (input->alias_index_ref) |
737 | 1.01k | { |
738 | 1.01k | const int alias_index = input->alias_index_ref - 1; |
739 | 1.01k | assert(alias_index >= 0); |
740 | 1.01k | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index); |
741 | 1.01k | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d); |
742 | 1.01k | if (!root_bind->destinations) |
743 | 1.01k | root_bind->destinations = ccv_array_new(sizeof(int), 1, 0); |
744 | 1.01k | ccv_array_add_unique_int(root_bind->destinations, graph_exec.d); |
745 | 1.01k | } |
746 | 27.8k | } |
747 | 30.9k | for (i = 0; i < per_output_size; i++)
748 | 15.5k | { |
749 | 15.5k | ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size]; |
750 | 15.5k | if (!output) |
751 | 207 | continue; |
752 | 15.3k | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d); |
753 | 15.3k | assert(!bind->sources); // This is a new symbol; therefore, no bound sources are associated yet.
754 | 15.3k | bind->sources = ccv_array_new(sizeof(int), 1, 0); |
755 | 15.3k | ccv_array_add_unique_int(bind->sources, graph_exec.d); |
756 | 15.3k | if (output->alias_index_ref) |
757 | 8 | { |
758 | 8 | const int alias_index = output->alias_index_ref - 1; |
759 | 8 | assert(alias_index >= 0); |
760 | 8 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index); |
761 | 8 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d); |
762 | 8 | if (!root_bind->sources) |
763 | 4 | root_bind->sources = ccv_array_new(sizeof(int), 1, 0); |
764 | 8 | ccv_array_add_unique_int(root_bind->sources, graph_exec.d); |
765 | 8 | } |
766 | 15.3k | } |
767 | 15.3k | } |
768 | 15.3k | } |
769 | | // Now we are able to free some of the reused outputs.
770 | 15.7k | for (i = 0; i < freeable_size; i++)
771 | 415 | ccv_nnc_tensor_variable_free(graph, freeables[i]); |
772 | 15.3k | } |
773 | | |
774 | | int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context) |
775 | 12.9k | { |
776 | 12.9k | ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0); |
777 | 12.9k | return CCV_NNC_EXEC_SUCCESS; |
778 | 12.9k | } |
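An end-to-end usage sketch of the exec path (editor's illustration; CMD_EWSUM_FORWARD, TENSOR_VARIABLE_LIST, and CPU_TENSOR_NHWC are assumed from ccv_nnc_easy.h):

    ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
    ccv_nnc_tensor_variable_t const a = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 1));
    ccv_nnc_tensor_variable_t const b = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 1));
    ccv_nnc_tensor_from_variable(graph, a, 0)->data.f32[0] = 1;
    ccv_nnc_tensor_from_variable(graph, b, 0)->data.f32[0] = 2;
    ccv_nnc_tensor_variable_t const c = ccv_nnc_tensor_variable_new(graph); // Auto params, inferred at exec time.
    ccv_nnc_dynamic_graph_exec(graph, CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(a, b), TENSOR_VARIABLE_LIST(c), 0, 0);
    // c now holds 3; the exec was also recorded on the tape for a later backward pass.
    ccv_nnc_dynamic_graph_free(graph);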
779 | | |
780 | | static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d) |
781 | 17.5k | { |
782 | 17.5k | if (bind->alias_ref) |
783 | 1.01k | bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1); |
784 | 17.5k | if (!bind->sources || bind->sources->rnum == 0) |
785 | 0 | return 1; |
786 | 17.5k | int i; |
787 | 33.9k | for (i = 0; i < bind->sources->rnum; i++)
788 | 17.5k | { |
789 | 17.5k | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i); |
790 | 17.5k | const ccv_nnc_graph_exec_symbol_t exec_symbol = { |
791 | 17.5k | .d = exec_symbol_d, |
792 | 17.5k | .graph = graph->tape |
793 | 17.5k | }; |
794 | 17.5k | const int* outputs; int output_size; |
795 | 17.5k | ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size); |
796 | 17.5k | int j; |
797 | 34.0k | for (j = 0; j < output_size; j++)
798 | 17.5k | if (outputs[j] >= 0 && outputs[j] != symbol_d) // If output is me, it is the only output.
799 | 1.04k | { |
800 | 1.04k | assert(outputs[j] < graph->binds->rnum); |
801 | 1.04k | const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]); |
802 | | // This is in use and it is not a constant symbol.
803 | 1.04k | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
804 | 25 | return 0; |
805 | 1.01k | if (other_bind->alias_ref) // If this is alias, use its original's destinations. |
806 | 1 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1); |
807 | | // The original is in use and it is not a constant symbol.
808 | 1.01k | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
809 | 1 | return 0; |
810 | 1.01k | if (other_bind->destinations && other_bind->destinations->rnum > 0) |
811 | 1.00k | return 0; |
812 | 1.01k | } |
813 | 17.5k | } |
814 | 16.4k | return 1; |
815 | 17.5k | } |
816 | | |
817 | | static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) |
818 | 24.9k | { |
819 | 24.9k | int i; |
820 | 24.9k | if (bind->destinations) |
821 | 24.7k | { |
822 | 24.7k | int flag = 0; |
823 | 49.3k | for (i = 0; !flag && i < bind->destinations->rnum; i++)
824 | 24.5k | { |
825 | 24.5k | const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i); |
826 | 24.5k | if (exec_symbol_d == freed_exec_symbol_d) |
827 | 24.5k | { |
828 | 24.5k | if (i < bind->destinations->rnum - 1) |
829 | 17 | *(int*)ccv_array_get(bind->destinations, i) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1); |
830 | 24.5k | --bind->destinations->rnum; |
831 | 24.5k | flag = 1; |
832 | 24.5k | } |
833 | 24.5k | } |
834 | | // This symbol can be freed. |
835 | 24.7k | if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
836 | 17.6k | { |
837 | 17.6k | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; |
838 | 17.6k | if (bind->alias_ref) |
839 | 1.01k | { |
840 | 1.01k | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1); |
841 | 1.01k | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) |
842 | 1.01k | root_bind = bind; |
843 | 1.01k | } |
844 | | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. |
845 | | // It is possible because the exec will already be freed; thus, it is safe to remove this alias.
846 | 17.6k | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && |
847 | 17.6k | ((!root_bind->sources || root_bind->sources->rnum == 0) || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
848 | 17.6k | root_bind->destinations->rnum == 0)
849 | 17.6k | { |
850 | 17.6k | if (root_bind->sources) |
851 | 14.6k | for (i = 0; i < root_bind->sources->rnum; i++)
852 | 6.00k | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)); |
853 | 17.6k | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
854 | 17.6k | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ |
855 | 17.6k | .d = tensor_index, |
856 | 17.6k | .graph = graph->tape |
857 | 17.6k | }); |
858 | 17.6k | } else if (8 bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED8 && // Handle the case the bind is already freed, and it doesn't have any sources or destinations. |
859 | 8 | bind->alias_ref && (2 !bind->sources2 || bind->sources->rnum == 00 ) && (2 !bind->destinations2 || bind->destinations->rnum == 02 )) { |
860 | 2 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
861 | 2 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ |
862 | 2 | .d = tensor_index, |
863 | 2 | .graph = graph->tape |
864 | 2 | }); |
865 | 2 | } |
866 | 17.6k | } |
867 | 24.7k | } |
868 | 24.9k | } |
869 | | |
870 | | static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) |
871 | 7.29k | { |
872 | 7.29k | int i; |
873 | 7.29k | if (bind->sources) |
874 | 7.29k | { |
875 | 7.29k | int flag = 0; |
876 | 14.5k | for (i = 0; !flag && i < bind->sources->rnum; i++)
877 | 7.29k | { |
878 | 7.29k | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i); |
879 | 7.29k | if (exec_symbol_d == freed_exec_symbol_d) |
880 | 7.29k | { |
881 | 7.29k | if (i < bind->sources->rnum - 1) |
882 | 2 | *(int*)ccv_array_get(bind->sources, i) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1); |
883 | 7.29k | --bind->sources->rnum; |
884 | 7.29k | flag = 1; |
885 | 7.29k | } |
886 | 7.29k | } |
887 | 7.29k | if (flag && !bind->alias_ref && bind->index >= 0 && bind->type == CCV_NNC_TENSOR_CONSTANT && // If it is detached (constant but previously had sources), we can now check again.
888 | 7.29k | (bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
889 | 7.29k | (!bind->destinations || bind->destinations->rnum == 0))
890 | 3 | { |
891 | | // If this is a constant, set it to have no symbol again.
892 | 3 | ccv_nnc_tensor_variable_t tv = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, bind->index); |
893 | 3 | tv->symbol = NO_TENSOR_SYMBOL; |
894 | 3 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
895 | 3 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ |
896 | 3 | .d = tensor_index, |
897 | 3 | .graph = graph->tape |
898 | 3 | }); |
899 | 7.28k | } else if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) { |
900 | | // This symbol can be freed. |
901 | 2.44k | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; |
902 | 2.44k | if (bind->alias_ref) |
903 | 3 | { |
904 | 3 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1); |
905 | 3 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) |
906 | 0 | root_bind = bind; |
907 | 3 | } |
908 | | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. |
909 | | // It is possible because the exec will already be freed; thus, it is safe to remove this alias.
910 | 2.44k | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && |
911 | 2.44k | (root_bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
912 | 2.44k | (!root_bind->destinations || root_bind->destinations->rnum == 0))
913 | 6 | { |
914 | 6 | for (i = 0; i < root_bind->sources->rnum; i++)
915 | 0 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)); |
916 | 6 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
917 | 6 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ |
918 | 6 | .d = tensor_index, |
919 | 6 | .graph = graph->tape |
920 | 6 | }); |
921 | 2.43k | } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case where the bind is already freed and it doesn't have any sources or destinations.
922 | 2.43k | bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) {
923 | 3 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
924 | 3 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ |
925 | 3 | .d = tensor_index, |
926 | 3 | .graph = graph->tape |
927 | 3 | }); |
928 | 3 | } |
929 | 2.44k | } |
930 | 7.29k | } |
931 | 7.29k | } |
932 | | |
933 | | static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws) |
934 | 15.3k | { |
935 | 15.3k | int i; |
936 | 43.2k | for (i = 0; i < input_size; i++)
937 | 27.9k | if (inputs[i] >= 0 && inputs[i] < binds->rnum) |
938 | 27.9k | { |
939 | 27.9k | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i]); |
940 | 27.9k | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) |
941 | 4.01k | continue; |
942 | 23.9k | if (bind->alias_ref) |
943 | 1.01k | { |
944 | 1.01k | const int alias_to = bind->alias_ref - 1; |
945 | 1.01k | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to); |
946 | 1.01k | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) |
947 | 1.01k | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); |
948 | 1.01k | } |
949 | 23.9k | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws); |
950 | 23.9k | } |
951 | | // Note that this works because there is no overlap of inputs / outputs. (What about alias?). |
952 | 30.8k | for (i = 0; i < output_size; i++)
953 | 15.5k | if (outputs[i] >= 0 && outputs[i] < binds->rnum)
954 | 15.3k | { |
955 | 15.3k | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i]); |
956 | 15.3k | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) |
957 | 8.02k | continue; |
958 | 7.28k | if (bind->alias_ref) |
959 | 5 | { |
960 | 5 | const int alias_to = bind->alias_ref - 1; |
961 | 5 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to); |
962 | 5 | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) |
963 | 5 | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); |
964 | 5 | } |
965 | 7.28k | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws); |
966 | 7.28k | } |
967 | 15.3k | } |
968 | | |
969 | | static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol) |
970 | 15.3k | { |
971 | 15.3k | if (!graph->stateful_execs) |
972 | 6.06k | return; |
973 | 9.23k | assert(symbol.d >= 0); |
974 | 9.23k | ccv_array_t* const stateful_execs = graph->stateful_execs; |
975 | 9.23k | ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol); |
976 | 9.23k | ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data; |
977 | 9.23k | if (!stateful_exec) |
978 | 6.83k | return; |
979 | | // If there is no backward, no need to apply gradients. |
980 | | // Otherwise, if we applied gradients, we can free it as well. |
981 | | // We don't free this stateful exec here because applying gradients doesn't require any variables to be alive. |
982 | 2.40k | if (!stateful_exec->did_backward_but_not_apply_gradients) |
983 | 300 | { |
984 | 300 | const int index = stateful_exec->index; |
985 | 300 | ccfree(stateful_exec); |
986 | 300 | if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0) |
987 | 300 | graph->reuse_stateful_exec = index; |
988 | 300 | *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index) = 0; |
989 | 300 | } else |
990 | 2.10k | stateful_exec->should_free = 1; |
991 | 2.40k | } |
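 | | /* Editor's note: a minimal sketch (an assumption; the allocation side lives elsewhere in this |
 | | * file) of the slot-reuse idiom above. Freed entries are zeroed in place and the smallest freed |
 | | * index is cached, so the next allocation can fill a hole instead of growing the array: |
 | | * |
 | | * int slot = graph->reuse_stateful_exec; // -1 means no known hole |
 | | * if (slot >= 0 && !*(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, slot)) |
 | | * ... place the new stateful exec at slot, then reset the hint ... |
 | | * else |
 | | * ... ccv_array_push a fresh entry ... |
 | | */ |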
992 | | |
993 | | static int _ccv_nnc_tensor_bind_trace_forward_to_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_graph_bind_t* const bind, ccv_nnc_tensor_variable_graph_bind_t* const root_bind, int* const ws_start, const int assuming_no_source) // assuming_no_source means we are going to remove the sources if possible, so they are irrelevant here. |
994 | 27.4k | { |
995 | 27.4k | int can_free_symbol = 0; |
996 | 27.4k | const int sources_and_is_only_output = (root_bind->sources && root_bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d); |
997 | 27.4k | if (!root_bind->sources || root_bind->sources->rnum == 0 || sources_and_is_only_output || assuming_no_source) |
998 | 26.4k | { |
999 | 26.4k | int i, j; |
1000 | 26.4k | can_free_symbol = 1; // Assume we can free this symbol. |
1001 | 26.4k | if (!graph->ws) |
1002 | 18 | graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 0, 0); |
1003 | 26.4k | ccv_array_t* const ws = graph->ws; |
1004 | 26.4k | ccv_array_clear(ws); |
1005 | 26.4k | if (root_bind->destinations) |
1006 | 43.4k | for (i = 0; i < root_bind->destinations->rnum; i++) |
1007 | 21.5k | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i)); |
1008 | 26.4k | const int ws_init_size = ws->rnum; |
1009 | 26.4k | *ws_start = ws_init_size; |
1010 | | // Add all sources from root_bind, in case it has been freed (while updating bind sources / destinations during a free). |
1011 | 26.4k | if (root_bind->sources) |
1012 | 25.7k | for (i = 0; i < root_bind->sources->rnum; i++) |
1013 | 10.4k | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)); |
1014 | | // If we cannot loop over any exec symbols (this tensor is not in use), it is simple to determine whether |
1015 | | // we want to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol. |
1016 | 26.4k | if (ws_init_size == 0) |
1017 | 4.91k | can_free_symbol = (!bind->alias_ref || root_bind->index < 0); |
1018 | | // Go through all the exec symbols that use this tensor, to see whether they have inputs that have other sources. |
1019 | 47.9k | for (i = 0; i < ws_init_size; i++) |
1020 | 21.5k | { |
1021 | 21.5k | const int exec_symbol_d = *(int*)ccv_array_get(ws, i); |
1022 | 21.5k | const ccv_nnc_graph_exec_symbol_t symbol = { |
1023 | 21.5k | .d = exec_symbol_d, |
1024 | 21.5k | .graph = graph->tape |
1025 | 21.5k | }; |
1026 | 21.5k | const int* inputs; int input_size; |
1027 | 21.5k | const int* outputs; int output_size; |
1028 | 21.5k | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); |
1029 | 21.5k | int flag = 0; // flag denotes whether there are cases to keep this exec symbol. |
1030 | 21.5k | if (!root_bind->sources || root_bind->sources->rnum == 0 || assuming_no_source) |
1031 | 13.1k | { |
1032 | | // If there are no sources, check whether other sources can depend on this exec; if they do, we cannot free this. |
1033 | 36.8k | for (j = 0; !flag && j < input_size; j++) |
1034 | 23.7k | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d) |
1035 | 14.4k | { |
1036 | 14.4k | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]); |
1037 | 14.4k | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) |
1038 | 6.22k | flag = 1; |
1039 | 8.26k | else { |
1040 | 8.26k | if (other_bind->alias_ref) // If this is an alias, use its original's sources. |
1041 | 4 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1); |
1042 | 8.26k | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT && other_bind->sources && other_bind->sources->rnum > 0); // Constant should have no source, or it is detached. |
1043 | 8.26k | } |
1044 | 14.4k | } |
1045 | 13.1k | } else { |
1046 | | // If there are sources, check whether we have outputs or not. If we do, we cannot free this. |
1047 | 16.9k | for (j = 0; !flag && j < output_size; j++) |
1048 | 8.45k | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) |
1049 | 8.45k | { |
1050 | 8.45k | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]); |
1051 | 8.45k | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) |
1052 | 4.43k | flag = 1; |
1053 | 4.02k | else { |
1054 | 4.02k | if (other_bind->alias_ref) // If this is an alias, use its original's destinations. |
1055 | 0 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1); |
1056 | 4.02k | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->destinations && other_bind->destinations->rnum > 0); |
1057 | 4.02k | } |
1058 | 8.45k | } |
1059 | 8.44k | } |
1060 | | // This exec can be freed if there is no input required or there is no output required. |
1061 | 21.5k | can_free_symbol = (can_free_symbol && !flag); |
1062 | 21.5k | if (!flag) |
1063 | 4.87k | { |
1064 | | // Go over the inputs and remove all references from the bound destinations, |
1065 | | // and go over the outputs to remove all references from the bound sources. |
1066 | 4.87k | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); |
1067 | 4.87k | const int* outgoings; int outgoing_size; |
1068 | 4.87k | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); |
1069 | 7.30k | for (j = 0; j < outgoing_size; j++) |
1070 | 2.43k | ccv_array_add_unique_int(ws, outgoings[j]); |
1071 | 4.87k | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); |
1072 | 4.87k | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); |
1073 | 4.87k | } |
1074 | 21.5k | } |
1075 | 26.4k | } |
1076 | 27.4k | return can_free_symbol; |
1077 | 27.4k | } |
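 | | /* Editor's note: a compact restatement of the layout built above (an annotation, not extra API). |
 | | * ws is a worklist over exec symbol indices, assuming root_bind has both destinations and sources: |
 | | * |
 | | * [0 .. ws_init_size) destinations of root_bind (execs that consume this tensor) |
 | | * [*ws_start .. rnum) sources of root_bind (execs that produce it), plus, for every exec |
 | | * freed in the loop, its outgoings, appended for the backward pass below. |
 | | */ |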
1078 | | |
1079 | | static void _ccv_nnc_tensor_bind_trace_backward_to_free(ccv_nnc_dynamic_graph_t* const graph, ccv_array_t* const ws, const int ws_start) |
1080 | 9.75k | { |
1081 | 9.75k | int i, j; |
1082 | | // Now go over the outgoings; if one is removed, add more to the worklist. Note that the ws array can grow while we iterate over it. |
1083 | 22.6k | for (i = ws_start; i < ws->rnum; i++) |
1084 | 12.8k | { |
1085 | 12.8k | const int exec_symbol_d = *(int*)ccv_array_get(ws, i); |
1086 | 12.8k | const ccv_nnc_graph_exec_symbol_t symbol = { |
1087 | 12.8k | .d = exec_symbol_d, |
1088 | 12.8k | .graph = graph->tape |
1089 | 12.8k | }; |
1090 | 12.8k | const int* inputs; int input_size; |
1091 | 12.8k | const int* outputs; int output_size; |
1092 | 12.8k | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); |
1093 | 12.8k | int flag = 0; |
1094 | 29.9k | for (j = 0; !flag && j < input_size; j++) |
1095 | 17.1k | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum) |
1096 | 17.1k | { |
1097 | 17.1k | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]); |
1098 | 17.1k | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) |
1099 | 4.44k | flag = 1; |
1100 | 12.6k | else { |
1101 | 12.6k | if (other_bind->alias_ref) // If this is an alias, use its original's sources. |
1102 | 1.02k | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1); |
1103 | 12.6k | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT && other_bind->sources && other_bind->sources->rnum > 0); |
1104 | 12.6k | } |
1105 | 17.1k | } |
1106 | 12.8k | if (flag) // If any input makes freeing this destination impossible, check whether all its outputs are done. |
1107 | 10.4k | { |
1108 | 10.4k | int output_flag = 0; |
1109 | 21.1k | for (j = 0; !output_flag && j < output_size; j++) |
1110 | 10.6k | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) |
1111 | 10.4k | { |
1112 | 10.4k | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]); |
1113 | 10.4k | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) |
1114 | 2.41k | output_flag = 1; |
1115 | 8.04k | else { |
1116 | 8.04k | if (other_bind->alias_ref) // If this is an alias, use its original's destinations. |
1117 | 0 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1); |
1118 | 8.04k | output_flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->destinations && other_bind->destinations->rnum > 0); |
1119 | 8.04k | } |
1120 | 10.4k | } |
1121 | 10.4k | if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination). |
1122 | 8.02k | flag = 0; |
1123 | 10.4k | } |
1124 | | // We went over all the inputs and it turns out no remaining input has other references, so it is safe to remove. |
1125 | 12.8k | if (!flag) |
1126 | 10.4k | { |
1127 | 10.4k | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); |
1128 | 10.4k | const int* outgoings; int outgoing_size; |
1129 | 10.4k | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); |
1130 | | // If it has outgoings, add them for further inspection. |
1131 | 12.8k | for (j = 0; j < outgoing_size; j++) |
1132 | 2.40k | ccv_array_add_unique_int(ws, outgoings[j]); |
1133 | 10.4k | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); |
1134 | 10.4k | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); |
1135 | 10.4k | } |
1136 | 12.8k | } |
1137 | 9.75k | } |
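 | | /* Editor's note: the loop above is a monotone worklist; a minimal generic sketch of the same |
 | | * shape (removable() and push_outgoings() are illustrative names, not ccv API): |
 | | * |
 | | * for (i = ws_start; i < ws->rnum; i++) { // ws->rnum may grow while we iterate |
 | | * const int d = *(int*)ccv_array_get(ws, i); |
 | | * if (removable(d)) // no live inputs, or all outputs unused |
 | | * push_outgoings(ws, d); // expose the next candidates, then free d |
 | | * } |
 | | */ |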
1138 | | |
1139 | | void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) |
1140 | 32.1k | { |
1141 | | // If it contains a symbol, this tensor variable is not a free variable; it is used as either an input or an output. |
1142 | 32.1k | if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) |
1143 | 27.4k | { |
1144 | | // If it is not a free variable, when can we free the symbol and the underlying variable? |
1145 | | // 1. There should be no sources (the command that generates this tensor should be freed), or the only output of these sources is the current one; |
1146 | | // 2. The destinations (the commands that use this tensor) should have no other inputs, or the other inputs have no bound sources either. |
1147 | 27.4k | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d); |
1148 | | // There should no longer be any source associated with it. |
1149 | | // I am free if no exec symbol produces me, or the symbol producing me can only produce me (thus, it is not required to |
1150 | | // compute gradients because I am the only variable it can compute a gradient for). |
1151 | 27.4k | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; |
1152 | 27.4k | if (bind->alias_ref) |
1153 | 1.03k | { |
1154 | 1.03k | const int alias_to = bind->alias_ref - 1; |
1155 | 1.03k | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to); |
1156 | 1.03k | } |
1157 | 27.4k | int ws_start; |
1158 | 27.4k | const int can_free_symbol = _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, root_bind, &ws_start, 0); |
1159 | 27.4k | if (can_free_symbol) |
1160 | 9.74k | { |
1161 | 9.74k | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
1162 | 9.74k | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); |
1163 | 9.74k | _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start); |
1164 | 17.7k | } else { // If this symbol is not freed, move the tensor view to the bind. |
1165 | | // If the current bind is an alias and it doesn't have any sources or destinations, we cannot find this alias |
1166 | | // through any exec. It is not only safe to delete, but has to be deleted. We don't need to handle this |
1167 | | // if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the |
1168 | | // alias in that process. |
1169 | 17.7k | if (bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) |
1170 | 20 | { |
1171 | 20 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
1172 | 20 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); |
1173 | 17.6k | } else { |
1174 | 17.6k | bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but the extra symbol will continue to exist. |
1175 | 17.6k | bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback. |
1176 | 17.6k | bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context. |
1177 | 17.6k | bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind. |
1178 | 17.6k | tensor_variable->tensor_view = 0; |
1179 | 17.6k | } |
1180 | 17.7k | } |
1181 | 27.4k | } |
1182 | 32.1k | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1); |
1183 | 32.1k | } |
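 | | /* Editor's note: a minimal usage sketch (variable setup elided; names per ccv_nnc.h, the |
 | | * tensor parameters here are an assumption). Freeing an input right after it is consumed is |
 | | * safe: the symbol stays on the tape as CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED and the bind takes |
 | | * ownership of the tensor view, so a later backward pass can still read it: |
 | | * |
 | | * ccv_nnc_tensor_variable_t a = ccv_nnc_tensor_variable_new(graph, a_params); |
 | | * ... execute a command that reads a and writes b ... |
 | | * ccv_nnc_tensor_variable_free(graph, a); // the tape keeps what backward needs |
 | | */ |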
1184 | | |
1185 | | void ccv_nnc_tensor_variable_detach(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) |
1186 | 6 | { |
1187 | | // This cannot be an alias. |
1188 | 6 | assert(!tensor_variable->alias_index_ref); |
1189 | | // If no computation done yet, mark this as constant. |
1190 | 6 | if (tensor_variable->symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1191 | 0 | { |
1192 | 0 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; |
1193 | 0 | return; |
1194 | 0 | } |
1195 | | // Otherwise, we need to do some bookkeeping updates to make sure it no longer participates in gradient computation. |
1196 | 6 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d); |
1197 | | // Because a tensor variable cannot be an alias, its bind cannot have an alias pointer. |
1198 | 6 | assert(!bind->alias_ref); |
1199 | | // Go through to break ties between sources and destinations. |
1200 | 6 | int i, j; |
1201 | 6 | if (bind->sources && bind->destinations) |
1202 | 6 | { |
1203 | 11 | for (i = 0; i < bind->sources->rnum; i++5 ) |
1204 | 5 | { |
1205 | 5 | const int s = *(int*)ccv_array_get(bind->sources, i); |
1206 | 5 | const int* outputs; int output_size; |
1207 | 5 | const ccv_nnc_graph_exec_symbol_t s_symbol = { |
1208 | 5 | .d = s, |
1209 | 5 | .graph = graph->tape |
1210 | 5 | }; |
1211 | 5 | ccv_nnc_graph_exec_symbol_io(graph->tape, s_symbol, 0, 0, &outputs, &output_size); |
1212 | 10 | for (j = 0; j < bind->destinations->rnum; j++5 ) |
1213 | 5 | { |
1214 | 5 | const int d = *(int*)ccv_array_get(bind->destinations, j); |
1215 | 5 | const ccv_nnc_graph_exec_symbol_t d_symbol = { |
1216 | 5 | .d = d, |
1217 | 5 | .graph = graph->tape |
1218 | 5 | }; |
1219 | 5 | const int* inputs; int input_size; |
1220 | 5 | ccv_nnc_graph_exec_symbol_io(graph->tape, d_symbol, &inputs, &input_size, 0, 0); |
1221 | 5 | int x, y; |
1222 | 5 | int flag = 0; // Whether we find a symbol that connects source and destination but not the current one we detach. If found, we cannot break the tie between s_symbol and d_symbol. |
1223 | 10 | for (x = 0; !flag && x < output_size; x++5 ) |
1224 | 5 | { |
1225 | 5 | ccv_nnc_tensor_symbol_t x_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ |
1226 | 5 | .d = outputs[x], |
1227 | 5 | .graph = graph->tape |
1228 | 5 | }); |
1229 | 5 | if (x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1230 | 5 | { |
1231 | 5 | x_symbol.d = outputs[x]; |
1232 | 5 | x_symbol.graph = graph->tape; |
1233 | 5 | } |
1234 | 5 | if (x_symbol.d == tensor_variable->symbol.d || x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1235 | 5 | continue; |
1236 | 0 | for (y = 0; !flag && y < input_size; y++) |
1237 | 0 | { |
1238 | 0 | ccv_nnc_tensor_symbol_t y_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ |
1239 | 0 | .d = inputs[y], |
1240 | 0 | .graph = graph->tape |
1241 | 0 | }); |
1242 | 0 | if (y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1243 | 0 | { |
1244 | 0 | y_symbol.d = inputs[y]; |
1245 | 0 | y_symbol.graph = graph->tape; |
1246 | 0 | } |
1247 | 0 | if (y_symbol.d == tensor_variable->symbol.d || y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) |
1248 | 0 | continue; |
1249 | 0 | flag = (x_symbol.d == y_symbol.d); |
1250 | 0 | } |
1251 | 0 | } |
1252 | 5 | if (!flag) |
1253 | 5 | ccv_nnc_graph_exec_symbol_disjoin(graph->tape, s_symbol, d_symbol); |
1254 | 5 | } |
1255 | 5 | } |
1256 | 6 | } |
1257 | 6 | const int sources_and_is_only_output = (bind->sources && bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d); |
1258 | 6 | if (!bind->sources || bind->sources->rnum == 0 || sources_and_is_only_output) |
1259 | 6 | { |
1260 | 6 | int ws_start = -1; |
1261 | 6 | _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, bind, &ws_start, 1); |
1262 | | // Because we are detaching from the graph, there is no need to trace forward to see whether it is unused |
1263 | | // before removing the source execs. We can remove them right now, breaking the graph in two. That is why |
1264 | | // we call trace backward to free regardless of the outcome of the forward trace. |
1265 | 6 | if (ws_start == -1) |
1266 | 0 | { |
1267 | 0 | if (!graph->ws) |
1268 | 0 | graph->ws = ccv_array_new(sizeof(int), bind->destinations ? bind->destinations->rnum : 0, 0); |
1269 | 0 | ccv_array_t* const ws = graph->ws; |
1270 | 0 | ccv_array_clear(ws); |
1271 | 0 | if (bind->sources) |
1272 | 0 | for (i = 0; i < bind->sources->rnum; i++) |
1273 | 0 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(bind->sources, i)); |
1274 | 0 | ws_start = 0; |
1275 | 0 | } |
1276 | 6 | _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start); |
1277 | 6 | } |
1278 | | // If the bind now has no relevant sources or destinations, we can safely free the underlying tensor symbol. |
1279 | 6 | if ((!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) |
1280 | 1 | { |
1281 | 1 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); |
1282 | 1 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); |
1283 | 1 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; |
1284 | 1 | tensor_variable->symbol = NO_TENSOR_SYMBOL; |
1285 | 1 | return; |
1286 | 1 | } |
1287 | | // Mark both as constant, such that even if it cannot be freed now, it can be freed as soon as possible later. |
1288 | 5 | bind->type = CCV_NNC_TENSOR_CONSTANT; |
1289 | 5 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; |
1290 | 5 | } |
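 | | /* Editor's note: a brief usage sketch (hypothetical variable name). Detach keeps the value but |
 | | * opts the variable out of autograd: after the call its type is CCV_NNC_TENSOR_CONSTANT, and the |
 | | * tape edges between its producers and consumers are disjoined wherever no other tensor still |
 | | * connects them: |
 | | * |
 | | * ccv_nnc_tensor_variable_detach(graph, running_mean); // e.g. a batch-norm statistic |
 | | */ |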
1291 | | |
1292 | | void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask) |
1293 | 12 | { |
1294 | 12 | int i, j; |
1295 | 12 | ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0); |
1296 | 31 | for (i = 0; i < source_variable_size; i++) |
1297 | 19 | { |
1298 | 19 | if (source_variables[i]->symbol.d < 0) |
1299 | 0 | continue; |
1300 | 19 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d); |
1301 | 19 | if (bind->destinations && bind->destinations->rnum > 0) |
1302 | 42 | for (j = 0; j < bind->destinations->rnum; j++) |
1303 | 25 | { |
1304 | | // It is ok to have duplicate symbols. |
1305 | 25 | const int d = *(int*)ccv_array_get(bind->destinations, j); |
1306 | 25 | ccv_nnc_graph_exec_symbol_t symbol = { |
1307 | 25 | .d = d, |
1308 | 25 | .graph = graph->tape |
1309 | 25 | }; |
1310 | 25 | ccv_array_push(sources_destinations, &symbol); |
1311 | 25 | } |
1312 | 19 | } |
1313 | 12 | const int source_size = sources_destinations->rnum; |
1314 | 24 | for (i = 0; i < destination_variable_size; i++) |
1315 | 12 | { |
1316 | 12 | if (destination_variables[i]->symbol.d < 0) |
1317 | 0 | continue; |
1318 | 12 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d); |
1319 | 12 | if (bind->sources && bind->sources->rnum > 0) |
1320 | 20 | for (j = 0; j < bind->sources->rnum; j++) |
1321 | 10 | { |
1322 | | // It is ok to have duplicate symbols. |
1323 | 10 | const int d = *(int*)ccv_array_get(bind->sources, j); |
1324 | 10 | ccv_nnc_graph_exec_symbol_t symbol = { |
1325 | 10 | .d = d, |
1326 | 10 | .graph = graph->tape |
1327 | 10 | }; |
1328 | 10 | ccv_array_push(sources_destinations, &symbol); |
1329 | 10 | } |
1330 | 12 | } |
1331 | 12 | const int destination_size = sources_destinations->rnum - source_size; |
1332 | 12 | if (source_size == 0 || destination_size == 0) |
1333 | 2 | { |
1334 | 2 | ccv_array_free(sources_destinations); |
1335 | 2 | return; |
1336 | 2 | } |
1337 | 10 | const int bitmask_size = ((source_size + 63) >> 6); |
1338 | 10 | assert(bitmask_size < 256); |
1339 | 10 | uint64_t exec_bitmask[bitmask_size]; |
1340 | 10 | ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size), destination_size, exec_bitmask); |
1341 | 10 | int k = 0; |
1342 | 27 | for (i = 0; i < source_variable_size; i++) |
1343 | 17 | { |
1344 | 17 | if (source_variables[i]->symbol.d < 0) |
1345 | 0 | { |
1346 | 0 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); |
1347 | 0 | continue; |
1348 | 0 | } |
1349 | 17 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d); |
1350 | 17 | int flag = 0; |
1351 | 17 | if (bind->destinations && bind->destinations->rnum > 0) |
1352 | 15 | { |
1353 | 15 | assert(k <= source_size - bind->destinations->rnum); |
1354 | 32 | for (j = 0; !flag && j < bind->destinations->rnum; j++) |
1355 | 17 | flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]); |
1356 | 15 | k += bind->destinations->rnum; |
1357 | 15 | } |
1358 | 17 | if (flag) |
1359 | 12 | bitmask[i >> 6] |= ((uint64_t)1 << (i & 63)); |
1360 | 5 | else |
1361 | 5 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); |
1362 | 17 | } |
1363 | 10 | ccv_array_free(sources_destinations); |
1364 | 10 | } |
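 | | /* Editor's note: a minimal sketch of how the bitmask is consumed (hypothetical setup; one |
 | | * uint64_t covers up to 64 source variables, with bit i answering for source_variables[i]): |
 | | * |
 | | * uint64_t bitmask = 0; |
 | | * ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(graph, &x, 1, &loss, 1, &bitmask); |
 | | * const int x_affects_loss = !!(bitmask & 1); // set iff x can still reach loss on the tape |
 | | */ |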
1365 | | |
1366 | | int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type) |
1367 | 451 | { |
1368 | 451 | return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type); |
1369 | 451 | } |
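 | | // Editor's note: `type` is passed straight through to ccv_nnc_symbolic_graph_active_symbol_count; |
 | | // in the wider codebase it selects tensor vs. exec symbols (e.g. CCV_NNC_SYMBOL_TENSOR, an enum |
 | | // name assumed here, not visible in this file), which makes this a cheap leak check after a |
 | | // round of frees. |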
1370 | | |
1371 | | void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out) |
1372 | 416 | { |
1373 | 416 | ccv_nnc_symbolic_graph_dot(graph->tape, flags, out); |
1374 | 416 | } |
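 | | // Editor's note: handy while debugging the free logic above; a typical call (flag name assumed |
 | | // from ccv_nnc.h) is ccv_nnc_dynamic_graph_dot(graph, CCV_NNC_LONG_DOT_GRAPH, stdout); |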
1375 | | |
1376 | | void ccv_nnc_dynamic_graph_format(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context) |
1377 | 0 | { |
1378 | 0 | ccv_nnc_symbolic_graph_format(graph->tape, 0, 0, 0, 0, format_fn, context); |
1379 | 0 | } |