File: | nnc/ccv_nnc_dynamic_graph.c |
Warning: | line 675, column 4 Branch condition evaluates to a garbage value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv_nnc.h" | ||||
2 | #include "ccv_nnc_easy.h" | ||||
3 | #include "ccv_nnc_internal.h" | ||||
4 | #include "ccv_nnc_easy.h" | ||||
5 | #include "ccv_internal.h" | ||||
6 | #include "_ccv_nnc_dynamic_graph.h" | ||||
7 | |||||
8 | // MARK - Level-4 API | ||||
9 | |||||
10 | ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void) | ||||
11 | { | ||||
12 | ccv_nnc_dynamic_graph_t* graph = ccmallocmalloc(sizeof(ccv_nnc_dynamic_graph_t)); | ||||
13 | graph->no_grad = 0; | ||||
14 | graph->reuse_var = -1; | ||||
15 | graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0); | ||||
16 | graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0); | ||||
17 | graph->tape = ccv_nnc_symbolic_graph_new(); | ||||
18 | graph->xpu_alloc.mp_hdr = -1; | ||||
19 | graph->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str(); | ||||
20 | graph->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc(); | ||||
21 | // These may not be used as frequent, init as needed. | ||||
22 | graph->stateful_execs = 0; | ||||
23 | graph->reuse_stateful_exec = -1; | ||||
24 | graph->stream_map = 0; | ||||
25 | graph->ws = 0; | ||||
26 | return graph; | ||||
27 | } | ||||
28 | |||||
29 | static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing) | ||||
30 | { | ||||
31 | const int index = tensor_variable->index; | ||||
32 | if (tensor_variable->tensor_view) | ||||
33 | { | ||||
34 | if (tensor_variable->destructor_hook.func) | ||||
35 | tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context); | ||||
36 | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view)((uintptr_t)(tensor_variable->tensor_view) & 1)) | ||||
37 | { | ||||
38 | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) | ||||
39 | ccv_nnc_tensor_view_free(tensor_variable->tensor_view); | ||||
40 | else { | ||||
41 | if (!tensor_variable->alias_index_ref && // Return this memory to the graph. | ||||
42 | CCV_TENSOR_GET_MEMORY(tensor_variable->tensor_view->info.type)((tensor_variable->tensor_view->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && tensor_variable->tensor_view->data.u8) | ||||
43 | ccv_nnc_xpu_free(&graph->xpu_alloc, tensor_variable->tensor_view->data.u8); | ||||
44 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); | ||||
45 | } | ||||
46 | } | ||||
47 | } | ||||
48 | ccfreefree(tensor_variable); | ||||
49 | if (zeroing) | ||||
50 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(index))) = 0; | ||||
51 | int i; | ||||
52 | for (i = graph->vars->rnum - 1; i >= 0; i--) | ||||
53 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) != 0) | ||||
54 | { | ||||
55 | graph->vars->rnum = i + 1; | ||||
56 | break; | ||||
57 | } | ||||
58 | if (index < graph->vars->rnum && | ||||
59 | (index < graph->reuse_var || graph->reuse_var < 0)) | ||||
60 | graph->reuse_var = index; | ||||
61 | else if (graph->reuse_var >= graph->vars->rnum) | ||||
62 | graph->reuse_var = -1; | ||||
63 | } | ||||
64 | |||||
65 | static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing) | ||||
66 | { | ||||
67 | bind->index = CCV_NNC_TENSOR_NO_VARIABLE; | ||||
68 | if (bind->sources) | ||||
69 | ccv_array_free(bind->sources); | ||||
70 | if (bind->destinations) | ||||
71 | ccv_array_free(bind->destinations); | ||||
72 | if (bind->tensor_view) | ||||
73 | { | ||||
74 | if (bind->destructor_hook.func) | ||||
75 | bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context); | ||||
76 | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view)((uintptr_t)(bind->tensor_view) & 1)) | ||||
77 | { | ||||
78 | if (CCV_IS_TENSOR_VIEW(bind->tensor_view)((*(int*)(bind->tensor_view)) & CCV_TENSOR_VIEW)) | ||||
79 | ccv_nnc_tensor_view_free(bind->tensor_view); | ||||
80 | else { | ||||
81 | if (!bind->alias_ref && // Return this memory to the graph. | ||||
82 | CCV_TENSOR_GET_MEMORY(bind->tensor_view->info.type)((bind->tensor_view->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && bind->tensor_view->data.u8) | ||||
83 | ccv_nnc_xpu_free(&graph->xpu_alloc, bind->tensor_view->data.u8); | ||||
84 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view); | ||||
85 | } | ||||
86 | } | ||||
87 | } | ||||
88 | if (zeroing) | ||||
89 | { | ||||
90 | bind->sources = 0; | ||||
91 | bind->destinations = 0; | ||||
92 | bind->tensor_view = 0; | ||||
93 | bind->destructor_hook.func = 0; | ||||
94 | bind->destructor_hook.context = 0; | ||||
95 | } | ||||
96 | } | ||||
97 | |||||
98 | void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph) | ||||
99 | { | ||||
100 | int i; | ||||
101 | for (i = 0; i < graph->vars->rnum; i++) | ||||
102 | { | ||||
103 | ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))); | ||||
104 | if (tensor_variable) | ||||
105 | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0); | ||||
106 | } | ||||
107 | ccv_array_free(graph->vars); | ||||
108 | for (i = 0; i < graph->binds->rnum; i++) | ||||
109 | _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(i))), 0); | ||||
110 | ccv_array_free(graph->binds); | ||||
111 | ccv_nnc_symbolic_graph_free(graph->tape); | ||||
112 | if (graph->ws) | ||||
113 | ccv_array_free(graph->ws); | ||||
114 | if (graph->stateful_execs) | ||||
115 | { | ||||
116 | for (i = 0; i < graph->stateful_execs->rnum; i++) | ||||
117 | { | ||||
118 | ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i)((void*)(((char*)((graph->stateful_execs)->data)) + (size_t )(graph->stateful_execs)->rsize * (size_t)(i))); | ||||
119 | if (stateful_exec) | ||||
120 | ccfreefree(stateful_exec); | ||||
121 | } | ||||
122 | ccv_array_free(graph->stateful_execs); | ||||
123 | } | ||||
124 | if (graph->stream_map) | ||||
125 | { | ||||
126 | khiter_t k; | ||||
127 | for (k = kh_begin(graph->stream_map)(khint_t)(0); k != kh_end(graph->stream_map)((graph->stream_map)->n_buckets); ++k) | ||||
128 | { | ||||
129 | if (!kh_exist(graph->stream_map, k)(!(((graph->stream_map)->flags[(k)>>4]>>((( k)&0xfU)<<1))&3))) | ||||
130 | continue; | ||||
131 | ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]); | ||||
132 | ccv_nnc_stream_context_free(stream); | ||||
133 | } | ||||
134 | kh_destroy(stream_map, graph->stream_map)kh_destroy_stream_map(graph->stream_map); | ||||
135 | } | ||||
136 | ccv_nnc_xpu_alloc_destroy(&graph->xpu_alloc); | ||||
137 | ccfreefree(graph); | ||||
138 | } | ||||
139 | |||||
140 | void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor) | ||||
141 | { | ||||
142 | assert(!tensor_variable->alias_index_ref)((void) sizeof ((!tensor_variable->alias_index_ref) ? 1 : 0 ), __extension__ ({ if (!tensor_variable->alias_index_ref) ; else __assert_fail ("!tensor_variable->alias_index_ref" , "ccv_nnc_dynamic_graph.c", 142, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
143 | if (tensor_variable->tensor_view && !CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view)((uintptr_t)(tensor_variable->tensor_view) & 1)) | ||||
144 | { | ||||
145 | assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))((void) sizeof ((!((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW)) ? 1 : 0), __extension__ ({ if (!((*( int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) ; else __assert_fail ("!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)" , "ccv_nnc_dynamic_graph.c", 145, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
146 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); | ||||
147 | } | ||||
148 | tensor_variable->info = tensor->info; | ||||
149 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1); | ||||
150 | } | ||||
151 | |||||
152 | void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context) | ||||
153 | { | ||||
154 | tensor_variable->destructor_hook.func = func; | ||||
155 | tensor_variable->destructor_hook.context = context; | ||||
156 | } | ||||
157 | |||||
158 | inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info) | ||||
159 | { | ||||
160 | tensor_variable->alias_index_ref = 0; | ||||
161 | tensor_variable->alias_off = 0; | ||||
162 | tensor_variable->destructor_hook.func = 0; | ||||
163 | tensor_variable->destructor_hook.context = 0; | ||||
164 | tensor_variable->info = info; | ||||
165 | tensor_variable->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | ||||
166 | tensor_variable->tensor_view = 0; | ||||
167 | if (graph->reuse_var >= 0) | ||||
168 | { | ||||
169 | const int reuse_var = graph->reuse_var; | ||||
170 | assert(reuse_var < graph->vars->rnum)((void) sizeof ((reuse_var < graph->vars->rnum) ? 1 : 0), __extension__ ({ if (reuse_var < graph->vars->rnum ) ; else __assert_fail ("reuse_var < graph->vars->rnum" , "ccv_nnc_dynamic_graph.c", 170, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
171 | tensor_variable->index = reuse_var; | ||||
172 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(reuse_var))) = tensor_variable; | ||||
173 | int i; | ||||
174 | graph->reuse_var = -1; | ||||
175 | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++) | ||||
176 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) == 0) | ||||
177 | graph->reuse_var = i; | ||||
178 | } else { | ||||
179 | tensor_variable->index = graph->vars->rnum; | ||||
180 | ccv_array_push(graph->vars, &tensor_variable); | ||||
181 | } | ||||
182 | } | ||||
183 | |||||
184 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) | ||||
185 | { | ||||
186 | ccv_nnc_tensor_variable_t tensor_variable = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | ||||
187 | tensor_variable->type = CCV_NNC_TENSOR_VARIABLE; | ||||
188 | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); | ||||
189 | return tensor_variable; | ||||
190 | } | ||||
191 | |||||
192 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) | ||||
193 | { | ||||
194 | ccv_nnc_tensor_variable_t tensor_variable = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | ||||
195 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | ||||
196 | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); | ||||
197 | return tensor_variable; | ||||
198 | } | ||||
199 | |||||
200 | int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | ||||
201 | { | ||||
202 | return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT; | ||||
203 | } | ||||
204 | |||||
205 | ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | ||||
206 | { | ||||
207 | return tensor_variable->info; | ||||
208 | } | ||||
209 | |||||
210 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int stride[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info) | ||||
211 | { | ||||
212 | ccv_nnc_tensor_variable_t variable_alias = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | ||||
213 | variable_alias->type = tensor_variable->type; | ||||
214 | // If the tensor variable is an alias itself, we point directly to its original. | ||||
215 | if (tensor_variable->alias_index_ref) | ||||
216 | { | ||||
217 | variable_alias->alias_index_ref = tensor_variable->alias_index_ref; | ||||
218 | // The tensor variable need to be fully specified if I am doing alias an alias. | ||||
219 | assert(!ccv_nnc_is_tensor_auto(tensor_variable->info))((void) sizeof ((!ccv_nnc_is_tensor_auto(tensor_variable-> info)) ? 1 : 0), __extension__ ({ if (!ccv_nnc_is_tensor_auto (tensor_variable->info)) ; else __assert_fail ("!ccv_nnc_is_tensor_auto(tensor_variable->info)" , "ccv_nnc_dynamic_graph.c", 219, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
220 | int i; | ||||
221 | int no_stride = 1; | ||||
222 | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | ||||
223 | no_stride = (tensor_variable->stride[i] == 0); | ||||
224 | int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
225 | int* to_stride; | ||||
226 | if (no_stride) | ||||
227 | { | ||||
228 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, stride_from_dim); | ||||
229 | to_stride = stride_from_dim; | ||||
230 | } else | ||||
231 | to_stride = tensor_variable->stride; | ||||
232 | // If we provide stride, or reshape to a different size, assert the tensor variable itself is contiguous (otherwise we cannot satisfy the reshape requirements). | ||||
233 | const int different_dim = ccv_nnc_tensor_nd(info.dim) != ccv_nnc_tensor_nd(tensor_variable->info.dim); | ||||
234 | if (different_dim || (stride[0] != 0 && memcmp(stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) != 0)) | ||||
235 | { assert(ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, to_stride))((void) sizeof ((ccv_nnc_tensor_view_is_contiguous(tensor_variable ->info.dim, to_stride)) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_view_is_contiguous (tensor_variable->info.dim, to_stride)) ; else __assert_fail ("ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, to_stride)" , "ccv_nnc_dynamic_graph.c", 235, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
236 | // Need to compute alias off, that is the alias off of the tensor variable plus its ofs. | ||||
237 | const off_t off = ccv_nnc_tensor_view_offset(tensor_variable->info.datatype, to_stride, tensor_variable->ofs); | ||||
238 | variable_alias->alias_off = tensor_variable->alias_off + off; | ||||
239 | // If we don't provide stride, copy the stride from previous variable. | ||||
240 | if (stride[0] == 0) | ||||
241 | { | ||||
242 | if (different_dim) | ||||
243 | ccv_nnc_tensor_get_stride(info.dim, variable_alias->stride); | ||||
244 | else | ||||
245 | memcpy(variable_alias->stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | ||||
246 | } else | ||||
247 | memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | ||||
248 | } else { | ||||
249 | variable_alias->alias_index_ref = tensor_variable->index + 1; | ||||
250 | variable_alias->alias_off = 0; | ||||
251 | memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | ||||
252 | } | ||||
253 | variable_alias->info = info; | ||||
254 | variable_alias->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | ||||
255 | variable_alias->destructor_hook.func = 0; | ||||
256 | variable_alias->destructor_hook.context = 0; | ||||
257 | variable_alias->tensor_view = 0; | ||||
258 | memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | ||||
259 | if (graph->reuse_var >= 0) | ||||
260 | { | ||||
261 | const int reuse_var = graph->reuse_var; | ||||
262 | assert(reuse_var < graph->vars->rnum)((void) sizeof ((reuse_var < graph->vars->rnum) ? 1 : 0), __extension__ ({ if (reuse_var < graph->vars->rnum ) ; else __assert_fail ("reuse_var < graph->vars->rnum" , "ccv_nnc_dynamic_graph.c", 262, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
263 | variable_alias->index = reuse_var; | ||||
264 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(reuse_var))) = variable_alias; | ||||
265 | int i; | ||||
266 | graph->reuse_var = -1; | ||||
267 | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++) | ||||
268 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) == 0) | ||||
269 | graph->reuse_var = i; | ||||
270 | } else { | ||||
271 | variable_alias->index = graph->vars->rnum; | ||||
272 | ccv_array_push(graph->vars, &variable_alias); | ||||
273 | } | ||||
274 | return variable_alias; | ||||
275 | } | ||||
276 | |||||
277 | int ccv_nnc_tensor_variable_alias_params(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], int stride[CCV_NNC_MAX_DIM_ALLOC(12)]) | ||||
278 | { | ||||
279 | if (!tensor_variable->alias_index_ref) | ||||
280 | return -1; | ||||
281 | if (ofs) | ||||
282 | memcpy(ofs, tensor_variable->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | ||||
283 | if (stride) | ||||
284 | memcpy(stride, tensor_variable->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | ||||
285 | return 0; | ||||
286 | } | ||||
287 | |||||
288 | ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context) | ||||
289 | { | ||||
290 | if (tensor_variable->tensor_view) | ||||
291 | { | ||||
292 | if (tensor_variable->alias_index_ref) | ||||
293 | { | ||||
294 | const int alias_index = tensor_variable->alias_index_ref - 1; | ||||
295 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 295, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
296 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | ||||
297 | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) | ||||
298 | { | ||||
299 | ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view; | ||||
300 | // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid. | ||||
301 | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv))((void) sizeof ((!((uintptr_t)(tv) & 1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t)(tv) & 1)) ; else __assert_fail ("!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv)" , "ccv_nnc_dynamic_graph.c", 301, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
302 | // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed. | ||||
303 | ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8, tv->off + tensor_variable->alias_off, &tv->data, &tv->dataof); | ||||
304 | } else { | ||||
305 | ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | ||||
306 | // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid. | ||||
307 | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv))((void) sizeof ((!((uintptr_t)(tv) & 1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t)(tv) & 1)) ; else __assert_fail ("!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv)" , "ccv_nnc_dynamic_graph.c", 307, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
308 | // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed. | ||||
309 | ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8, tensor_variable->alias_off, &tv->data, &tv->dataof); | ||||
310 | } | ||||
311 | } | ||||
312 | return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(tensor_variable->tensor_view ) & ~(uintptr_t)1)); | ||||
313 | } | ||||
314 | if (!tensor_variable->alias_index_ref) | ||||
315 | { | ||||
316 | // If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0. | ||||
317 | if (ccv_nnc_is_tensor_auto(tensor_variable->info)) | ||||
318 | return 0; | ||||
319 | void* ptr = 0; | ||||
320 | const size_t data_size = ccv_nnc_tensor_data_size(tensor_variable->info); | ||||
321 | if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type)((tensor_variable->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && data_size > 0) | ||||
322 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type)(((tensor_variable->info.type) & 0xfff00) >> 8), stream_context, data_size); | ||||
323 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0); | ||||
324 | if (tensor_variable->info.dim[0] > 0) | ||||
325 | { assert(tensor_variable->tensor_view->data.u8)((void) sizeof ((tensor_variable->tensor_view->data.u8) ? 1 : 0), __extension__ ({ if (tensor_variable->tensor_view ->data.u8) ; else __assert_fail ("tensor_variable->tensor_view->data.u8" , "ccv_nnc_dynamic_graph.c", 325, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
326 | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | ||||
327 | } | ||||
328 | const int alias_index = tensor_variable->alias_index_ref - 1; | ||||
329 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 329, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
330 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | ||||
331 | assert(!variable_to->alias_index_ref)((void) sizeof ((!variable_to->alias_index_ref) ? 1 : 0), __extension__ ({ if (!variable_to->alias_index_ref) ; else __assert_fail ("!variable_to->alias_index_ref", "ccv_nnc_dynamic_graph.c" , 331, __extension__ __PRETTY_FUNCTION__); })); | ||||
332 | if (!variable_to->tensor_view) | ||||
333 | { | ||||
334 | // If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0. | ||||
335 | if (ccv_nnc_is_tensor_auto(variable_to->info)) | ||||
336 | return 0; | ||||
337 | void* ptr = 0; | ||||
338 | assert(variable_to->info.type == tensor_variable->info.type)((void) sizeof ((variable_to->info.type == tensor_variable ->info.type) ? 1 : 0), __extension__ ({ if (variable_to-> info.type == tensor_variable->info.type) ; else __assert_fail ("variable_to->info.type == tensor_variable->info.type" , "ccv_nnc_dynamic_graph.c", 338, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
339 | const size_t data_size = ccv_nnc_tensor_data_size(variable_to->info); | ||||
340 | if (CCV_TENSOR_GET_MEMORY(variable_to->info.type)((variable_to->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && data_size > 0) | ||||
341 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type)(((variable_to->info.type) & 0xfff00) >> 8), stream_context, data_size); | ||||
342 | variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0); | ||||
343 | assert(variable_to->tensor_view->data.u8)((void) sizeof ((variable_to->tensor_view->data.u8) ? 1 : 0), __extension__ ({ if (variable_to->tensor_view->data .u8) ; else __assert_fail ("variable_to->tensor_view->data.u8" , "ccv_nnc_dynamic_graph.c", 343, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
344 | } | ||||
345 | int i; | ||||
346 | int no_ofs = 1; | ||||
347 | for (i = 0; no_ofs && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | ||||
348 | no_ofs = (tensor_variable->ofs[i] == 0); | ||||
349 | int no_stride = 1; | ||||
350 | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | ||||
351 | no_stride = (tensor_variable->stride[i] == 0); | ||||
352 | int stride_is_packed = no_stride; | ||||
353 | if (!no_stride) // We have stride, now if it is packed. | ||||
354 | stride_is_packed = ccv_nnc_is_tensor_stride_packed(tensor_variable->stride, tensor_variable->info.dim); | ||||
355 | assert(CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info))((void) sizeof ((_ccv_get_data_type_size[((tensor_variable-> info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count (tensor_variable->info) + tensor_variable->alias_off <= _ccv_get_data_type_size[((variable_to->info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count(variable_to-> info)) ? 1 : 0), __extension__ ({ if (_ccv_get_data_type_size [((tensor_variable->info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable ->alias_off <= _ccv_get_data_type_size[((variable_to-> info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count (variable_to->info)) ; else __assert_fail ("CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info)" , "ccv_nnc_dynamic_graph.c", 355, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
356 | // Allowing vector type to be normal tensor, rather than a tensor view. We cannot have any offset though. | ||||
357 | if (no_ofs && !stride_is_packed) | ||||
358 | stride_is_packed = ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, tensor_variable->stride); | ||||
359 | if (no_ofs && stride_is_packed) | ||||
360 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8, tensor_variable->info, 0); | ||||
361 | else { | ||||
362 | if (no_stride) | ||||
363 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride); | ||||
364 | tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1)), tensor_variable->info, tensor_variable->ofs, tensor_variable->stride); | ||||
365 | } | ||||
366 | if (tensor_variable->alias_off) | ||||
367 | ccv_nnc_tensor_data_add(tensor_variable->tensor_view->info, tensor_variable->alias_off, &tensor_variable->tensor_view->data, &tensor_variable->tensor_view->dataof); | ||||
368 | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | ||||
369 | } | ||||
370 | |||||
371 | static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol) | ||||
372 | { | ||||
373 | if (symbol.d >= graph->binds->rnum) | ||||
374 | { | ||||
375 | const int rnum = graph->binds->rnum; | ||||
376 | ccv_array_resize(graph->binds, symbol.d + 1); | ||||
377 | int i; | ||||
378 | for (i = rnum; i < graph->binds->rnum; i++) | ||||
379 | ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(i))))->index = CCV_NNC_TENSOR_NO_VARIABLE; | ||||
380 | } | ||||
381 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(symbol.d))); | ||||
382 | bind->type = tensor_variable->type; | ||||
383 | bind->index = tensor_variable->index; | ||||
384 | if (tensor_variable->alias_index_ref) | ||||
385 | { | ||||
386 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
387 | .d = symbol.d, | ||||
388 | .graph = graph->tape | ||||
389 | }); | ||||
390 | assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum)((void) sizeof ((alias_to.d >= 0 && alias_to.d < graph->binds->rnum) ? 1 : 0), __extension__ ({ if (alias_to .d >= 0 && alias_to.d < graph->binds->rnum ) ; else __assert_fail ("alias_to.d >= 0 && alias_to.d < graph->binds->rnum" , "ccv_nnc_dynamic_graph.c", 390, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
391 | bind->alias_ref = alias_to.d + 1; | ||||
392 | } else | ||||
393 | bind->alias_ref = 0; | ||||
394 | if (bind->sources) | ||||
395 | ccv_array_free(bind->sources); | ||||
396 | bind->sources = 0; | ||||
397 | if (bind->destinations) | ||||
398 | ccv_array_free(bind->destinations); | ||||
399 | bind->destinations = 0; | ||||
400 | bind->destructor_hook.func = 0; | ||||
401 | bind->destructor_hook.context = 0; | ||||
402 | bind->tensor_view = 0; | ||||
403 | } | ||||
404 | |||||
405 | static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | ||||
406 | { | ||||
407 | if (tensor_variable->symbol.d >= 0) | ||||
408 | return tensor_variable->symbol; | ||||
409 | if (!tensor_variable->alias_index_ref) | ||||
410 | { | ||||
411 | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0); | ||||
412 | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); | ||||
413 | return symbol; | ||||
414 | } | ||||
415 | const int alias_index = tensor_variable->alias_index_ref - 1; | ||||
416 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 416, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
417 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | ||||
418 | assert(!variable_to->alias_index_ref)((void) sizeof ((!variable_to->alias_index_ref) ? 1 : 0), __extension__ ({ if (!variable_to->alias_index_ref) ; else __assert_fail ("!variable_to->alias_index_ref", "ccv_nnc_dynamic_graph.c" , 418, __extension__ __PRETTY_FUNCTION__); })); | ||||
419 | int no_stride = 1; | ||||
420 | int i; | ||||
421 | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | ||||
422 | no_stride = (tensor_variable->stride[i] == 0); | ||||
423 | if (no_stride) | ||||
424 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride); | ||||
425 | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, tensor_variable->stride, tensor_variable->info, 0); | ||||
426 | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); | ||||
427 | return symbol; | ||||
428 | } | ||||
429 | |||||
430 | // Return the tensor variable that is old (the provided tensor variable will have a new setting). | ||||
431 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable) | ||||
432 | { | ||||
433 | struct ccv_nnc_tensor_variable_s x = *tensor_variable; | ||||
434 | ccv_nnc_tensor_variable_t new_variable; | ||||
435 | // Need to handle alias. | ||||
436 | if (x.alias_index_ref) | ||||
437 | new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(x.alias_index_ref - 1))), x.ofs, x.stride, x.info); | ||||
438 | else | ||||
439 | new_variable = ccv_nnc_tensor_variable_new(graph, x.info)ccv_nnc_tensor_variable_new_impl(graph, x.info); | ||||
440 | *tensor_variable = *new_variable; | ||||
441 | *new_variable = x; | ||||
442 | // The index should be the same though. | ||||
443 | const int index = new_variable->index; | ||||
444 | new_variable->index = tensor_variable->index; | ||||
445 | if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
446 | { | ||||
447 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(new_variable->symbol.d))); | ||||
448 | bind->index = new_variable->index; | ||||
449 | } | ||||
450 | tensor_variable->index = index; | ||||
451 | return new_variable; | ||||
452 | } | ||||
453 | |||||
454 | void ccv_nnc_dynamic_graph_set_max_concurrency(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int max_stream_count) | ||||
455 | { | ||||
456 | dynamic_graph->max_stream_count = max_stream_count; | ||||
457 | } | ||||
458 | |||||
459 | int ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad) | ||||
460 | { | ||||
461 | if (dynamic_graph->no_grad == no_grad) | ||||
462 | return -1; | ||||
463 | dynamic_graph->no_grad = no_grad; | ||||
464 | return 0; | ||||
465 | } | ||||
466 | |||||
467 | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type) | ||||
468 | { | ||||
469 | if (!graph->stream_map) | ||||
470 | graph->stream_map = kh_init(stream_map)kh_init_stream_map(); | ||||
471 | int ret = 0; | ||||
472 | khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret)kh_put_stream_map(graph->stream_map, type, &ret); | ||||
473 | assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if ( ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_nnc_dynamic_graph.c" , 473, __extension__ __PRETTY_FUNCTION__); })); | ||||
474 | ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]); | ||||
475 | // If ret == 0, the key already exist, we can return directly, otherwise, create and return. | ||||
476 | if (ret != 0) | ||||
477 | { | ||||
478 | stream = ccv_nnc_stream_context_new(type); | ||||
479 | kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]) = stream; | ||||
480 | } | ||||
481 | return stream; | ||||
482 | } | ||||
483 | |||||
484 | typedef struct { | ||||
485 | ccv_nnc_dynamic_graph_t* graph; | ||||
486 | int stream_type; | ||||
487 | } ccv_nnc_dynamic_graph_neighbor_context_discovery_t; | ||||
488 | |||||
489 | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context) | ||||
490 | { | ||||
491 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context; | ||||
492 | int type = discovery->stream_type; | ||||
493 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | ||||
494 | return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type); | ||||
495 | } | ||||
496 | |||||
497 | void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs) | ||||
498 | { | ||||
499 | int i, j; | ||||
500 | for (i = 0; i < input_size; i++) | ||||
| |||||
501 | if (inputs[i] && !inputs[i]->alias_index_ref) | ||||
502 | { assert(inputs[i]->tensor_view)((void) sizeof ((inputs[i]->tensor_view) ? 1 : 0), __extension__ ({ if (inputs[i]->tensor_view) ; else __assert_fail ("inputs[i]->tensor_view" , "ccv_nnc_dynamic_graph.c", 502, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
503 | ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | ||||
504 | for (i = 0; i
| ||||
505 | input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context)ccv_nnc_tensor_from_variable_impl(graph, inputs[i], stream_context ) : 0; | ||||
506 | ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | ||||
507 | for (i = 0; i
| ||||
508 | input_symbols[i] = inputs[i] ? _ccv_nnc_tensor_symbol_from_variable(graph, inputs[i]) : NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | ||||
509 | ccv_array_t* input_sources[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | ||||
510 | ccv_array_t* input_alias_sources[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | ||||
511 | for (i = 0; i
| ||||
512 | { | ||||
513 | input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(input_symbols[i].d))))->sources : 0; | ||||
514 | if (inputs[i] && inputs[i]->alias_index_ref) | ||||
515 | { | ||||
516 | const int alias_index_ref = inputs[i]->alias_index_ref - 1; | ||||
517 | assert(alias_index_ref >= 0)((void) sizeof ((alias_index_ref >= 0) ? 1 : 0), __extension__ ({ if (alias_index_ref >= 0) ; else __assert_fail ("alias_index_ref >= 0" , "ccv_nnc_dynamic_graph.c", 517, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
518 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index_ref))); | ||||
519 | input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))))->sources; | ||||
520 | } else | ||||
521 | input_alias_sources[i] = 0; | ||||
522 | } | ||||
523 | const int parallel_count = ccv_max(1, parallel)({ typeof (1) _a = (1); typeof (parallel) _b = (parallel); (_a > _b) ? _a : _b; }); | ||||
524 | assert(input_size % parallel_count == 0)((void) sizeof ((input_size % parallel_count == 0) ? 1 : 0), __extension__ ({ if (input_size % parallel_count == 0) ; else __assert_fail ("input_size % parallel_count == 0", "ccv_nnc_dynamic_graph.c" , 524, __extension__ __PRETTY_FUNCTION__); })); | ||||
525 | const int per_input_size = input_size / parallel_count; | ||||
526 | assert(output_size % parallel_count == 0)((void) sizeof ((output_size % parallel_count == 0) ? 1 : 0), __extension__ ({ if (output_size % parallel_count == 0) ; else __assert_fail ("output_size % parallel_count == 0", "ccv_nnc_dynamic_graph.c" , 526, __extension__ __PRETTY_FUNCTION__); })); | ||||
527 | const int per_output_size = output_size / parallel_count; | ||||
528 | int output_auto = 0; | ||||
529 | for (i = 0; !output_auto
| ||||
530 | output_auto = outputs[i] ? ccv_nnc_is_tensor_auto(outputs[i]->info) : 0; | ||||
531 | // One extra step, infer the parameters for outputs. | ||||
532 | if (output_auto
| ||||
533 | { | ||||
534 | ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)({ typeof (1) _a = (1); typeof (per_input_size) _b = (per_input_size ); (_a > _b) ? _a : _b; })]; | ||||
535 | ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | ||||
536 | for (i = 0; i
| ||||
537 | { | ||||
538 | for (j = 0; j
| ||||
539 | input_params[j] = inputs[j + i * per_input_size] ? inputs[j + i * per_input_size]->info : ccv_nnc_tensor_auto; | ||||
540 | for (j = 0; j
| ||||
541 | output_params[j] = outputs[j + i * per_output_size] ? outputs[j + i * per_output_size]->info : ccv_nnc_tensor_auto; | ||||
542 | ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size); | ||||
543 | for (j = 0; j < per_output_size; j++) | ||||
544 | if (outputs[j + i * per_output_size]) | ||||
545 | outputs[j + i * per_output_size]->info = output_params[j]; | ||||
546 | } | ||||
547 | } | ||||
548 | int freeable_size = 0; | ||||
549 | ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)({ typeof (1) _a = (1); typeof (output_size) _b = (output_size ); (_a > _b) ? _a : _b; })]; | ||||
550 | // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee. | ||||
551 | for (i = 0; i
| ||||
552 | { | ||||
553 | // First, go over to see whether there is enforce inplace. | ||||
554 | int enforce_idx = -1; | ||||
555 | for (j = 0; enforce_idx
| ||||
556 | if (inputs[j] && ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size)) | ||||
557 | enforce_idx = j; | ||||
558 | if (enforce_idx
| ||||
559 | { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) ? 1 : 0 ), __extension__ ({ if (outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) ; else __assert_fail ("outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL" , "ccv_nnc_dynamic_graph.c", 559, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
560 | // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic. | ||||
561 | if (outputs[i] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
562 | { | ||||
563 | const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[i]->symbol.d))); | ||||
564 | if (enforce_idx >= 0) | ||||
565 | { assert(!bind->destinations || bind->destinations->rnum == 0)((void) sizeof ((!bind->destinations || bind->destinations ->rnum == 0) ? 1 : 0), __extension__ ({ if (!bind->destinations || bind->destinations->rnum == 0) ; else __assert_fail ("!bind->destinations || bind->destinations->rnum == 0" , "ccv_nnc_dynamic_graph.c", 565, __extension__ __PRETTY_FUNCTION__ ); })); } | ||||
566 | if (bind->sources && bind->sources->rnum > 0) | ||||
567 | { | ||||
568 | const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]); | ||||
569 | // If this is enforce output, make sure the tensor view is taken by the output. | ||||
570 | if (enforce_idx >= 0) | ||||
571 | { | ||||
572 | outputs[i]->destructor_hook = old_var->destructor_hook; | ||||
573 | outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output. | ||||
574 | old_var->tensor_view = 0; | ||||
575 | } | ||||
576 | } | ||||
577 | } | ||||
578 | } | ||||
579 | ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | ||||
580 | if (parallel_count
| ||||
581 | { | ||||
582 | const int max_device_id_size = per_input_size + per_output_size; | ||||
583 | assert(max_device_id_size > 0)((void) sizeof ((max_device_id_size > 0) ? 1 : 0), __extension__ ({ if (max_device_id_size > 0) ; else __assert_fail ("max_device_id_size > 0" , "ccv_nnc_dynamic_graph.c", 583, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
584 | int device_ids[max_device_id_size]; | ||||
585 | ccv_nnc_stream_context_t* streams[parallel_count]; | ||||
586 | ccv_nnc_stream_signal_t* signal; | ||||
587 | if (stream_context) | ||||
588 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | ||||
589 | for (i = 0; i < parallel_count; i++) | ||||
590 | { | ||||
591 | int flag = 0; | ||||
592 | for (j = 0; !flag && j < per_input_size; j++) | ||||
593 | if (input_tensors[i * per_input_size + j]) | ||||
594 | flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type)((input_tensors[i * per_input_size + j]->info.type) & 0x3 ) == CCV_TENSOR_GPU_MEMORY); | ||||
595 | for (j = 0; j < per_output_size; j++) | ||||
596 | { | ||||
597 | output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context)ccv_nnc_tensor_from_variable_impl(graph, outputs[j + i * per_output_size ], stream_context) : 0; | ||||
598 | if (output_tensors[j] && !flag) | ||||
599 | flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type)((output_tensors[j]->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY); | ||||
600 | } | ||||
601 | const int stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | ||||
602 | const int tensor_type = flag ? CCV_TENSOR_GPU_MEMORY : CCV_TENSOR_CPU_MEMORY; | ||||
603 | const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size); | ||||
604 | ccv_nnc_stream_context_t* stream_0 = 0; | ||||
605 | for (j = 0; j < device_id_size; j++) | ||||
606 | { | ||||
607 | int type = stream_type; | ||||
608 | CCV_STREAM_SET_DEVICE_ID(type, device_ids[j])(type) = (((type) & ~0xfff00) | (((device_ids[j]) & 0xfff ) << 8)); | ||||
609 | ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type); | ||||
610 | if (!stream_0) | ||||
611 | stream_0 = stream; | ||||
612 | } | ||||
613 | // Wait signal to finish. | ||||
614 | if (stream_context) | ||||
615 | { | ||||
616 | if (stream_0) | ||||
617 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | ||||
618 | else | ||||
619 | ccv_nnc_stream_context_wait(stream_context); | ||||
620 | } | ||||
621 | if (stream_0) | ||||
622 | { | ||||
623 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = { | ||||
624 | .graph = graph, | ||||
625 | .stream_type = stream_type | ||||
626 | }; | ||||
627 | ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery); | ||||
628 | } | ||||
629 | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size , per_output_size); fflush(stdout); } } while (0); | ||||
630 | int k; | ||||
631 | for (k = 0; k < per_input_size; k++) | ||||
632 | { | ||||
633 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[k + i * per_input_size]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size ], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? (((input_tensors[k + i * per_input_size]-> info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | ||||
634 | if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
635 | ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]); | ||||
636 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
637 | } | ||||
638 | for (k = 0; k < per_output_size; k++) | ||||
639 | { | ||||
640 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors [k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? (((output_tensors[k]->info.type) & 0xfff00) >> 8 ) : -1)); fflush(stdout); } } while (0); | ||||
641 | if (output_tensors[k] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
642 | ccv_nnc_print_tensor_shape(output_tensors[k]); | ||||
643 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
644 | } | ||||
645 | const int status = ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0); | ||||
646 | if (status != 0) | ||||
647 | PRINT(CCV_CLI_INFO, "Invalid Status: %d\n", status)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("Invalid Status: %d\n", status); fflush(stdout); } } while ( 0); | ||||
648 | if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)(CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) | ||||
649 | { | ||||
650 | for (k = 0; k < per_output_size; k++) | ||||
651 | { | ||||
652 | PRINT(CCV_CLI_VERBOSE, "POST: |<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf("POST: |<- %d. %p (%p:%d)", k + 1, output_tensors [k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? (((output_tensors[k]->info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | ||||
653 | if (output_tensors[k]) | ||||
654 | ccv_nnc_print_tensor_info(output_tensors[k]); | ||||
655 | PRINT(CCV_CLI_VERBOSE, "\n")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf("\n"); fflush(stdout); } } while (0); | ||||
656 | } | ||||
657 | } | ||||
658 | if (stream_context && stream_0) | ||||
659 | { | ||||
660 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | ||||
661 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | ||||
662 | } | ||||
663 | streams[i] = stream_0; | ||||
664 | } | ||||
665 | if (!stream_context) | ||||
666 | for (i = 0; i < parallel_count; i++) | ||||
667 | if (streams[i]) | ||||
668 | ccv_nnc_stream_context_wait(streams[i]); | ||||
669 | } else { | ||||
670 | for (i = 0; i < per_output_size; i++) | ||||
671 | output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context)ccv_nnc_tensor_from_variable_impl(graph, outputs[i], stream_context ) : 0; | ||||
672 | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size , per_output_size); fflush(stdout); } } while (0); | ||||
673 | for (i = 0; i < per_input_size; i++) | ||||
674 | { | ||||
675 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors [i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? ( ((input_tensors[i]->info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | ||||
| |||||
676 | if (input_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
677 | ccv_nnc_print_tensor_info(input_tensors[i]); | ||||
678 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
679 | } | ||||
680 | ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context); | ||||
681 | for (i = 0; i < per_output_size; i++) | ||||
682 | { | ||||
683 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors [i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? (((output_tensors[i]->info.type) & 0xfff00) >> 8 ) : -1)); fflush(stdout); } } while (0); | ||||
684 | if (output_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
685 | ccv_nnc_print_tensor_info(output_tensors[i]); | ||||
686 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
687 | } | ||||
688 | } | ||||
689 | int inputs_are_constants = 1; | ||||
690 | for (i = 0; inputs_are_constants && i < input_size; i++) | ||||
691 | if (inputs[i] && inputs[i]->type != CCV_NNC_TENSOR_CONSTANT) | ||||
692 | inputs_are_constants = 0; | ||||
693 | if (input_size > 0 && !inputs_are_constants && !graph->no_grad) // No need to record the execution if there is no input or we disabled gradient computation. | ||||
694 | { | ||||
695 | ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)({ typeof (1) _a = (1); typeof (output_size) _b = (output_size ); (_a > _b) ? _a : _b; })]; | ||||
696 | for (i = 0; i < output_size; i++) | ||||
697 | if (outputs[i]) | ||||
698 | { | ||||
699 | assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT)((void) sizeof ((outputs[i]->type != CCV_NNC_TENSOR_CONSTANT ) ? 1 : 0), __extension__ ({ if (outputs[i]->type != CCV_NNC_TENSOR_CONSTANT ) ; else __assert_fail ("outputs[i]->type != CCV_NNC_TENSOR_CONSTANT" , "ccv_nnc_dynamic_graph.c", 699, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
700 | output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]); | ||||
701 | } else | ||||
702 | output_symbols[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | ||||
703 | int t; | ||||
704 | for (t = 0; t < parallel_count; t++) | ||||
705 | { | ||||
706 | ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0); | ||||
707 | if (graph_execs) | ||||
708 | graph_execs[t] = graph_exec; | ||||
709 | // This needs to be done before we set the new sources on the outputs. | ||||
710 | for (i = 0; i < per_input_size; i++) | ||||
711 | { | ||||
712 | ccv_array_t* const input_source = input_sources[i + t * per_input_size]; | ||||
713 | if (input_source) | ||||
714 | for (j = 0; j < input_source->rnum; j++) | ||||
715 | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ | ||||
716 | .d = *(int*)ccv_array_get(input_source, j)((void*)(((char*)((input_source)->data)) + (size_t)(input_source )->rsize * (size_t)(j))), | ||||
717 | .graph = graph->tape | ||||
718 | }, graph_exec); | ||||
719 | ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size]; | ||||
720 | if (input_alias_source) | ||||
721 | for (j = 0; j < input_alias_source->rnum; j++) | ||||
722 | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ | ||||
723 | .d = *(int*)ccv_array_get(input_alias_source, j)((void*)(((char*)((input_alias_source)->data)) + (size_t)( input_alias_source)->rsize * (size_t)(j))), | ||||
724 | .graph = graph->tape | ||||
725 | }, graph_exec); | ||||
726 | } | ||||
727 | for (i = 0; i < per_input_size; i++) | ||||
728 | { | ||||
729 | ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size]; | ||||
730 | if (!input || input->type == CCV_NNC_TENSOR_CONSTANT) | ||||
731 | continue; | ||||
732 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(input_symbols[i + t * per_input_size ].d))); | ||||
733 | if (!bind->destinations) | ||||
734 | bind->destinations = ccv_array_new(sizeof(int), 1, 0); | ||||
735 | ccv_array_add_unique_int(bind->destinations, graph_exec.d); | ||||
736 | if (input->alias_index_ref) | ||||
737 | { | ||||
738 | const int alias_index = input->alias_index_ref - 1; | ||||
739 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 739, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
740 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | ||||
741 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))); | ||||
742 | if (!root_bind->destinations) | ||||
743 | root_bind->destinations = ccv_array_new(sizeof(int), 1, 0); | ||||
744 | ccv_array_add_unique_int(root_bind->destinations, graph_exec.d); | ||||
745 | } | ||||
746 | } | ||||
747 | for (i = 0; i < per_output_size; i++) | ||||
748 | { | ||||
749 | ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size]; | ||||
750 | if (!output) | ||||
751 | continue; | ||||
752 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(output_symbols[i + t * per_output_size ].d))); | ||||
753 | assert(!bind->sources)((void) sizeof ((!bind->sources) ? 1 : 0), __extension__ ( { if (!bind->sources) ; else __assert_fail ("!bind->sources" , "ccv_nnc_dynamic_graph.c", 753, __extension__ __PRETTY_FUNCTION__ ); })); // This is a new symbol, therefore, no binded sources associated yet. | ||||
754 | bind->sources = ccv_array_new(sizeof(int), 1, 0); | ||||
755 | ccv_array_add_unique_int(bind->sources, graph_exec.d); | ||||
756 | if (output->alias_index_ref) | ||||
757 | { | ||||
758 | const int alias_index = output->alias_index_ref - 1; | ||||
759 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 759, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
760 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | ||||
761 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))); | ||||
762 | if (!root_bind->sources) | ||||
763 | root_bind->sources = ccv_array_new(sizeof(int), 1, 0); | ||||
764 | ccv_array_add_unique_int(root_bind->sources, graph_exec.d); | ||||
765 | } | ||||
766 | } | ||||
767 | } | ||||
768 | } | ||||
769 | // Now, able to free some of the reused outputs. | ||||
770 | for (i = 0; i < freeable_size; i++) | ||||
771 | ccv_nnc_tensor_variable_free(graph, freeables[i]); | ||||
772 | } | ||||
773 | |||||
774 | int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context) | ||||
775 | { | ||||
776 | ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0); | ||||
777 | return CCV_NNC_EXEC_SUCCESS; | ||||
778 | } | ||||
779 | |||||
780 | static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d) | ||||
781 | { | ||||
782 | if (bind->alias_ref) | ||||
783 | bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(bind->alias_ref - 1))); | ||||
784 | if (!bind->sources || bind->sources->rnum == 0) | ||||
785 | return 1; | ||||
786 | int i; | ||||
787 | for (i = 0; i < bind->sources->rnum; i++) | ||||
788 | { | ||||
789 | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | ||||
790 | const ccv_nnc_graph_exec_symbol_t exec_symbol = { | ||||
791 | .d = exec_symbol_d, | ||||
792 | .graph = graph->tape | ||||
793 | }; | ||||
794 | const int* outputs; int output_size; | ||||
795 | ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size); | ||||
796 | int j; | ||||
797 | for (j = 0; j < output_size; j++) | ||||
798 | if (outputs[j] >= 0 && outputs[j] != symbol_d) // If output is me, it is the only output. | ||||
799 | { | ||||
800 | assert(outputs[j] < graph->binds->rnum)((void) sizeof ((outputs[j] < graph->binds->rnum) ? 1 : 0), __extension__ ({ if (outputs[j] < graph->binds-> rnum) ; else __assert_fail ("outputs[j] < graph->binds->rnum" , "ccv_nnc_dynamic_graph.c", 800, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
801 | const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | ||||
802 | // This is in use and is it not a constant symbol. | ||||
803 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | ||||
804 | return 0; | ||||
805 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | ||||
806 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | ||||
807 | // The original is in use and is it not a constant symbol. | ||||
808 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | ||||
809 | return 0; | ||||
810 | if (other_bind->destinations && other_bind->destinations->rnum > 0) | ||||
811 | return 0; | ||||
812 | } | ||||
813 | } | ||||
814 | return 1; | ||||
815 | } | ||||
816 | |||||
817 | static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) | ||||
818 | { | ||||
819 | int i; | ||||
820 | if (bind->destinations) | ||||
821 | { | ||||
822 | int flag = 0; | ||||
823 | for (i = 0; !flag && i < bind->destinations->rnum; i++) | ||||
824 | { | ||||
825 | const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(i))); | ||||
826 | if (exec_symbol_d == freed_exec_symbol_d) | ||||
827 | { | ||||
828 | if (i < bind->destinations->rnum - 1) | ||||
829 | *(int*)ccv_array_get(bind->destinations, i)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(i))) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(bind->destinations ->rnum - 1))); | ||||
830 | --bind->destinations->rnum; | ||||
831 | flag = 1; | ||||
832 | } | ||||
833 | } | ||||
834 | // This symbol can be freed. | ||||
835 | if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) | ||||
836 | { | ||||
837 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | ||||
838 | if (bind->alias_ref) | ||||
839 | { | ||||
840 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(bind->alias_ref - 1))); | ||||
841 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | ||||
842 | root_bind = bind; | ||||
843 | } | ||||
844 | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. | ||||
845 | // It is possible because exec will be freed already, thus, it is safe to remove this alias out. | ||||
846 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && | ||||
847 | ((!root_bind->sources || root_bind->sources->rnum == 0) || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | ||||
848 | root_bind->destinations->rnum == 0) | ||||
849 | { | ||||
850 | if (root_bind->sources) | ||||
851 | for (i = 0; i < root_bind->sources->rnum; i++) | ||||
852 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | ||||
853 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
854 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
855 | .d = tensor_index, | ||||
856 | .graph = graph->tape | ||||
857 | }); | ||||
858 | } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations. | ||||
859 | bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) { | ||||
860 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
861 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
862 | .d = tensor_index, | ||||
863 | .graph = graph->tape | ||||
864 | }); | ||||
865 | } | ||||
866 | } | ||||
867 | } | ||||
868 | } | ||||
869 | |||||
870 | static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) | ||||
871 | { | ||||
872 | int i; | ||||
873 | if (bind->sources) | ||||
874 | { | ||||
875 | int flag = 0; | ||||
876 | for (i = 0; !flag && i < bind->sources->rnum; i++) | ||||
877 | { | ||||
878 | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | ||||
879 | if (exec_symbol_d == freed_exec_symbol_d) | ||||
880 | { | ||||
881 | if (i < bind->sources->rnum - 1) | ||||
882 | *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(bind->sources->rnum - 1))); | ||||
883 | --bind->sources->rnum; | ||||
884 | flag = 1; | ||||
885 | } | ||||
886 | } | ||||
887 | if (flag && !bind->alias_ref && bind->index >= 0 && bind->type == CCV_NNC_TENSOR_CONSTANT && // If it is detached (constant but previously has sources). Now can check again. | ||||
888 | (bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | ||||
889 | (!bind->destinations || bind->destinations->rnum == 0)) | ||||
890 | { | ||||
891 | // If this is constant, set it to be no symbol again. | ||||
892 | ccv_nnc_tensor_variable_t tv = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, bind->index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(bind->index))); | ||||
893 | tv->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | ||||
894 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
895 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
896 | .d = tensor_index, | ||||
897 | .graph = graph->tape | ||||
898 | }); | ||||
899 | } else if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) { | ||||
900 | // This symbol can be freed. | ||||
901 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | ||||
902 | if (bind->alias_ref) | ||||
903 | { | ||||
904 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(bind->alias_ref - 1))); | ||||
905 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | ||||
906 | root_bind = bind; | ||||
907 | } | ||||
908 | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. | ||||
909 | // It is possible because exec will be freed already, thus, it is safe to remove this alias out. | ||||
910 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && | ||||
911 | (root_bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | ||||
912 | (!root_bind->destinations || root_bind->destinations->rnum == 0)) | ||||
913 | { | ||||
914 | for (i = 0; i < root_bind->sources->rnum; i++) | ||||
915 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | ||||
916 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
917 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
918 | .d = tensor_index, | ||||
919 | .graph = graph->tape | ||||
920 | }); | ||||
921 | } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations. | ||||
922 | bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) { | ||||
923 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
924 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
925 | .d = tensor_index, | ||||
926 | .graph = graph->tape | ||||
927 | }); | ||||
928 | } | ||||
929 | } | ||||
930 | } | ||||
931 | } | ||||
932 | |||||
933 | static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws) | ||||
934 | { | ||||
935 | int i; | ||||
936 | for (i = 0; i < input_size; i++) | ||||
937 | if (inputs[i] >= 0 && inputs[i] < binds->rnum) | ||||
938 | { | ||||
939 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i])((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(inputs[i]))); | ||||
940 | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | ||||
941 | continue; | ||||
942 | if (bind->alias_ref) | ||||
943 | { | ||||
944 | const int alias_to = bind->alias_ref - 1; | ||||
945 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(alias_to))); | ||||
946 | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) | ||||
947 | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); | ||||
948 | } | ||||
949 | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws); | ||||
950 | } | ||||
951 | // Note that this works because there is no overlap of inputs / outputs. (What about alias?). | ||||
952 | for (i = 0; i < output_size; i++) | ||||
953 | if (outputs[i] >= 0 && outputs[i] < binds->rnum) | ||||
954 | { | ||||
955 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i])((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(outputs[i]))); | ||||
956 | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | ||||
957 | continue; | ||||
958 | if (bind->alias_ref) | ||||
959 | { | ||||
960 | const int alias_to = bind->alias_ref - 1; | ||||
961 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(alias_to))); | ||||
962 | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) | ||||
963 | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); | ||||
964 | } | ||||
965 | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws); | ||||
966 | } | ||||
967 | } | ||||
968 | |||||
969 | static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol) | ||||
970 | { | ||||
971 | if (!graph->stateful_execs) | ||||
972 | return; | ||||
973 | assert(symbol.d >= 0)((void) sizeof ((symbol.d >= 0) ? 1 : 0), __extension__ ({ if (symbol.d >= 0) ; else __assert_fail ("symbol.d >= 0" , "ccv_nnc_dynamic_graph.c", 973, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
974 | ccv_array_t* const stateful_execs = graph->stateful_execs; | ||||
975 | ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol); | ||||
976 | ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data; | ||||
977 | if (!stateful_exec) | ||||
978 | return; | ||||
979 | // If there is no backward, no need to apply gradients. | ||||
980 | // Otherwise, if we applied gradients, we can free it as well. | ||||
981 | // We don't free this stateful exec because apply gradients doesn't require any variables alive. | ||||
982 | if (!stateful_exec->did_backward_but_not_apply_gradients) | ||||
983 | { | ||||
984 | const int index = stateful_exec->index; | ||||
985 | ccfreefree(stateful_exec); | ||||
986 | if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0) | ||||
987 | graph->reuse_stateful_exec = index; | ||||
988 | *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index)((void*)(((char*)((stateful_execs)->data)) + (size_t)(stateful_execs )->rsize * (size_t)(index))) = 0; | ||||
989 | } else | ||||
990 | stateful_exec->should_free = 1; | ||||
991 | } | ||||
992 | |||||
993 | static int _ccv_nnc_tensor_bind_trace_forward_to_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_graph_bind_t* const bind, ccv_nnc_tensor_variable_graph_bind_t* const root_bind, int* const ws_start, const int assuming_no_source) // assuming_no_source means we are going to remove sources if possible, thus, it is irrelevant. | ||||
994 | { | ||||
995 | int can_free_symbol = 0; | ||||
996 | const int sources_and_is_only_output = (root_bind->sources && root_bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d); | ||||
997 | if (!root_bind->sources || root_bind->sources->rnum == 0 || sources_and_is_only_output || assuming_no_source) | ||||
998 | { | ||||
999 | int i, j; | ||||
1000 | can_free_symbol = 1; // Assume we can free this symbol. | ||||
1001 | if (!graph->ws) | ||||
1002 | graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 0, 0); | ||||
1003 | ccv_array_t* const ws = graph->ws; | ||||
1004 | ccv_array_clear(ws); | ||||
1005 | if (root_bind->destinations) | ||||
1006 | for (i = 0; i < root_bind->destinations->rnum; i++) | ||||
1007 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i)((void*)(((char*)((root_bind->destinations)->data)) + ( size_t)(root_bind->destinations)->rsize * (size_t)(i)))); | ||||
1008 | const int ws_init_size = ws->rnum; | ||||
1009 | *ws_start = ws_init_size; | ||||
1010 | // Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free. | ||||
1011 | if (root_bind->sources) | ||||
1012 | for (i = 0; i < root_bind->sources->rnum; i++) | ||||
1013 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | ||||
1014 | // If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want | ||||
1015 | // to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol. | ||||
1016 | if (ws_init_size == 0) | ||||
1017 | can_free_symbol = (!bind->alias_ref || root_bind->index < 0); | ||||
1018 | // Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources. | ||||
1019 | for (i = 0; i < ws_init_size; i++) | ||||
1020 | { | ||||
1021 | const int exec_symbol_d = *(int*)ccv_array_get(ws, i)((void*)(((char*)((ws)->data)) + (size_t)(ws)->rsize * ( size_t)(i))); | ||||
1022 | const ccv_nnc_graph_exec_symbol_t symbol = { | ||||
1023 | .d = exec_symbol_d, | ||||
1024 | .graph = graph->tape | ||||
1025 | }; | ||||
1026 | const int* inputs; int input_size; | ||||
1027 | const int* outputs; int output_size; | ||||
1028 | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); | ||||
1029 | int flag = 0; // flag denotes whether there are cases to keep this exec symbol. | ||||
1030 | if (!root_bind->sources || root_bind->sources->rnum == 0 || assuming_no_source) | ||||
1031 | { | ||||
1032 | // If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this. | ||||
1033 | for (j = 0; !flag && j < input_size; j++) | ||||
1034 | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d) | ||||
1035 | { | ||||
1036 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(inputs[j]))); | ||||
1037 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | ||||
1038 | flag = 1; | ||||
1039 | else { | ||||
1040 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | ||||
1041 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | ||||
1042 | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT && other_bind->sources && other_bind->sources->rnum > 0); // Constant should have no source, or it is detached. | ||||
1043 | } | ||||
1044 | } | ||||
1045 | } else { | ||||
1046 | // If there are sources, check whether we have outputs or not. If we do, we cannot free this. | ||||
1047 | for (j = 0; !flag && j < output_size; j++) | ||||
1048 | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) | ||||
1049 | { | ||||
1050 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | ||||
1051 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | ||||
1052 | flag = 1; | ||||
1053 | else { | ||||
1054 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | ||||
1055 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | ||||
1056 | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->destinations && other_bind->destinations->rnum > 0); | ||||
1057 | } | ||||
1058 | } | ||||
1059 | } | ||||
1060 | // This exec can be freed if there is no input required or there is no output required. | ||||
1061 | can_free_symbol = (can_free_symbol && !flag); | ||||
1062 | if (!flag) | ||||
1063 | { | ||||
1064 | // Go over inputs and remove all references from binded destinations. | ||||
1065 | // and go over outputs remove all references from binded sources. | ||||
1066 | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); | ||||
1067 | const int* outgoings; int outgoing_size; | ||||
1068 | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); | ||||
1069 | for (j = 0; j < outgoing_size; j++) | ||||
1070 | ccv_array_add_unique_int(ws, outgoings[j]); | ||||
1071 | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); | ||||
1072 | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); | ||||
1073 | } | ||||
1074 | } | ||||
1075 | } | ||||
1076 | return can_free_symbol; | ||||
1077 | } | ||||
1078 | |||||
1079 | static void _ccv_nnc_tensor_bind_trace_backward_to_free(ccv_nnc_dynamic_graph_t* const graph, ccv_array_t* const ws, const int ws_start) | ||||
1080 | { | ||||
1081 | int i, j; | ||||
1082 | // Now, go over the outgoings, if it is removed, add more to it. Note that the ws array can grow while iterating over. | ||||
1083 | for (i = ws_start; i < ws->rnum; i++) | ||||
1084 | { | ||||
1085 | const int exec_symbol_d = *(int*)ccv_array_get(ws, i)((void*)(((char*)((ws)->data)) + (size_t)(ws)->rsize * ( size_t)(i))); | ||||
1086 | const ccv_nnc_graph_exec_symbol_t symbol = { | ||||
1087 | .d = exec_symbol_d, | ||||
1088 | .graph = graph->tape | ||||
1089 | }; | ||||
1090 | const int* inputs; int input_size; | ||||
1091 | const int* outputs; int output_size; | ||||
1092 | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); | ||||
1093 | int flag = 0; | ||||
1094 | for (j = 0; !flag && j < input_size; j++) | ||||
1095 | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum) | ||||
1096 | { | ||||
1097 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(inputs[j]))); | ||||
1098 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | ||||
1099 | flag = 1; | ||||
1100 | else { | ||||
1101 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | ||||
1102 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | ||||
1103 | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT && other_bind->sources && other_bind->sources->rnum > 0); | ||||
1104 | } | ||||
1105 | } | ||||
1106 | if (flag) // If any inputs make free this destination impossible. Check whether all its outputs are done. | ||||
1107 | { | ||||
1108 | int output_flag = 0; | ||||
1109 | for (j = 0; !output_flag && j < output_size; j++) | ||||
1110 | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) | ||||
1111 | { | ||||
1112 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | ||||
1113 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | ||||
1114 | output_flag = 1; | ||||
1115 | else { | ||||
1116 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | ||||
1117 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | ||||
1118 | output_flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->destinations && other_bind->destinations->rnum > 0); | ||||
1119 | } | ||||
1120 | } | ||||
1121 | if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination). | ||||
1122 | flag = 0; | ||||
1123 | } | ||||
1124 | // Went over all the inputs, it turns out no more inputs has other references, safe to remove. | ||||
1125 | if (!flag) | ||||
1126 | { | ||||
1127 | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); | ||||
1128 | const int* outgoings; int outgoing_size; | ||||
1129 | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); | ||||
1130 | // It it has outgoings, add that for further inspection. | ||||
1131 | for (j = 0; j < outgoing_size; j++) | ||||
1132 | ccv_array_add_unique_int(ws, outgoings[j]); | ||||
1133 | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); | ||||
1134 | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); | ||||
1135 | } | ||||
1136 | } | ||||
1137 | } | ||||
1138 | |||||
1139 | void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | ||||
1140 | { | ||||
1141 | // If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output. | ||||
1142 | if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1143 | { | ||||
1144 | // If it is not a free variable, when can we free the symbol and the underlying variable? | ||||
1145 | // 1. There should be no sources (the command generate this tensor should be freed) or the output of these sources is only the current one; | ||||
1146 | // 2. The destinations (the commands that uses this tensor) should have no other inputs, or the other inputs has no binded sources as well. | ||||
1147 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(tensor_variable->symbol.d ))); | ||||
1148 | // There should be no source associated with it no more. | ||||
1149 | // I am free if no exec symbol is producing me or the symbol producing me can only producing me (thus, it is not required to | ||||
1150 | // compute gradient because I am the only variable it can compute gradient for). | ||||
1151 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | ||||
1152 | if (bind->alias_ref) | ||||
1153 | { | ||||
1154 | const int alias_to = bind->alias_ref - 1; | ||||
1155 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(alias_to))); | ||||
1156 | } | ||||
1157 | int ws_start; | ||||
1158 | const int can_free_symbol = _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, root_bind, &ws_start, 0); | ||||
1159 | if (can_free_symbol) | ||||
1160 | { | ||||
1161 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
1162 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | ||||
1163 | _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start); | ||||
1164 | } else { // If this symbol is not freed, move the tensor view to the bind. | ||||
1165 | // If current bind is an alias, and it doesn't have any sources or destinations. We cannot find this alias | ||||
1166 | // through any exec. This is not only safe to delete, but has to be deleted. We don't need to handle this | ||||
1167 | // if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the | ||||
1168 | // alias in that process. | ||||
1169 | if (bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) | ||||
1170 | { | ||||
1171 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
1172 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | ||||
1173 | } else { | ||||
1174 | bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol extra will continue exists. | ||||
1175 | bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback. | ||||
1176 | bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context. | ||||
1177 | bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind. | ||||
1178 | tensor_variable->tensor_view = 0; | ||||
1179 | } | ||||
1180 | } | ||||
1181 | } | ||||
1182 | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1); | ||||
1183 | } | ||||
1184 | |||||
1185 | void ccv_nnc_tensor_variable_detach(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | ||||
1186 | { | ||||
1187 | // This cannot be an alias. | ||||
1188 | assert(!tensor_variable->alias_index_ref)((void) sizeof ((!tensor_variable->alias_index_ref) ? 1 : 0 ), __extension__ ({ if (!tensor_variable->alias_index_ref) ; else __assert_fail ("!tensor_variable->alias_index_ref" , "ccv_nnc_dynamic_graph.c", 1188, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1189 | // If no computation done yet, mark this as constant. | ||||
1190 | if (tensor_variable->symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1191 | { | ||||
1192 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | ||||
1193 | return; | ||||
1194 | } | ||||
1195 | // Otherwise, we need to do some book keeping updates to make sure it doesn't participate gradient computation any more. | ||||
1196 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(tensor_variable->symbol.d ))); | ||||
1197 | // Because tensor variable cannot be alias, its bind cannot have alias pointer. | ||||
1198 | assert(!bind->alias_ref)((void) sizeof ((!bind->alias_ref) ? 1 : 0), __extension__ ({ if (!bind->alias_ref) ; else __assert_fail ("!bind->alias_ref" , "ccv_nnc_dynamic_graph.c", 1198, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1199 | // Go through to break ties between sources and destinations. | ||||
1200 | int i, j; | ||||
1201 | if (bind->sources && bind->destinations) | ||||
1202 | { | ||||
1203 | for (i = 0; i < bind->sources->rnum; i++) | ||||
1204 | { | ||||
1205 | const int s = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | ||||
1206 | const int* outputs; int output_size; | ||||
1207 | const ccv_nnc_graph_exec_symbol_t s_symbol = { | ||||
1208 | .d = s, | ||||
1209 | .graph = graph->tape | ||||
1210 | }; | ||||
1211 | ccv_nnc_graph_exec_symbol_io(graph->tape, s_symbol, 0, 0, &outputs, &output_size); | ||||
1212 | for (j = 0; j < bind->destinations->rnum; j++) | ||||
1213 | { | ||||
1214 | const int d = *(int*)ccv_array_get(bind->destinations, j)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(j))); | ||||
1215 | const ccv_nnc_graph_exec_symbol_t d_symbol = { | ||||
1216 | .d = d, | ||||
1217 | .graph = graph->tape | ||||
1218 | }; | ||||
1219 | const int* inputs; int input_size; | ||||
1220 | ccv_nnc_graph_exec_symbol_io(graph->tape, d_symbol, &inputs, &input_size, 0, 0); | ||||
1221 | int x, y; | ||||
1222 | int flag = 0; // Whether we find a symbol that connects source and destination but not the current one we detach. If found, we cannot break the tie between s_symbol and d_symbol. | ||||
1223 | for (x = 0; !flag && x < output_size; x++) | ||||
1224 | { | ||||
1225 | ccv_nnc_tensor_symbol_t x_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
1226 | .d = outputs[x], | ||||
1227 | .graph = graph->tape | ||||
1228 | }); | ||||
1229 | if (x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1230 | { | ||||
1231 | x_symbol.d = outputs[x]; | ||||
1232 | x_symbol.graph = graph->tape; | ||||
1233 | } | ||||
1234 | if (x_symbol.d == tensor_variable->symbol.d || x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1235 | continue; | ||||
1236 | for (y = 0; !flag && y < input_size; y++) | ||||
1237 | { | ||||
1238 | ccv_nnc_tensor_symbol_t y_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ | ||||
1239 | .d = inputs[y], | ||||
1240 | .graph = graph->tape | ||||
1241 | }); | ||||
1242 | if (y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1243 | { | ||||
1244 | y_symbol.d = inputs[y]; | ||||
1245 | y_symbol.graph = graph->tape; | ||||
1246 | } | ||||
1247 | if (y_symbol.d == tensor_variable->symbol.d || y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | ||||
1248 | continue; | ||||
1249 | flag = (x_symbol.d == y_symbol.d); | ||||
1250 | } | ||||
1251 | } | ||||
1252 | if (!flag) | ||||
1253 | ccv_nnc_graph_exec_symbol_disjoin(graph->tape, s_symbol, d_symbol); | ||||
1254 | } | ||||
1255 | } | ||||
1256 | } | ||||
1257 | const int sources_and_is_only_output = (bind->sources && bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d); | ||||
1258 | if (!bind->sources || bind->sources->rnum == 0 || sources_and_is_only_output) | ||||
1259 | { | ||||
1260 | int ws_start = -1; | ||||
1261 | _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, bind, &ws_start, 1); | ||||
1262 | // Because we are detaching from the graph, there is no need to forward trace to see if it is not used and | ||||
1263 | // then to remove the source execs. We can remove them right now, breaking the graph in two. That is why | ||||
1264 | // we called trace backward to free regardless the outcome of the forward to free. | ||||
1265 | if (ws_start == -1) | ||||
1266 | { | ||||
1267 | if (!graph->ws) | ||||
1268 | graph->ws = ccv_array_new(sizeof(int), bind->destinations ? bind->destinations->rnum : 0, 0); | ||||
1269 | ccv_array_t* const ws = graph->ws; | ||||
1270 | ccv_array_clear(ws); | ||||
1271 | if (bind->sources) | ||||
1272 | for (i = 0; i < bind->sources->rnum; i++) | ||||
1273 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i)))); | ||||
1274 | ws_start = 0; | ||||
1275 | } | ||||
1276 | _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start); | ||||
1277 | } | ||||
1278 | // If now bind has no relevant sources or destinations, we can safely free the underlying tensor symbol. | ||||
1279 | if ((!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) | ||||
1280 | { | ||||
1281 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | ||||
1282 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | ||||
1283 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | ||||
1284 | tensor_variable->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | ||||
1285 | return; | ||||
1286 | } | ||||
1287 | // Mark both as constant, such that even if it cannot be freed now, it can be freed as soon as possible later. | ||||
1288 | bind->type = CCV_NNC_TENSOR_CONSTANT; | ||||
1289 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | ||||
1290 | } | ||||
1291 | |||||
1292 | void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask) | ||||
1293 | { | ||||
1294 | int i, j; | ||||
1295 | ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0); | ||||
1296 | for (i = 0; i < source_variable_size; i++) | ||||
1297 | { | ||||
1298 | if (source_variables[i]->symbol.d < 0) | ||||
1299 | continue; | ||||
1300 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(source_variables[i]->symbol .d))); | ||||
1301 | if (bind->destinations && bind->destinations->rnum > 0) | ||||
1302 | for (j = 0; j < bind->destinations->rnum; j++) | ||||
1303 | { | ||||
1304 | // It is ok to have duplicate symbols. | ||||
1305 | const int d = *(int*)ccv_array_get(bind->destinations, j)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(j))); | ||||
1306 | ccv_nnc_graph_exec_symbol_t symbol = { | ||||
1307 | .d = d, | ||||
1308 | .graph = graph->tape | ||||
1309 | }; | ||||
1310 | ccv_array_push(sources_destinations, &symbol); | ||||
1311 | } | ||||
1312 | } | ||||
1313 | const int source_size = sources_destinations->rnum; | ||||
1314 | for (i = 0; i < destination_variable_size; i++) | ||||
1315 | { | ||||
1316 | if (destination_variables[i]->symbol.d < 0) | ||||
1317 | continue; | ||||
1318 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(destination_variables[i]-> symbol.d))); | ||||
1319 | if (bind->sources && bind->sources->rnum > 0) | ||||
1320 | for (j = 0; j < bind->sources->rnum; j++) | ||||
1321 | { | ||||
1322 | // It is ok to have duplicate symbols. | ||||
1323 | const int d = *(int*)ccv_array_get(bind->sources, j)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(j))); | ||||
1324 | ccv_nnc_graph_exec_symbol_t symbol = { | ||||
1325 | .d = d, | ||||
1326 | .graph = graph->tape | ||||
1327 | }; | ||||
1328 | ccv_array_push(sources_destinations, &symbol); | ||||
1329 | } | ||||
1330 | } | ||||
1331 | const int destination_size = sources_destinations->rnum - source_size; | ||||
1332 | if (source_size == 0 || destination_size == 0) | ||||
1333 | { | ||||
1334 | ccv_array_free(sources_destinations); | ||||
1335 | return; | ||||
1336 | } | ||||
1337 | const int bitmask_size = ((source_size + 63) >> 6); | ||||
1338 | assert(bitmask_size < 256)((void) sizeof ((bitmask_size < 256) ? 1 : 0), __extension__ ({ if (bitmask_size < 256) ; else __assert_fail ("bitmask_size < 256" , "ccv_nnc_dynamic_graph.c", 1338, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1339 | uint64_t exec_bitmask[bitmask_size]; | ||||
1340 | ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0)((void*)(((char*)((sources_destinations)->data)) + (size_t )(sources_destinations)->rsize * (size_t)(0))), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size)((void*)(((char*)((sources_destinations)->data)) + (size_t )(sources_destinations)->rsize * (size_t)(source_size))), destination_size, exec_bitmask); | ||||
1341 | int k = 0; | ||||
1342 | for (i = 0; i < source_variable_size; i++) | ||||
1343 | { | ||||
1344 | if (source_variables[i]->symbol.d < 0) | ||||
1345 | { | ||||
1346 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); | ||||
1347 | continue; | ||||
1348 | } | ||||
1349 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(source_variables[i]->symbol .d))); | ||||
1350 | int flag = 0; | ||||
1351 | if (bind->destinations && bind->destinations->rnum > 0) | ||||
1352 | { | ||||
1353 | assert(k <= source_size - bind->destinations->rnum)((void) sizeof ((k <= source_size - bind->destinations-> rnum) ? 1 : 0), __extension__ ({ if (k <= source_size - bind ->destinations->rnum) ; else __assert_fail ("k <= source_size - bind->destinations->rnum" , "ccv_nnc_dynamic_graph.c", 1353, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
1354 | for (j = 0; !flag && j < bind->destinations->rnum; j++) | ||||
1355 | flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]); | ||||
1356 | k += bind->destinations->rnum; | ||||
1357 | } | ||||
1358 | if (flag) | ||||
1359 | bitmask[i >> 6] |= ((uint64_t)1 << (i & 63)); | ||||
1360 | else | ||||
1361 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); | ||||
1362 | } | ||||
1363 | ccv_array_free(sources_destinations); | ||||
1364 | } | ||||
1365 | |||||
1366 | int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type) | ||||
1367 | { | ||||
1368 | return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type); | ||||
1369 | } | ||||
1370 | |||||
1371 | void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out) | ||||
1372 | { | ||||
1373 | ccv_nnc_symbolic_graph_dot(graph->tape, flags, out); | ||||
1374 | } | ||||
1375 | |||||
1376 | void ccv_nnc_dynamic_graph_format(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context) | ||||
1377 | { | ||||
1378 | ccv_nnc_symbolic_graph_format(graph->tape, 0, 0, 0, 0, format_fn, context); | ||||
1379 | } |