File: | nnc/ccv_nnc_dynamic_graph.c |
Warning: | line 522, column 9 Branch condition evaluates to a garbage value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv_nnc.h" | |||
2 | #include "ccv_nnc_easy.h" | |||
3 | #include "ccv_nnc_internal.h" | |||
4 | #include "ccv_nnc_easy.h" | |||
5 | #include "ccv_internal.h" | |||
6 | #include "_ccv_nnc_dynamic_graph.h" | |||
7 | ||||
8 | // MARK - Level-4 API | |||
9 | ||||
10 | ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void) | |||
11 | { | |||
12 | ccv_nnc_dynamic_graph_t* graph = ccmallocmalloc(sizeof(ccv_nnc_dynamic_graph_t)); | |||
13 | graph->no_grad = 0; | |||
14 | graph->reuse_var = -1; | |||
15 | graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0); | |||
16 | graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0); | |||
17 | graph->tape = ccv_nnc_symbolic_graph_new(); | |||
18 | graph->xpu_alloc.mp_hdr = -1; | |||
19 | graph->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str(); | |||
20 | graph->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc(); | |||
21 | // These may not be used as frequent, init as needed. | |||
22 | graph->stateful_execs = 0; | |||
23 | graph->reuse_stateful_exec = -1; | |||
24 | graph->stream_map = 0; | |||
25 | graph->ws = 0; | |||
26 | return graph; | |||
27 | } | |||
28 | ||||
29 | static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing) | |||
30 | { | |||
31 | const int index = tensor_variable->index; | |||
32 | if (tensor_variable->tensor_view) | |||
33 | { | |||
34 | if (tensor_variable->destructor_hook.func) | |||
35 | tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context); | |||
36 | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view)((uintptr_t)(tensor_variable->tensor_view) & 1)) | |||
37 | { | |||
38 | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) | |||
39 | ccv_nnc_tensor_view_free(tensor_variable->tensor_view); | |||
40 | else { | |||
41 | if (!tensor_variable->alias_index_ref && // Return this memory to the graph. | |||
42 | CCV_TENSOR_GET_MEMORY(tensor_variable->tensor_view->info.type)((tensor_variable->tensor_view->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) | |||
43 | ccv_nnc_xpu_free(&graph->xpu_alloc, tensor_variable->tensor_view->data.ptr); | |||
44 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); | |||
45 | } | |||
46 | } | |||
47 | } | |||
48 | ccfreefree(tensor_variable); | |||
49 | if (zeroing) | |||
50 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(index))) = 0; | |||
51 | int i; | |||
52 | for (i = graph->vars->rnum - 1; i >= 0; i--) | |||
53 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) != 0) | |||
54 | { | |||
55 | graph->vars->rnum = i + 1; | |||
56 | break; | |||
57 | } | |||
58 | if (index < graph->vars->rnum && | |||
59 | (index < graph->reuse_var || graph->reuse_var < 0)) | |||
60 | graph->reuse_var = index; | |||
61 | else if (graph->reuse_var >= graph->vars->rnum) | |||
62 | graph->reuse_var = -1; | |||
63 | } | |||
64 | ||||
65 | static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing) | |||
66 | { | |||
67 | bind->index = CCV_NNC_TENSOR_NO_VARIABLE; | |||
68 | if (bind->sources) | |||
69 | ccv_array_free(bind->sources); | |||
70 | if (bind->destinations) | |||
71 | ccv_array_free(bind->destinations); | |||
72 | if (bind->tensor_view) | |||
73 | { | |||
74 | if (bind->destructor_hook.func) | |||
75 | bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context); | |||
76 | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view)((uintptr_t)(bind->tensor_view) & 1)) | |||
77 | { | |||
78 | if (CCV_IS_TENSOR_VIEW(bind->tensor_view)((*(int*)(bind->tensor_view)) & CCV_TENSOR_VIEW)) | |||
79 | ccv_nnc_tensor_view_free(bind->tensor_view); | |||
80 | else { | |||
81 | if (!bind->alias_ref && // Return this memory to the graph. | |||
82 | CCV_TENSOR_GET_MEMORY(bind->tensor_view->info.type)((bind->tensor_view->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) | |||
83 | ccv_nnc_xpu_free(&graph->xpu_alloc, bind->tensor_view->data.ptr); | |||
84 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view); | |||
85 | } | |||
86 | } | |||
87 | } | |||
88 | if (zeroing) | |||
89 | { | |||
90 | bind->sources = 0; | |||
91 | bind->destinations = 0; | |||
92 | bind->tensor_view = 0; | |||
93 | bind->destructor_hook.func = 0; | |||
94 | bind->destructor_hook.context = 0; | |||
95 | } | |||
96 | } | |||
97 | ||||
98 | void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph) | |||
99 | { | |||
100 | int i; | |||
101 | for (i = 0; i < graph->vars->rnum; i++) | |||
102 | { | |||
103 | ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))); | |||
104 | if (tensor_variable) | |||
105 | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0); | |||
106 | } | |||
107 | ccv_array_free(graph->vars); | |||
108 | for (i = 0; i < graph->binds->rnum; i++) | |||
109 | _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(i))), 0); | |||
110 | ccv_array_free(graph->binds); | |||
111 | ccv_nnc_symbolic_graph_free(graph->tape); | |||
112 | if (graph->ws) | |||
113 | ccv_array_free(graph->ws); | |||
114 | if (graph->stateful_execs) | |||
115 | { | |||
116 | for (i = 0; i < graph->stateful_execs->rnum; i++) | |||
117 | { | |||
118 | ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i)((void*)(((char*)((graph->stateful_execs)->data)) + (size_t )(graph->stateful_execs)->rsize * (size_t)(i))); | |||
119 | if (stateful_exec) | |||
120 | ccfreefree(stateful_exec); | |||
121 | } | |||
122 | ccv_array_free(graph->stateful_execs); | |||
123 | } | |||
124 | if (graph->stream_map) | |||
125 | { | |||
126 | khiter_t k; | |||
127 | for (k = kh_begin(graph->stream_map)(khint_t)(0); k != kh_end(graph->stream_map)((graph->stream_map)->n_buckets); ++k) | |||
128 | { | |||
129 | if (!kh_exist(graph->stream_map, k)(!(((graph->stream_map)->flags[(k)>>4]>>((( k)&0xfU)<<1))&3))) | |||
130 | continue; | |||
131 | ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]); | |||
132 | ccv_nnc_stream_context_free(stream); | |||
133 | } | |||
134 | kh_destroy(stream_map, graph->stream_map)kh_destroy_stream_map(graph->stream_map); | |||
135 | } | |||
136 | ccv_nnc_xpu_alloc_destroy(&graph->xpu_alloc); | |||
137 | ccfreefree(graph); | |||
138 | } | |||
139 | ||||
140 | void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor) | |||
141 | { | |||
142 | assert(!tensor_variable->alias_index_ref)((void) sizeof ((!tensor_variable->alias_index_ref) ? 1 : 0 ), __extension__ ({ if (!tensor_variable->alias_index_ref) ; else __assert_fail ("!tensor_variable->alias_index_ref" , "ccv_nnc_dynamic_graph.c", 142, __extension__ __PRETTY_FUNCTION__ ); })); | |||
143 | if (tensor_variable->tensor_view && !CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view)((uintptr_t)(tensor_variable->tensor_view) & 1)) | |||
144 | { | |||
145 | assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))((void) sizeof ((!((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW)) ? 1 : 0), __extension__ ({ if (!((*( int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) ; else __assert_fail ("!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)" , "ccv_nnc_dynamic_graph.c", 145, __extension__ __PRETTY_FUNCTION__ ); })); | |||
146 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); | |||
147 | } | |||
148 | tensor_variable->info = tensor->info; | |||
149 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1); | |||
150 | } | |||
151 | ||||
152 | void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context) | |||
153 | { | |||
154 | tensor_variable->destructor_hook.func = func; | |||
155 | tensor_variable->destructor_hook.context = context; | |||
156 | } | |||
157 | ||||
158 | inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info) | |||
159 | { | |||
160 | tensor_variable->alias_index_ref = 0; | |||
161 | tensor_variable->destructor_hook.func = 0; | |||
162 | tensor_variable->destructor_hook.context = 0; | |||
163 | tensor_variable->info = info; | |||
164 | tensor_variable->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
165 | tensor_variable->tensor_view = 0; | |||
166 | if (graph->reuse_var >= 0) | |||
167 | { | |||
168 | const int reuse_var = graph->reuse_var; | |||
169 | assert(reuse_var < graph->vars->rnum)((void) sizeof ((reuse_var < graph->vars->rnum) ? 1 : 0), __extension__ ({ if (reuse_var < graph->vars->rnum ) ; else __assert_fail ("reuse_var < graph->vars->rnum" , "ccv_nnc_dynamic_graph.c", 169, __extension__ __PRETTY_FUNCTION__ ); })); | |||
170 | tensor_variable->index = reuse_var; | |||
171 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(reuse_var))) = tensor_variable; | |||
172 | int i; | |||
173 | graph->reuse_var = -1; | |||
174 | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++) | |||
175 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) == 0) | |||
176 | graph->reuse_var = i; | |||
177 | } else { | |||
178 | tensor_variable->index = graph->vars->rnum; | |||
179 | ccv_array_push(graph->vars, &tensor_variable); | |||
180 | } | |||
181 | } | |||
182 | ||||
183 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) | |||
184 | { | |||
185 | ccv_nnc_tensor_variable_t tensor_variable = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | |||
186 | tensor_variable->type = CCV_NNC_TENSOR_VARIABLE; | |||
187 | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); | |||
188 | return tensor_variable; | |||
189 | } | |||
190 | ||||
191 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) | |||
192 | { | |||
193 | ccv_nnc_tensor_variable_t tensor_variable = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | |||
194 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | |||
195 | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); | |||
196 | return tensor_variable; | |||
197 | } | |||
198 | ||||
199 | int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
200 | { | |||
201 | return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT; | |||
202 | } | |||
203 | ||||
204 | ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
205 | { | |||
206 | return tensor_variable->info; | |||
207 | } | |||
208 | ||||
209 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info) | |||
210 | { | |||
211 | assert(!tensor_variable->alias_index_ref)((void) sizeof ((!tensor_variable->alias_index_ref) ? 1 : 0 ), __extension__ ({ if (!tensor_variable->alias_index_ref) ; else __assert_fail ("!tensor_variable->alias_index_ref" , "ccv_nnc_dynamic_graph.c", 211, __extension__ __PRETTY_FUNCTION__ ); })); | |||
212 | ccv_nnc_tensor_variable_t variable_alias = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | |||
213 | variable_alias->type = tensor_variable->type; | |||
214 | variable_alias->alias_index_ref = tensor_variable->index + 1; | |||
215 | variable_alias->info = info; | |||
216 | variable_alias->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
217 | variable_alias->destructor_hook.func = 0; | |||
218 | variable_alias->destructor_hook.context = 0; | |||
219 | variable_alias->tensor_view = 0; | |||
220 | memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
221 | memcpy(variable_alias->inc, inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
222 | if (graph->reuse_var >= 0) | |||
223 | { | |||
224 | const int reuse_var = graph->reuse_var; | |||
225 | assert(reuse_var < graph->vars->rnum)((void) sizeof ((reuse_var < graph->vars->rnum) ? 1 : 0), __extension__ ({ if (reuse_var < graph->vars->rnum ) ; else __assert_fail ("reuse_var < graph->vars->rnum" , "ccv_nnc_dynamic_graph.c", 225, __extension__ __PRETTY_FUNCTION__ ); })); | |||
226 | variable_alias->index = reuse_var; | |||
227 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(reuse_var))) = variable_alias; | |||
228 | int i; | |||
229 | graph->reuse_var = -1; | |||
230 | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++) | |||
231 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) == 0) | |||
232 | graph->reuse_var = i; | |||
233 | } else { | |||
234 | variable_alias->index = graph->vars->rnum; | |||
235 | ccv_array_push(graph->vars, &variable_alias); | |||
236 | } | |||
237 | return variable_alias; | |||
238 | } | |||
239 | ||||
240 | ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context) | |||
241 | { | |||
242 | if (tensor_variable->tensor_view) | |||
243 | { | |||
244 | if (tensor_variable->alias_index_ref) | |||
245 | { | |||
246 | const int alias_index = tensor_variable->alias_index_ref - 1; | |||
247 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 247, __extension__ __PRETTY_FUNCTION__ ); })); | |||
248 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
249 | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) | |||
250 | { | |||
251 | ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view; | |||
252 | // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid. | |||
253 | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv))((void) sizeof ((!((uintptr_t)(tv) & 1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t)(tv) & 1)) ; else __assert_fail ("!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv)" , "ccv_nnc_dynamic_graph.c", 253, __extension__ __PRETTY_FUNCTION__ ); })); | |||
254 | // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed. | |||
255 | tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8 + tv->off; | |||
256 | } else { | |||
257 | ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | |||
258 | // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid. | |||
259 | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv))((void) sizeof ((!((uintptr_t)(tv) & 1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t)(tv) & 1)) ; else __assert_fail ("!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv)" , "ccv_nnc_dynamic_graph.c", 259, __extension__ __PRETTY_FUNCTION__ ); })); | |||
260 | // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed. | |||
261 | tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8; | |||
262 | } | |||
263 | } | |||
264 | return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(tensor_variable->tensor_view ) & ~(uintptr_t)1)); | |||
265 | } | |||
266 | if (!tensor_variable->alias_index_ref) | |||
267 | { | |||
268 | // If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0. | |||
269 | if (ccv_nnc_is_tensor_auto(tensor_variable->info)) | |||
270 | return 0; | |||
271 | void* ptr = 0; | |||
272 | if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type)((tensor_variable->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) | |||
273 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type)(((tensor_variable->info.type) & 0xfff00) >> 8), stream_context, ccv_nnc_tensor_data_size(tensor_variable->info)); | |||
274 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0); | |||
275 | assert(tensor_variable->tensor_view->data.u8)((void) sizeof ((tensor_variable->tensor_view->data.u8) ? 1 : 0), __extension__ ({ if (tensor_variable->tensor_view ->data.u8) ; else __assert_fail ("tensor_variable->tensor_view->data.u8" , "ccv_nnc_dynamic_graph.c", 275, __extension__ __PRETTY_FUNCTION__ ); })); | |||
276 | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | |||
277 | } | |||
278 | const int alias_index = tensor_variable->alias_index_ref - 1; | |||
279 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 279, __extension__ __PRETTY_FUNCTION__ ); })); | |||
280 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
281 | assert(!variable_to->alias_index_ref)((void) sizeof ((!variable_to->alias_index_ref) ? 1 : 0), __extension__ ({ if (!variable_to->alias_index_ref) ; else __assert_fail ("!variable_to->alias_index_ref", "ccv_nnc_dynamic_graph.c" , 281, __extension__ __PRETTY_FUNCTION__); })); | |||
282 | if (!variable_to->tensor_view) | |||
283 | { | |||
284 | // If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0. | |||
285 | if (ccv_nnc_is_tensor_auto(variable_to->info)) | |||
286 | return 0; | |||
287 | void* ptr = 0; | |||
288 | assert(variable_to->info.type == tensor_variable->info.type)((void) sizeof ((variable_to->info.type == tensor_variable ->info.type) ? 1 : 0), __extension__ ({ if (variable_to-> info.type == tensor_variable->info.type) ; else __assert_fail ("variable_to->info.type == tensor_variable->info.type" , "ccv_nnc_dynamic_graph.c", 288, __extension__ __PRETTY_FUNCTION__ ); })); | |||
289 | if (CCV_TENSOR_GET_MEMORY(variable_to->info.type)((variable_to->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) | |||
290 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type)(((variable_to->info.type) & 0xfff00) >> 8), stream_context, ccv_nnc_tensor_data_size(variable_to->info)); | |||
291 | variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0); | |||
292 | assert(variable_to->tensor_view->data.u8)((void) sizeof ((variable_to->tensor_view->data.u8) ? 1 : 0), __extension__ ({ if (variable_to->tensor_view->data .u8) ; else __assert_fail ("variable_to->tensor_view->data.u8" , "ccv_nnc_dynamic_graph.c", 292, __extension__ __PRETTY_FUNCTION__ ); })); | |||
293 | } | |||
294 | int no_ofs = 1; | |||
295 | int i; | |||
296 | for (i = 0; no_ofs && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | |||
297 | no_ofs = (tensor_variable->ofs[i] == 0); | |||
298 | int no_inc = 1; | |||
299 | for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | |||
300 | no_inc = (tensor_variable->inc[i] == 0); | |||
301 | if (!no_inc) | |||
302 | no_inc = (memcmp(tensor_variable->inc, tensor_variable->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) == 0); | |||
303 | assert(ccv_nnc_tensor_count(tensor_variable->info) <= ccv_nnc_tensor_count(variable_to->info))((void) sizeof ((ccv_nnc_tensor_count(tensor_variable->info ) <= ccv_nnc_tensor_count(variable_to->info)) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count(tensor_variable-> info) <= ccv_nnc_tensor_count(variable_to->info)) ; else __assert_fail ("ccv_nnc_tensor_count(tensor_variable->info) <= ccv_nnc_tensor_count(variable_to->info)" , "ccv_nnc_dynamic_graph.c", 303, __extension__ __PRETTY_FUNCTION__ ); })); | |||
304 | if (no_ofs && no_inc) | |||
305 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8, tensor_variable->info, 0); | |||
306 | else | |||
307 | tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1)), tensor_variable->info, tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc); | |||
308 | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | |||
309 | } | |||
310 | ||||
311 | static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol) | |||
312 | { | |||
313 | if (symbol.d >= graph->binds->rnum) | |||
314 | { | |||
315 | const int rnum = graph->binds->rnum; | |||
316 | ccv_array_resize(graph->binds, symbol.d + 1); | |||
317 | int i; | |||
318 | for (i = rnum; i < graph->binds->rnum; i++) | |||
319 | ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(i))))->index = CCV_NNC_TENSOR_NO_VARIABLE; | |||
320 | } | |||
321 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(symbol.d))); | |||
322 | bind->type = tensor_variable->type; | |||
323 | bind->index = tensor_variable->index; | |||
324 | if (tensor_variable->alias_index_ref) | |||
325 | { | |||
326 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
327 | .d = symbol.d, | |||
328 | .graph = graph->tape | |||
329 | }); | |||
330 | assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum)((void) sizeof ((alias_to.d >= 0 && alias_to.d < graph->binds->rnum) ? 1 : 0), __extension__ ({ if (alias_to .d >= 0 && alias_to.d < graph->binds->rnum ) ; else __assert_fail ("alias_to.d >= 0 && alias_to.d < graph->binds->rnum" , "ccv_nnc_dynamic_graph.c", 330, __extension__ __PRETTY_FUNCTION__ ); })); | |||
331 | bind->alias_ref = alias_to.d + 1; | |||
332 | } else | |||
333 | bind->alias_ref = 0; | |||
334 | if (bind->sources) | |||
335 | ccv_array_free(bind->sources); | |||
336 | bind->sources = 0; | |||
337 | if (bind->destinations) | |||
338 | ccv_array_free(bind->destinations); | |||
339 | bind->destinations = 0; | |||
340 | bind->destructor_hook.func = 0; | |||
341 | bind->destructor_hook.context = 0; | |||
342 | bind->tensor_view = 0; | |||
343 | } | |||
344 | ||||
345 | static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
346 | { | |||
347 | if (tensor_variable->symbol.d >= 0) | |||
348 | return tensor_variable->symbol; | |||
349 | if (!tensor_variable->alias_index_ref) | |||
350 | { | |||
351 | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0); | |||
352 | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); | |||
353 | return symbol; | |||
354 | } | |||
355 | const int alias_index = tensor_variable->alias_index_ref - 1; | |||
356 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 356, __extension__ __PRETTY_FUNCTION__ ); })); | |||
357 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
358 | assert(!variable_to->alias_index_ref)((void) sizeof ((!variable_to->alias_index_ref) ? 1 : 0), __extension__ ({ if (!variable_to->alias_index_ref) ; else __assert_fail ("!variable_to->alias_index_ref", "ccv_nnc_dynamic_graph.c" , 358, __extension__ __PRETTY_FUNCTION__); })); | |||
359 | int no_inc = 1; | |||
360 | int i; | |||
361 | for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | |||
362 | no_inc = (tensor_variable->inc[i] == 0); | |||
363 | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc, tensor_variable->info, 0); | |||
364 | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); | |||
365 | return symbol; | |||
366 | } | |||
367 | ||||
368 | // Return the tensor variable that is old (the provided tensor variable will have a new setting). | |||
369 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable) | |||
370 | { | |||
371 | struct ccv_nnc_tensor_variable_s x = *tensor_variable; | |||
372 | ccv_nnc_tensor_variable_t new_variable; | |||
373 | // Need to handle alias. | |||
374 | if (x.alias_index_ref) | |||
375 | new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(x.alias_index_ref - 1))), x.ofs, x.inc, x.info); | |||
376 | else | |||
377 | new_variable = ccv_nnc_tensor_variable_new(graph, x.info)ccv_nnc_tensor_variable_new_impl(graph, x.info); | |||
378 | *tensor_variable = *new_variable; | |||
379 | *new_variable = x; | |||
380 | // The index should be the same though. | |||
381 | const int index = new_variable->index; | |||
382 | new_variable->index = tensor_variable->index; | |||
383 | if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
384 | { | |||
385 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(new_variable->symbol.d))); | |||
386 | bind->index = new_variable->index; | |||
387 | } | |||
388 | tensor_variable->index = index; | |||
389 | return new_variable; | |||
390 | } | |||
391 | ||||
392 | void ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad) | |||
393 | { | |||
394 | dynamic_graph->no_grad = no_grad; | |||
395 | } | |||
396 | ||||
397 | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type) | |||
398 | { | |||
399 | if (!graph->stream_map) | |||
400 | graph->stream_map = kh_init(stream_map)kh_init_stream_map(); | |||
401 | int ret = 0; | |||
402 | khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret)kh_put_stream_map(graph->stream_map, type, &ret); | |||
403 | assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if ( ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_nnc_dynamic_graph.c" , 403, __extension__ __PRETTY_FUNCTION__); })); | |||
404 | ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]); | |||
405 | // If ret == 0, the key already exist, we can return directly, otherwise, create and return. | |||
406 | if (ret != 0) | |||
407 | { | |||
408 | stream = ccv_nnc_stream_context_new(type); | |||
409 | kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]) = stream; | |||
410 | } | |||
411 | return stream; | |||
412 | } | |||
413 | ||||
414 | typedef struct { | |||
415 | ccv_nnc_dynamic_graph_t* graph; | |||
416 | int stream_type; | |||
417 | } ccv_nnc_dynamic_graph_neighbor_context_discovery_t; | |||
418 | ||||
419 | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context) | |||
420 | { | |||
421 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context; | |||
422 | int type = discovery->stream_type; | |||
423 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
424 | return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type); | |||
425 | } | |||
426 | ||||
427 | void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs) | |||
428 | { | |||
429 | int i, j; | |||
430 | for (i = 0; i < input_size; i++) | |||
| ||||
431 | if (inputs[i] && !inputs[i]->alias_index_ref) | |||
432 | { assert(inputs[i]->tensor_view)((void) sizeof ((inputs[i]->tensor_view) ? 1 : 0), __extension__ ({ if (inputs[i]->tensor_view) ; else __assert_fail ("inputs[i]->tensor_view" , "ccv_nnc_dynamic_graph.c", 432, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
433 | ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
434 | for (i = 0; i
| |||
435 | input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context)ccv_nnc_tensor_from_variable_impl(graph, inputs[i], stream_context ) : 0; | |||
436 | ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
437 | for (i = 0; i
| |||
438 | input_symbols[i] = inputs[i] ? _ccv_nnc_tensor_symbol_from_variable(graph, inputs[i]) : NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
439 | ccv_array_t* input_sources[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
440 | ccv_array_t* input_alias_sources[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
441 | for (i = 0; i
| |||
442 | { | |||
443 | input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(input_symbols[i].d))))->sources : 0; | |||
444 | if (inputs[i] && inputs[i]->alias_index_ref) | |||
445 | { | |||
446 | const int alias_index_ref = inputs[i]->alias_index_ref - 1; | |||
447 | assert(alias_index_ref >= 0)((void) sizeof ((alias_index_ref >= 0) ? 1 : 0), __extension__ ({ if (alias_index_ref >= 0) ; else __assert_fail ("alias_index_ref >= 0" , "ccv_nnc_dynamic_graph.c", 447, __extension__ __PRETTY_FUNCTION__ ); })); | |||
448 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index_ref))); | |||
449 | input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))))->sources; | |||
450 | } else | |||
451 | input_alias_sources[i] = 0; | |||
452 | } | |||
453 | const int parallel_count = ccv_max(1, parallel)({ typeof (1) _a = (1); typeof (parallel) _b = (parallel); (_a > _b) ? _a : _b; }); | |||
454 | assert(input_size % parallel_count == 0)((void) sizeof ((input_size % parallel_count == 0) ? 1 : 0), __extension__ ({ if (input_size % parallel_count == 0) ; else __assert_fail ("input_size % parallel_count == 0", "ccv_nnc_dynamic_graph.c" , 454, __extension__ __PRETTY_FUNCTION__); })); | |||
455 | const int per_input_size = input_size / parallel_count; | |||
456 | assert(output_size % parallel_count == 0)((void) sizeof ((output_size % parallel_count == 0) ? 1 : 0), __extension__ ({ if (output_size % parallel_count == 0) ; else __assert_fail ("output_size % parallel_count == 0", "ccv_nnc_dynamic_graph.c" , 456, __extension__ __PRETTY_FUNCTION__); })); | |||
457 | const int per_output_size = output_size / parallel_count; | |||
458 | int output_auto = 0; | |||
459 | for (i = 0; !output_auto
| |||
460 | output_auto = outputs[i] ? ccv_nnc_is_tensor_auto(outputs[i]->info) : 0; | |||
461 | // One extra step, infer the parameters for outputs. | |||
462 | if (output_auto
| |||
463 | { | |||
464 | ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)({ typeof (1) _a = (1); typeof (per_input_size) _b = (per_input_size ); (_a > _b) ? _a : _b; })]; | |||
465 | ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | |||
466 | for (i = 0; i < parallel_count; i++) | |||
467 | { | |||
468 | for (j = 0; j < per_input_size; j++) | |||
469 | input_params[j] = inputs[j + i * per_input_size] ? inputs[j + i * per_input_size]->info : ccv_nnc_tensor_auto; | |||
470 | for (j = 0; j < per_output_size; j++) | |||
471 | output_params[j] = outputs[j + i * per_output_size] ? outputs[j + i * per_output_size]->info : ccv_nnc_tensor_auto; | |||
472 | ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size); | |||
473 | for (j = 0; j < per_output_size; j++) | |||
474 | if (outputs[j + i * per_output_size]) | |||
475 | outputs[j + i * per_output_size]->info = output_params[j]; | |||
476 | } | |||
477 | } | |||
478 | int freeable_size = 0; | |||
479 | ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)({ typeof (1) _a = (1); typeof (output_size) _b = (output_size ); (_a > _b) ? _a : _b; })]; | |||
480 | // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee. | |||
481 | for (i = 0; i
| |||
482 | { | |||
483 | // First, go over to see whether there is enforce inplace. | |||
484 | int enforce_idx = -1; | |||
485 | for (j = 0; enforce_idx < 0 && j < input_size; j++) | |||
486 | if (inputs[j] && ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size)) | |||
487 | enforce_idx = j; | |||
488 | if (enforce_idx >= 0) | |||
489 | { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) ? 1 : 0 ), __extension__ ({ if (outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) ; else __assert_fail ("outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL" , "ccv_nnc_dynamic_graph.c", 489, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
490 | // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic. | |||
491 | if (outputs[i] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
492 | { | |||
493 | const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[i]->symbol.d))); | |||
494 | if (enforce_idx >= 0) | |||
495 | { assert(!bind->destinations || bind->destinations->rnum == 0)((void) sizeof ((!bind->destinations || bind->destinations ->rnum == 0) ? 1 : 0), __extension__ ({ if (!bind->destinations || bind->destinations->rnum == 0) ; else __assert_fail ("!bind->destinations || bind->destinations->rnum == 0" , "ccv_nnc_dynamic_graph.c", 495, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
496 | if (bind->sources && bind->sources->rnum > 0) | |||
497 | { | |||
498 | const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]); | |||
499 | // If this is enforce output, make sure the tensor view is taken by the output. | |||
500 | if (enforce_idx >= 0) | |||
501 | { | |||
502 | outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output. | |||
503 | old_var->tensor_view = 0; | |||
504 | } | |||
505 | } | |||
506 | } | |||
507 | } | |||
508 | ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | |||
509 | if (parallel_count > 1) | |||
510 | { | |||
511 | const int max_device_id_size = per_input_size + per_output_size; | |||
512 | assert(max_device_id_size > 0)((void) sizeof ((max_device_id_size > 0) ? 1 : 0), __extension__ ({ if (max_device_id_size > 0) ; else __assert_fail ("max_device_id_size > 0" , "ccv_nnc_dynamic_graph.c", 512, __extension__ __PRETTY_FUNCTION__ ); })); | |||
513 | int device_ids[max_device_id_size]; | |||
514 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
515 | ccv_nnc_stream_signal_t* signal; | |||
516 | if (stream_context) | |||
517 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
518 | for (i = 0; i
| |||
519 | { | |||
520 | int flag = 0; | |||
521 | for (j = 0; !flag
| |||
522 | if (input_tensors[i * per_input_size + j]) | |||
| ||||
523 | flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type)((input_tensors[i * per_input_size + j]->info.type) & 0x3 ) == CCV_TENSOR_GPU_MEMORY); | |||
524 | for (j = 0; j < per_output_size; j++) | |||
525 | { | |||
526 | output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context)ccv_nnc_tensor_from_variable_impl(graph, outputs[j + i * per_output_size ], stream_context) : 0; | |||
527 | if (output_tensors[j] && !flag) | |||
528 | flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type)((output_tensors[j]->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY); | |||
529 | } | |||
530 | const int stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
531 | const int tensor_type = flag ? CCV_TENSOR_GPU_MEMORY : CCV_TENSOR_CPU_MEMORY; | |||
532 | const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size); | |||
533 | ccv_nnc_stream_context_t* stream_0 = 0; | |||
534 | for (j = 0; j < device_id_size; j++) | |||
535 | { | |||
536 | int type = stream_type; | |||
537 | CCV_STREAM_SET_DEVICE_ID(type, device_ids[j])(type) = (((type) & ~0xfff00) | (((device_ids[j]) & 0xfff ) << 8)); | |||
538 | ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type); | |||
539 | if (!stream_0) | |||
540 | stream_0 = stream; | |||
541 | } | |||
542 | // Wait signal to finish. | |||
543 | if (stream_context) | |||
544 | { | |||
545 | if (stream_0) | |||
546 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
547 | else | |||
548 | ccv_nnc_stream_context_wait(stream_context); | |||
549 | } | |||
550 | if (stream_0) | |||
551 | { | |||
552 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = { | |||
553 | .graph = graph, | |||
554 | .stream_type = stream_type | |||
555 | }; | |||
556 | ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery); | |||
557 | } | |||
558 | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size , per_output_size); fflush(stdout); } } while (0); | |||
559 | int k; | |||
560 | for (k = 0; k < per_input_size; k++) | |||
561 | { | |||
562 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[k + i * per_input_size]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size ], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? (((input_tensors[k + i * per_input_size]-> info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | |||
563 | if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
564 | ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]); | |||
565 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
566 | } | |||
567 | ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0); | |||
568 | for (k = 0; k < per_output_size; k++) | |||
569 | { | |||
570 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors [k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? (((output_tensors[k]->info.type) & 0xfff00) >> 8 ) : -1)); fflush(stdout); } } while (0); | |||
571 | if (output_tensors[k] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
572 | ccv_nnc_print_tensor_info(output_tensors[k]); | |||
573 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
574 | } | |||
575 | if (stream_context && stream_0) | |||
576 | { | |||
577 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
578 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
579 | } | |||
580 | streams[i] = stream_0; | |||
581 | } | |||
582 | if (!stream_context) | |||
583 | for (i = 0; i < parallel_count; i++) | |||
584 | if (streams[i]) | |||
585 | ccv_nnc_stream_context_wait(streams[i]); | |||
586 | } else { | |||
587 | for (i = 0; i < per_output_size; i++) | |||
588 | output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context)ccv_nnc_tensor_from_variable_impl(graph, outputs[i], stream_context ) : 0; | |||
589 | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size , per_output_size); fflush(stdout); } } while (0); | |||
590 | for (i = 0; i < per_input_size; i++) | |||
591 | { | |||
592 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors [i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? ( ((input_tensors[i]->info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | |||
593 | if (input_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
594 | ccv_nnc_print_tensor_info(input_tensors[i]); | |||
595 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
596 | } | |||
597 | ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context); | |||
598 | for (i = 0; i < per_output_size; i++) | |||
599 | { | |||
600 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors [i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? (((output_tensors[i]->info.type) & 0xfff00) >> 8 ) : -1)); fflush(stdout); } } while (0); | |||
601 | if (output_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
602 | ccv_nnc_print_tensor_info(output_tensors[i]); | |||
603 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
604 | } | |||
605 | } | |||
606 | int inputs_are_constants = 1; | |||
607 | for (i = 0; inputs_are_constants && i < input_size; i++) | |||
608 | if (inputs[i] && inputs[i]->type != CCV_NNC_TENSOR_CONSTANT) | |||
609 | inputs_are_constants = 0; | |||
610 | if (input_size > 0 && !inputs_are_constants && !graph->no_grad) // No need to record the execution if there is no input or we disabled gradient computation. | |||
611 | { | |||
612 | ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)({ typeof (1) _a = (1); typeof (output_size) _b = (output_size ); (_a > _b) ? _a : _b; })]; | |||
613 | for (i = 0; i < output_size; i++) | |||
614 | if (outputs[i]) | |||
615 | { | |||
616 | assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT)((void) sizeof ((outputs[i]->type != CCV_NNC_TENSOR_CONSTANT ) ? 1 : 0), __extension__ ({ if (outputs[i]->type != CCV_NNC_TENSOR_CONSTANT ) ; else __assert_fail ("outputs[i]->type != CCV_NNC_TENSOR_CONSTANT" , "ccv_nnc_dynamic_graph.c", 616, __extension__ __PRETTY_FUNCTION__ ); })); | |||
617 | output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]); | |||
618 | } else | |||
619 | output_symbols[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
620 | int t; | |||
621 | for (t = 0; t < parallel_count; t++) | |||
622 | { | |||
623 | ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0); | |||
624 | if (graph_execs) | |||
625 | graph_execs[t] = graph_exec; | |||
626 | // This needs to be done before we set the new sources on the outputs. | |||
627 | for (i = 0; i < per_input_size; i++) | |||
628 | { | |||
629 | ccv_array_t* const input_source = input_sources[i + t * per_input_size]; | |||
630 | if (input_source) | |||
631 | for (j = 0; j < input_source->rnum; j++) | |||
632 | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ | |||
633 | .d = *(int*)ccv_array_get(input_source, j)((void*)(((char*)((input_source)->data)) + (size_t)(input_source )->rsize * (size_t)(j))), | |||
634 | .graph = graph->tape | |||
635 | }, graph_exec); | |||
636 | ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size]; | |||
637 | if (input_alias_source) | |||
638 | for (j = 0; j < input_alias_source->rnum; j++) | |||
639 | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ | |||
640 | .d = *(int*)ccv_array_get(input_alias_source, j)((void*)(((char*)((input_alias_source)->data)) + (size_t)( input_alias_source)->rsize * (size_t)(j))), | |||
641 | .graph = graph->tape | |||
642 | }, graph_exec); | |||
643 | } | |||
644 | for (i = 0; i < per_input_size; i++) | |||
645 | { | |||
646 | ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size]; | |||
647 | if (!input || input->type == CCV_NNC_TENSOR_CONSTANT) | |||
648 | continue; | |||
649 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(input_symbols[i + t * per_input_size ].d))); | |||
650 | if (!bind->destinations) | |||
651 | bind->destinations = ccv_array_new(sizeof(int), 1, 0); | |||
652 | ccv_array_add_unique_int(bind->destinations, graph_exec.d); | |||
653 | if (input->alias_index_ref) | |||
654 | { | |||
655 | const int alias_index = input->alias_index_ref - 1; | |||
656 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 656, __extension__ __PRETTY_FUNCTION__ ); })); | |||
657 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
658 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))); | |||
659 | if (!root_bind->destinations) | |||
660 | root_bind->destinations = ccv_array_new(sizeof(int), 1, 0); | |||
661 | ccv_array_add_unique_int(root_bind->destinations, graph_exec.d); | |||
662 | } | |||
663 | } | |||
664 | for (i = 0; i < per_output_size; i++) | |||
665 | { | |||
666 | ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size]; | |||
667 | if (!output) | |||
668 | continue; | |||
669 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(output_symbols[i + t * per_output_size ].d))); | |||
670 | assert(!bind->sources)((void) sizeof ((!bind->sources) ? 1 : 0), __extension__ ( { if (!bind->sources) ; else __assert_fail ("!bind->sources" , "ccv_nnc_dynamic_graph.c", 670, __extension__ __PRETTY_FUNCTION__ ); })); // This is a new symbol, therefore, no binded sources associated yet. | |||
671 | bind->sources = ccv_array_new(sizeof(int), 1, 0); | |||
672 | ccv_array_add_unique_int(bind->sources, graph_exec.d); | |||
673 | if (output->alias_index_ref) | |||
674 | { | |||
675 | const int alias_index = output->alias_index_ref - 1; | |||
676 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 676, __extension__ __PRETTY_FUNCTION__ ); })); | |||
677 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
678 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))); | |||
679 | if (!root_bind->sources) | |||
680 | root_bind->sources = ccv_array_new(sizeof(int), 1, 0); | |||
681 | ccv_array_add_unique_int(root_bind->sources, graph_exec.d); | |||
682 | } | |||
683 | } | |||
684 | } | |||
685 | } | |||
686 | // Now, able to free some of the reused outputs. | |||
687 | for (i = 0; i < freeable_size; i++) | |||
688 | ccv_nnc_tensor_variable_free(graph, freeables[i]); | |||
689 | } | |||
690 | ||||
691 | int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context) | |||
692 | { | |||
693 | ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0); | |||
694 | return CCV_NNC_EXEC_SUCCESS; | |||
695 | } | |||
696 | ||||
697 | static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d) | |||
698 | { | |||
699 | if (bind->alias_ref) | |||
700 | bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(bind->alias_ref - 1))); | |||
701 | if (!bind->sources || bind->sources->rnum == 0) | |||
702 | return 1; | |||
703 | int i; | |||
704 | for (i = 0; i < bind->sources->rnum; i++) | |||
705 | { | |||
706 | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | |||
707 | const ccv_nnc_graph_exec_symbol_t exec_symbol = { | |||
708 | .d = exec_symbol_d, | |||
709 | .graph = graph->tape | |||
710 | }; | |||
711 | const int* outputs; int output_size; | |||
712 | ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size); | |||
713 | int j; | |||
714 | for (j = 0; j < output_size; j++) | |||
715 | if (outputs[j] >= 0 && outputs[j] != symbol_d) // If output is me, it is the only output. | |||
716 | { | |||
717 | assert(outputs[j] < graph->binds->rnum)((void) sizeof ((outputs[j] < graph->binds->rnum) ? 1 : 0), __extension__ ({ if (outputs[j] < graph->binds-> rnum) ; else __assert_fail ("outputs[j] < graph->binds->rnum" , "ccv_nnc_dynamic_graph.c", 717, __extension__ __PRETTY_FUNCTION__ ); })); | |||
718 | const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | |||
719 | // This is in use and is it not a constant symbol. | |||
720 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
721 | return 0; | |||
722 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
723 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
724 | if (other_bind->destinations && other_bind->destinations->rnum > 0) | |||
725 | return 0; | |||
726 | } | |||
727 | } | |||
728 | return 1; | |||
729 | } | |||
730 | ||||
731 | static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) | |||
732 | { | |||
733 | int i; | |||
734 | if (bind->destinations) | |||
735 | { | |||
736 | int flag = 0; | |||
737 | for (i = 0; !flag && i < bind->destinations->rnum; i++) | |||
738 | { | |||
739 | const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(i))); | |||
740 | if (exec_symbol_d == freed_exec_symbol_d) | |||
741 | { | |||
742 | if (i < bind->destinations->rnum - 1) | |||
743 | *(int*)ccv_array_get(bind->destinations, i)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(i))) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(bind->destinations ->rnum - 1))); | |||
744 | --bind->destinations->rnum; | |||
745 | flag = 1; | |||
746 | } | |||
747 | } | |||
748 | // This symbol can be freed. | |||
749 | if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) | |||
750 | { | |||
751 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | |||
752 | if (bind->alias_ref) | |||
753 | { | |||
754 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(bind->alias_ref - 1))); | |||
755 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
756 | root_bind = bind; | |||
757 | } | |||
758 | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. | |||
759 | // It is possible because exec will be freed already, thus, it is safe to remove this alias out. | |||
760 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && | |||
761 | ((!root_bind->sources || root_bind->sources->rnum == 0) || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | |||
762 | root_bind->destinations->rnum == 0) | |||
763 | { | |||
764 | if (root_bind->sources) | |||
765 | for (i = 0; i < root_bind->sources->rnum; i++) | |||
766 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | |||
767 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
768 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
769 | .d = tensor_index, | |||
770 | .graph = graph->tape | |||
771 | }); | |||
772 | } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations. | |||
773 | bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) { | |||
774 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
775 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
776 | .d = tensor_index, | |||
777 | .graph = graph->tape | |||
778 | }); | |||
779 | } | |||
780 | } | |||
781 | } | |||
782 | } | |||
783 | ||||
784 | static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) | |||
785 | { | |||
786 | int i; | |||
787 | if (bind->sources) | |||
788 | { | |||
789 | int flag = 0; | |||
790 | for (i = 0; !flag && i < bind->sources->rnum; i++) | |||
791 | { | |||
792 | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | |||
793 | if (exec_symbol_d == freed_exec_symbol_d) | |||
794 | { | |||
795 | if (i < bind->sources->rnum - 1) | |||
796 | *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(bind->sources->rnum - 1))); | |||
797 | --bind->sources->rnum; | |||
798 | flag = 1; | |||
799 | } | |||
800 | } | |||
801 | // This symbol can be freed. | |||
802 | if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) | |||
803 | { | |||
804 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | |||
805 | if (bind->alias_ref) | |||
806 | { | |||
807 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(bind->alias_ref - 1))); | |||
808 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
809 | root_bind = bind; | |||
810 | } | |||
811 | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. | |||
812 | // It is possible because exec will be freed already, thus, it is safe to remove this alias out. | |||
813 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && | |||
814 | (root_bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | |||
815 | (!root_bind->destinations || root_bind->destinations->rnum == 0)) | |||
816 | { | |||
817 | for (i = 0; i < root_bind->sources->rnum; i++) | |||
818 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | |||
819 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
820 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
821 | .d = tensor_index, | |||
822 | .graph = graph->tape | |||
823 | }); | |||
824 | } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations. | |||
825 | bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) { | |||
826 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
827 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
828 | .d = tensor_index, | |||
829 | .graph = graph->tape | |||
830 | }); | |||
831 | } | |||
832 | } | |||
833 | } | |||
834 | } | |||
835 | ||||
836 | static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws) | |||
837 | { | |||
838 | int i; | |||
839 | for (i = 0; i < input_size; i++) | |||
840 | if (inputs[i] >= 0 && inputs[i] < binds->rnum) | |||
841 | { | |||
842 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i])((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(inputs[i]))); | |||
843 | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
844 | continue; | |||
845 | if (bind->alias_ref) | |||
846 | { | |||
847 | const int alias_to = bind->alias_ref - 1; | |||
848 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(alias_to))); | |||
849 | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) | |||
850 | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); | |||
851 | } | |||
852 | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws); | |||
853 | } | |||
854 | // Note that this works because there is no overlap of inputs / outputs. (What about alias?). | |||
855 | for (i = 0; i < output_size; i++) | |||
856 | if (outputs[i] >= 0 && outputs[i] < binds->rnum) | |||
857 | { | |||
858 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i])((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(outputs[i]))); | |||
859 | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
860 | continue; | |||
861 | if (bind->alias_ref) | |||
862 | { | |||
863 | const int alias_to = bind->alias_ref - 1; | |||
864 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(alias_to))); | |||
865 | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) | |||
866 | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); | |||
867 | } | |||
868 | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws); | |||
869 | } | |||
870 | } | |||
871 | ||||
872 | static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol) | |||
873 | { | |||
874 | if (!graph->stateful_execs) | |||
875 | return; | |||
876 | assert(symbol.d >= 0)((void) sizeof ((symbol.d >= 0) ? 1 : 0), __extension__ ({ if (symbol.d >= 0) ; else __assert_fail ("symbol.d >= 0" , "ccv_nnc_dynamic_graph.c", 876, __extension__ __PRETTY_FUNCTION__ ); })); | |||
877 | ccv_array_t* const stateful_execs = graph->stateful_execs; | |||
878 | ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol); | |||
879 | ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data; | |||
880 | if (!stateful_exec) | |||
881 | return; | |||
882 | // If there is no backward, no need to apply gradients. | |||
883 | // Otherwise, if we applied gradients, we can free it as well. | |||
884 | // We don't free this stateful exec because apply gradients doesn't require any variables alive. | |||
885 | if (!stateful_exec->did_backward_but_not_apply_gradients) | |||
886 | { | |||
887 | const int index = stateful_exec->index; | |||
888 | ccfreefree(stateful_exec); | |||
889 | if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0) | |||
890 | graph->reuse_stateful_exec = index; | |||
891 | *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index)((void*)(((char*)((stateful_execs)->data)) + (size_t)(stateful_execs )->rsize * (size_t)(index))) = 0; | |||
892 | } else | |||
893 | stateful_exec->should_free = 1; | |||
894 | } | |||
895 | ||||
896 | void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
897 | { | |||
898 | // If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output. | |||
899 | if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
900 | { | |||
901 | // If it is not a free variable, when can we free the symbol and the underlying variable? | |||
902 | // 1. There should be no sources (the command generate this tensor should be freed) or the output of these sources is only the current one; | |||
903 | // 2. The destinations (the commands that uses this tensor) should have no other inputs, or the other inputs has no binded sources as well. | |||
904 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(tensor_variable->symbol.d ))); | |||
905 | // There should be no source associated with it no more. | |||
906 | int free_symbol = 0; | |||
907 | // I am free if no exec symbol is producing me or the symbol producing me can only producing me (thus, it is not required to | |||
908 | // compute gradient because I am the only variable it can compute gradient for). | |||
909 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | |||
910 | if (bind->alias_ref) | |||
911 | { | |||
912 | const int alias_to = bind->alias_ref - 1; | |||
913 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(alias_to))); | |||
914 | } | |||
915 | const int sources_and_is_only_output = (root_bind->sources && root_bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d); | |||
916 | if (!root_bind->sources || root_bind->sources->rnum == 0 || sources_and_is_only_output) | |||
917 | { | |||
918 | int i, j; | |||
919 | free_symbol = 1; // Assume we can free this symbol. | |||
920 | if (!graph->ws) | |||
921 | graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 0, 0); | |||
922 | ccv_array_t* const ws = graph->ws; | |||
923 | ccv_array_clear(ws); | |||
924 | if (root_bind->destinations) | |||
925 | for (i = 0; i < root_bind->destinations->rnum; i++) | |||
926 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i)((void*)(((char*)((root_bind->destinations)->data)) + ( size_t)(root_bind->destinations)->rsize * (size_t)(i)))); | |||
927 | const int ws_init_size = ws->rnum; | |||
928 | // Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free. | |||
929 | if (root_bind->sources) | |||
930 | for (i = 0; i < root_bind->sources->rnum; i++) | |||
931 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | |||
932 | // If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want | |||
933 | // to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol. | |||
934 | if (ws_init_size == 0) | |||
935 | free_symbol = (!bind->alias_ref || root_bind->index < 0); | |||
936 | // Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources. | |||
937 | for (i = 0; i < ws_init_size; i++) | |||
938 | { | |||
939 | const int exec_symbol_d = *(int*)ccv_array_get(ws, i)((void*)(((char*)((ws)->data)) + (size_t)(ws)->rsize * ( size_t)(i))); | |||
940 | const ccv_nnc_graph_exec_symbol_t symbol = { | |||
941 | .d = exec_symbol_d, | |||
942 | .graph = graph->tape | |||
943 | }; | |||
944 | const int* inputs; int input_size; | |||
945 | const int* outputs; int output_size; | |||
946 | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); | |||
947 | int flag = 0; // flag denotes whether there are cases to keep this exec symbol. | |||
948 | if (sources_and_is_only_output) | |||
949 | { | |||
950 | // If there are sources, check whether we have outputs or not. If we do, we cannot free this. | |||
951 | for (j = 0; !flag && j < output_size; j++) | |||
952 | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) | |||
953 | { | |||
954 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | |||
955 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
956 | flag = 1; | |||
957 | else { | |||
958 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
959 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
960 | flag = (other_bind->destinations && other_bind->destinations->rnum > 0); | |||
961 | } | |||
962 | } | |||
963 | } else { | |||
964 | // If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this. | |||
965 | for (j = 0; !flag && j < input_size; j++) | |||
966 | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d) | |||
967 | { | |||
968 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(inputs[j]))); | |||
969 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
970 | flag = 1; | |||
971 | else { | |||
972 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
973 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
974 | flag = (other_bind->sources && other_bind->sources->rnum > 0); | |||
975 | } | |||
976 | } | |||
977 | } | |||
978 | // This exec can be freed if there is no input required or there is no output required. | |||
979 | free_symbol = (free_symbol && !flag); | |||
980 | if (!flag) | |||
981 | { | |||
982 | // Go over inputs and remove all references from binded destinations. | |||
983 | // and go over outputs remove all references from binded sources. | |||
984 | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); | |||
985 | const int* outgoings; int outgoing_size; | |||
986 | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); | |||
987 | for (j = 0; j < outgoing_size; j++) | |||
988 | ccv_array_add_unique_int(ws, outgoings[j]); | |||
989 | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); | |||
990 | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); | |||
991 | } | |||
992 | } | |||
993 | if (free_symbol) | |||
994 | { | |||
995 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
996 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | |||
997 | // Now, go over the outgoings, if it is removed, add more to it. Note that the ws array can grow while iterating over. | |||
998 | for (i = ws_init_size; i < ws->rnum; i++) | |||
999 | { | |||
1000 | const int exec_symbol_d = *(int*)ccv_array_get(ws, i)((void*)(((char*)((ws)->data)) + (size_t)(ws)->rsize * ( size_t)(i))); | |||
1001 | const ccv_nnc_graph_exec_symbol_t symbol = { | |||
1002 | .d = exec_symbol_d, | |||
1003 | .graph = graph->tape | |||
1004 | }; | |||
1005 | const int* inputs; int input_size; | |||
1006 | const int* outputs; int output_size; | |||
1007 | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); | |||
1008 | int flag = 0; | |||
1009 | for (j = 0; !flag && j < input_size; j++) | |||
1010 | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum) | |||
1011 | { | |||
1012 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(inputs[j]))); | |||
1013 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
1014 | flag = 1; | |||
1015 | else { | |||
1016 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
1017 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
1018 | flag = (other_bind->sources && other_bind->sources->rnum > 0); | |||
1019 | } | |||
1020 | } | |||
1021 | if (flag) // If any inputs make free this destination impossible. Check whether all its outputs are done. | |||
1022 | { | |||
1023 | int output_flag = 0; | |||
1024 | for (j = 0; !output_flag && j < output_size; j++) | |||
1025 | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) | |||
1026 | { | |||
1027 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | |||
1028 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
1029 | output_flag = 1; | |||
1030 | else { | |||
1031 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
1032 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
1033 | output_flag = (other_bind->destinations && other_bind->destinations->rnum > 0); | |||
1034 | } | |||
1035 | } | |||
1036 | if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination). | |||
1037 | flag = 0; | |||
1038 | } | |||
1039 | // Went over all the inputs, it turns out no more inputs has other references, safe to remove. | |||
1040 | if (!flag) | |||
1041 | { | |||
1042 | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); | |||
1043 | const int* outgoings; int outgoing_size; | |||
1044 | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); | |||
1045 | // It it has outgoings, add that for further inspection. | |||
1046 | for (j = 0; j < outgoing_size; j++) | |||
1047 | ccv_array_add_unique_int(ws, outgoings[j]); | |||
1048 | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); | |||
1049 | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); | |||
1050 | } | |||
1051 | } | |||
1052 | } | |||
1053 | } | |||
1054 | // If this symbol is not freed, move the tensor view to the bind. | |||
1055 | if (!free_symbol) | |||
1056 | { | |||
1057 | // If current bind is an alias, and it doesn't have any sources or destinations. We cannot find this alias | |||
1058 | // through any exec. This is not only safe to delete, but has to be deleted. We don't need to handle this | |||
1059 | // if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the | |||
1060 | // alias in that process. | |||
1061 | if (bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) | |||
1062 | { | |||
1063 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
1064 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | |||
1065 | } else { | |||
1066 | bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol extra will continue exists. | |||
1067 | bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback. | |||
1068 | bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context. | |||
1069 | bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind. | |||
1070 | tensor_variable->tensor_view = 0; | |||
1071 | } | |||
1072 | } | |||
1073 | } | |||
1074 | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1); | |||
1075 | } | |||
1076 | ||||
1077 | void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask) | |||
1078 | { | |||
1079 | int i, j; | |||
1080 | ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0); | |||
1081 | for (i = 0; i < source_variable_size; i++) | |||
1082 | { | |||
1083 | if (source_variables[i]->symbol.d < 0) | |||
1084 | continue; | |||
1085 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(source_variables[i]->symbol .d))); | |||
1086 | if (bind->destinations && bind->destinations->rnum > 0) | |||
1087 | for (j = 0; j < bind->destinations->rnum; j++) | |||
1088 | { | |||
1089 | // It is ok to have duplicate symbols. | |||
1090 | const int d = *(int*)ccv_array_get(bind->destinations, j)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(j))); | |||
1091 | ccv_nnc_graph_exec_symbol_t symbol = { | |||
1092 | .d = d, | |||
1093 | .graph = graph->tape | |||
1094 | }; | |||
1095 | ccv_array_push(sources_destinations, &symbol); | |||
1096 | } | |||
1097 | } | |||
1098 | const int source_size = sources_destinations->rnum; | |||
1099 | for (i = 0; i < destination_variable_size; i++) | |||
1100 | { | |||
1101 | if (destination_variables[i]->symbol.d < 0) | |||
1102 | continue; | |||
1103 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(destination_variables[i]-> symbol.d))); | |||
1104 | if (bind->sources && bind->sources->rnum > 0) | |||
1105 | for (j = 0; j < bind->sources->rnum; j++) | |||
1106 | { | |||
1107 | // It is ok to have duplicate symbols. | |||
1108 | const int d = *(int*)ccv_array_get(bind->sources, j)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(j))); | |||
1109 | ccv_nnc_graph_exec_symbol_t symbol = { | |||
1110 | .d = d, | |||
1111 | .graph = graph->tape | |||
1112 | }; | |||
1113 | ccv_array_push(sources_destinations, &symbol); | |||
1114 | } | |||
1115 | } | |||
1116 | const int destination_size = sources_destinations->rnum - source_size; | |||
1117 | if (source_size == 0 || destination_size == 0) | |||
1118 | { | |||
1119 | ccv_array_free(sources_destinations); | |||
1120 | return; | |||
1121 | } | |||
1122 | const int bitmask_size = ((source_size + 63) >> 6); | |||
1123 | assert(bitmask_size < 256)((void) sizeof ((bitmask_size < 256) ? 1 : 0), __extension__ ({ if (bitmask_size < 256) ; else __assert_fail ("bitmask_size < 256" , "ccv_nnc_dynamic_graph.c", 1123, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1124 | uint64_t exec_bitmask[bitmask_size]; | |||
1125 | ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0)((void*)(((char*)((sources_destinations)->data)) + (size_t )(sources_destinations)->rsize * (size_t)(0))), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size)((void*)(((char*)((sources_destinations)->data)) + (size_t )(sources_destinations)->rsize * (size_t)(source_size))), destination_size, exec_bitmask); | |||
1126 | int k = 0; | |||
1127 | for (i = 0; i < source_variable_size; i++) | |||
1128 | { | |||
1129 | if (source_variables[i]->symbol.d < 0) | |||
1130 | { | |||
1131 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
1132 | continue; | |||
1133 | } | |||
1134 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(source_variables[i]->symbol .d))); | |||
1135 | int flag = 0; | |||
1136 | if (bind->destinations && bind->destinations->rnum > 0) | |||
1137 | { | |||
1138 | assert(k <= source_size - bind->destinations->rnum)((void) sizeof ((k <= source_size - bind->destinations-> rnum) ? 1 : 0), __extension__ ({ if (k <= source_size - bind ->destinations->rnum) ; else __assert_fail ("k <= source_size - bind->destinations->rnum" , "ccv_nnc_dynamic_graph.c", 1138, __extension__ __PRETTY_FUNCTION__ ); })); | |||
1139 | for (j = 0; !flag && j < bind->destinations->rnum; j++) | |||
1140 | flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]); | |||
1141 | k += bind->destinations->rnum; | |||
1142 | } | |||
1143 | if (flag) | |||
1144 | bitmask[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
1145 | else | |||
1146 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
1147 | } | |||
1148 | ccv_array_free(sources_destinations); | |||
1149 | } | |||
1150 | ||||
1151 | int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type) | |||
1152 | { | |||
1153 | return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type); | |||
1154 | } | |||
1155 | ||||
// Render the dynamic graph's underlying symbolic tape as GraphViz dot output to `out`,
// delegating entirely to the symbolic-graph printer with the caller's flags.
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out)
{
	ccv_nnc_symbolic_graph_dot(graph->tape, flags, out);
}