File: nnc/ccv_nnc_graph.c
Warning: line 1258, column 41: Assigned value is garbage or undefined
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#include "ccv_internal.h"
#include "_ccv_nnc_graph.h"

// MARK - Level-2 API

ccv_nnc_graph_t* ccv_nnc_graph_new(void)
{
	ccv_nnc_graph_t* graph = (ccv_nnc_graph_t*)cccalloc(1, sizeof(ccv_nnc_graph_t));
	graph->exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), 5, 0);
	return graph;
}
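
/* Illustrative sketch (not part of this file): how the Level-2 API below is
 * typically combined. `cmd`, `hint`, `a` and `b` are placeholders assumed to be
 * set up elsewhere.
 *
 *   ccv_nnc_graph_t* const graph = ccv_nnc_graph_new();
 *   ccv_nnc_tensor_t* inputs[] = { a };
 *   ccv_nnc_tensor_t* outputs[] = { b };
 *   const ccv_nnc_graph_exec_t n0 = ccv_nnc_graph_exec_new(graph, cmd, hint, inputs, 1, outputs, 1);
 *   const ccv_nnc_graph_exec_t n1 = ccv_nnc_graph_exec_new(graph, cmd, hint, outputs, 1, inputs, 1);
 *   ccv_nnc_graph_exec_concat(graph, n0, n1); // n0 must execute before n1.
 *   ccv_nnc_graph_set_sources(graph, &n0, 1);
 *   ccv_nnc_graph_set_destinations(graph, &n1, 1);
 *   int exec_cvt[2];
 *   ccv_nnc_graph_topsort(graph, exec_cvt, 2); // Establish a fixed execution order.
 */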

void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size)
{
	if (!graph->sources)
		graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), source_size, 0);
	else
		ccv_array_clear(graph->sources);
	int i;
	for (i = 0; i < source_size; i++)
		ccv_array_push(graph->sources, sources + i);
	graph->topsorted = 0;
}

ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph)
{
	return graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0) : 0;
}

int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph)
{
	return graph->sources ? graph->sources->rnum : 0;
}

void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
{
	if (!graph->destinations)
		graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), destination_size, 0);
	else
		ccv_array_clear(graph->destinations);
	int i;
	for (i = 0; i < destination_size; i++)
		ccv_array_push(graph->destinations, destinations + i);
	graph->topsorted = 0;
}

ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph)
{
	return graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0) : 0;
}

int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph)
{
	return graph->destinations ? graph->destinations->rnum : 0;
}

void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	exec_info->cmd = cmd;
}

ccv_nnc_cmd_t ccv_nnc_graph_exec_cmd(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	return exec_info->cmd;
}

void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	exec_info->hint = hint;
}

static int _ccv_nnc_tensor_multiview_level_count(const ccv_nnc_tensor_multiview_t* const mv)
{
	if (!CCV_IS_TENSOR_MULTIVIEW(mv))
		return 1;
	const int count = mv->kind + mv->repeat;
	int i, c = 0;
	for (i = 0; i < count; i++)
	{
		ccv_nnc_tensor_t* tv = CCV_NNC_MULTIVIEW_DATA(mv)[i];
		if (tv == CCV_NNC_TENSOR_PLACEHOLDER)
			c = ccv_max(c, 1);
		else
			c = ccv_max(c, _ccv_nnc_tensor_multiview_level_count((ccv_nnc_tensor_multiview_t*)tv));
	}
	return c + 1;
}
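
/* The "level count" computed above is the nesting depth of a multiview tensor:
 * a plain tensor counts as 1, a multiview whose views are all plain tensors (or
 * placeholders) counts as 2, and so on. The tensor wrap allocated below reserves
 * one tensor slot per level, which appears to let the runtime keep the tensor
 * chosen at each unwrapping step (tensors[0] always being the multiview itself). */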

static ccv_nnc_graph_tensor_wrap_t* _ccv_nnc_graph_tensor_wrap_new(const ccv_nnc_tensor_multiview_t* const mv)
{
	const int level_count = _ccv_nnc_tensor_multiview_level_count(mv);
	ccv_nnc_graph_tensor_wrap_t* tensor_wrap = (ccv_nnc_graph_tensor_wrap_t*)ccmalloc(sizeof(ccv_nnc_graph_tensor_wrap_t) + sizeof(ccv_nnc_tensor_t*) * (level_count - 1));
	tensor_wrap->update_required = 0;
	tensor_wrap->count = level_count;
	tensor_wrap->index = 0;
	tensor_wrap->tensors[0] = (ccv_nnc_tensor_t*)mv;
	return tensor_wrap;
}
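
/* The allocation above assumes ccv_nnc_graph_tensor_wrap_t declares a one-element
 * tensors[] array at its tail, so only (level_count - 1) extra pointers are needed
 * on top of sizeof(ccv_nnc_graph_tensor_wrap_t). */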

static void _ccv_nnc_graph_exec_rewind(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
{
	if (!info->tensor_wraps_ref)
		return;
	int i;
	assert(info->tensor_wraps_ref <= graph->tensor_wraps->rnum);
	ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
	// Rewind from tensor wraps.
	for (i = 0; i < info->input_size; i++)
		if (tensor_wrap_array->tensor_wraps[i])
			info->inputs[i] = tensor_wrap_array->tensor_wraps[i]->tensors[0];
	const int d = info->input_size;
	for (i = 0; i < info->output_size; i++)
		if (tensor_wrap_array->tensor_wraps[d + i])
			info->outputs[i] = tensor_wrap_array->tensor_wraps[d + i]->tensors[0];
	const int dd = info->input_size + info->output_size;
	for (i = 0; i < info->update_size; i++)
		if (tensor_wrap_array->tensor_wraps[dd + i])
			info->updates[i] = tensor_wrap_array->tensor_wraps[dd + i]->tensors[0];
}
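
/* Note the layout convention used throughout this file: the tensor_wraps array of
 * an exec node is laid out as [inputs | outputs | updates], so outputs start at
 * offset input_size and updates at offset input_size + output_size. Rewinding
 * simply restores tensors[0] (the original multiview) for every wrapped slot,
 * undoing any unwrapping a previous run may have left behind. */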

static void _ccv_nnc_graph_tensor_wrap_free(ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
{
	ccfree(tensor_wrap);
}

ccv_nnc_graph_tensor_wrap_array_t* ccv_nnc_get_tensor_wrap_array(ccv_nnc_graph_t* const graph, const int tensor_wrap_size, int* const tensor_wraps_ref)
{
	ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = *tensor_wraps_ref ? (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, *tensor_wraps_ref - 1) : 0;
	// Otherwise, find an open slot.
	if (!tensor_wrap_array_ref)
	{
		if (!graph->tensor_wraps)
			graph->tensor_wraps = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wrap_array_t*), 0, 0);
		ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = 0;
		ccv_array_push(graph->tensor_wraps, &tensor_wrap_array);
		tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, graph->tensor_wraps->rnum - 1);
		*tensor_wraps_ref = graph->tensor_wraps->rnum;
	}
	int i;
	if (*tensor_wrap_array_ref)
	{
		if ((*tensor_wrap_array_ref)->size != tensor_wrap_size)
			*tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)ccrealloc(*tensor_wrap_array_ref, sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1));
		for (i = (*tensor_wrap_array_ref)->size; i < tensor_wrap_size; i++)
			(*tensor_wrap_array_ref)->tensor_wraps[i] = 0;
	} else
		*tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)cccalloc(sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1), 1);
	ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
	tensor_wrap_array->size = tensor_wrap_size;
	return tensor_wrap_array;
}
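
/* tensor_wraps_ref is a 1-based handle into graph->tensor_wraps: 0 means "no wrap
 * array yet", in which case a new (initially NULL) slot is pushed and its 1-based
 * index returned through the pointer. Existing arrays are grown (or lazily
 * allocated with cccalloc) to hold tensor_wrap_size entries, with any newly added
 * entries zeroed. */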

void ccv_nnc_set_tensor_wraps(ccv_nnc_graph_tensor_wrap_t** const tensor_wraps, ccv_nnc_tensor_t* const* const tensors, const int tensor_size)
{
	int i;
	for (i = 0; i < tensor_size; i++)
		if (tensors[i])
		{
			if (CCV_IS_TENSOR_MULTIVIEW(tensors[i]) &&
				((ccv_nnc_tensor_multiview_t*)tensors[i])->anchor != CCV_NNC_MULTIVIEW_PHI)
			{
				if (!tensor_wraps[i] || tensors[i] != tensor_wraps[i]->tensors[0])
				{
					if (tensor_wraps[i])
						_ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
					tensor_wraps[i] = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)tensors[i]);
				}
			} else {
				if (tensor_wraps[i])
					_ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
				tensor_wraps[i] = 0;
			}
		}
}
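
/* A wrap is only kept for tensors that are multiview and whose anchor is not
 * CCV_NNC_MULTIVIEW_PHI; for everything else any stale wrap is freed and the slot
 * cleared. A wrap is also recreated when the tensor bound to the slot changed. */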

void ccv_nnc_graph_register_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
{
	ccv_nnc_graph_t* p = graph;
	const ccv_nnc_graph_tensor_wraps_ref_t tensor_wraps_ref = {
		.d = tensor_wraps_ref_d,
		.graph = graph,
	};
	do {
		if (!p->tensor_wraps_refs)
		{
			p->tensor_wraps_refs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wraps_ref_t), 0, 0);
			ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
		} else {
			int i;
			int has_tensor_wraps_ref = 0;
			for (i = 0; !has_tensor_wraps_ref && i < p->tensor_wraps_refs->rnum; i++)
			{
				ccv_nnc_graph_tensor_wraps_ref_t* tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
				has_tensor_wraps_ref = (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph);
			}
			if (!has_tensor_wraps_ref)
				ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
		}
		p = p->p;
	} while (p);
}
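
/* Registration walks up the parent chain (p = p->p), so every enclosing graph ends
 * up with a (deduplicated) reference to this wrap array. Presumably this is what
 * lets an outer graph find and unwrap the multiview tensors of its sub-graphs when
 * it executes. */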

static void _ccv_nnc_graph_redo_tensor_wraps(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
{
	int i;
	const int has_wrap = ccv_nnc_tensors_have_wraps(info->inputs, info->input_size) ||
		ccv_nnc_tensors_have_wraps(info->outputs, info->output_size) ||
		ccv_nnc_tensors_have_wraps(info->updates, info->update_size);
	if (has_wrap)
	{
		const int tensor_wrap_size = info->input_size + info->output_size + info->update_size;
		ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = ccv_nnc_get_tensor_wrap_array(graph, tensor_wrap_size, &info->tensor_wraps_ref);
		ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps, info->inputs, info->input_size);
		const int d = info->input_size;
		ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + d, info->outputs, info->output_size);
		const int dd = info->input_size + info->output_size;
		ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + dd, info->updates, info->update_size);
	} else if (info->tensor_wraps_ref) {
		ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
		ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
		if (tensor_wrap_array)
		{
			for (i = 0; i < tensor_wrap_array->size; i++)
				if (tensor_wrap_array->tensor_wraps[i])
					_ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[i]);
			ccfree(tensor_wrap_array);
			*tensor_wrap_array_ref = 0;
			info->tensor_wraps_ref = 0;
		}
	}
}
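
/* In short: if any input/output/update is (still) a multiview tensor, rebuild the
 * node's wrap array to cover all of them; if none are but a wrap array exists from
 * an earlier configuration, free it and reset tensor_wraps_ref to 0. */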

static void _ccv_nnc_graph_deregister_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
{
	ccv_nnc_graph_t* p = graph;
	do {
		int i;
		// Remove from the array.
		if (p->tensor_wraps_refs)
			for (i = 0; i < p->tensor_wraps_refs->rnum; i++)
			{
				ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
				if (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph)
				{
					--p->tensor_wraps_refs->rnum;
					if (i < p->tensor_wraps_refs->rnum)
						memcpy(tensor_wraps_ref, tensor_wraps_ref + 1, sizeof(ccv_nnc_graph_exec_t) * (p->tensor_wraps_refs->rnum - i));
					break;
				}
			}
		p = p->p;
	} while (p);
}

void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	assert(input_flag_size <= info->input_size);
	assert(output_flag_size <= info->output_size);
	if (info->input_size + info->output_size == 0)
		return;
	if (!info->input_flags)
	{
		info->input_flags = (int*)cccalloc(info->input_size + info->output_size, sizeof(int));
		info->output_flags = info->input_flags + info->input_size;
	}
	if (input_flag_size > 0)
		memcpy(info->input_flags, input_flags, sizeof(int) * input_flag_size);
	if (output_flag_size > 0)
		memcpy(info->output_flags, output_flags, sizeof(int) * output_flag_size);
}

void ccv_nnc_graph_exec_pair_with(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t pair_exec)
{
	assert(exec.graph == graph);
	assert(exec.d >= 0);
	assert(exec.d < graph->exec_info->rnum);
	assert(pair_exec.graph == graph || pair_exec.graph == graph->pair);
	assert(pair_exec.d >= 0);
	if (pair_exec.graph == graph)
		{ assert(pair_exec.d < graph->exec_info->rnum); }
	else
		{ assert(pair_exec.d < graph->pair->exec_info->rnum); }
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	exec_info->pair_ref = pair_exec.d + 1;
}

static ccv_nnc_tensor_t* _ccv_nnc_any_tensor_from_tensor_multiview(ccv_nnc_tensor_multiview_t* const mv)
{
	ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
	while (CCV_IS_TENSOR_MULTIVIEW(tensor))
	{
		ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
		const int count = 0;
		const int off = mv->kind;
		const int mod = mv->repeat;
		// If reached the root.
		tensor = CCV_NNC_MULTIVIEW_DATA(mv)[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
	}
	return tensor;
}
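
/* With count fixed at 0 this always picks the first view at every level, i.e. it
 * just descends to some concrete (non-multiview) tensor. The callers below only
 * need a representative tensor to read memory type, format and datatype from when
 * picking a backend, so which view is chosen does not matter. */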

void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	// De-register from the graph if it contains multiview tensors.
	if (info->tensor_wraps_ref)
		_ccv_nnc_graph_deregister_tensor_wraps(graph, info->tensor_wraps_ref - 1);
	// In case it is already executed, rewind.
	_ccv_nnc_graph_exec_rewind(info, graph);
	if (input_size == 0 && output_size == 0)
	{
		if (info->input_size > 0 || info->output_size > 0)
			ccfree(info->inputs);
		info->inputs = 0;
		info->outputs = 0;
		info->input_size = 0;
		info->output_size = 0;
		_ccv_nnc_graph_redo_tensor_wraps(info, graph);
		if (info->tensor_wraps_ref)
			ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
		return;
	}
	if (info->inputs)
		info->inputs = (ccv_nnc_tensor_t**)ccrealloc(info->inputs, sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
	else
		info->inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
	info->outputs = info->inputs + input_size;
	if (inputs)
		memcpy(info->inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
	if (outputs)
		memcpy(info->outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
	int i;
	int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
	for (i = 0; i < input_size + output_size; i++)
		if (info->inputs[i])
		{
			ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i];
			tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype);
		}
	info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes);
	info->input_size = input_size;
	info->output_size = output_size;
	_ccv_nnc_graph_redo_tensor_wraps(info, graph);
	// Register again if the tensor wraps exist.
	if (info->tensor_wraps_ref)
		ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
	// Free flags.
	if (info->input_flags)
	{
		ccfree(info->input_flags);
		info->input_flags = info->output_flags = 0;
	}
}

void ccv_nnc_graph_exec_add_as_affected(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update)
{
	assert(CCV_IS_TENSOR_MULTIVIEW(update));
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	const int register_tensor_wraps = !info->tensor_wraps_ref;
	const int update_index = info->update_size;
	++info->update_size;
	if (info->updates)
		info->updates = (ccv_nnc_tensor_t**)ccrealloc(info->updates, sizeof(ccv_nnc_tensor_t*) * info->update_size);
	else
		info->updates = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * info->update_size);
	info->updates[update_index] = update;
	_ccv_nnc_graph_redo_tensor_wraps(info, graph);
	if (register_tensor_wraps)
		ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
}

ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
	int d = graph->exec_info->rnum;
	ccv_nnc_graph_exec_info_t info = {
		.cmd = cmd,
		.hint = hint,
		.input_size = input_size,
		.output_size = output_size,
	};
	assert(inputs || input_size == 0);
	assert(outputs || output_size == 0);
	if (input_size > 0 || output_size > 0)
	{
		info.inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
		info.outputs = info.inputs + input_size;
		if (inputs)
			memcpy(info.inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
		if (outputs)
			memcpy(info.outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
		info.input_size = input_size;
		info.output_size = output_size;
		int i;
		int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
		for (i = 0; i < input_size + output_size; i++)
			if (info.inputs[i])
			{
				ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i];
				tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype);
			}
		info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes);
	}
	_ccv_nnc_graph_redo_tensor_wraps(&info, graph);
	// Add itself to the graph's wraps array, this will help the run time when we run the graph and do unwrapping.
	if (info.tensor_wraps_ref)
		ccv_nnc_graph_register_tensor_wraps(graph, info.tensor_wraps_ref - 1);
	ccv_array_push(graph->exec_info, &info);
	return (ccv_nnc_graph_exec_t){
		.d = d,
		.graph = graph,
	};
}
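
/* Backend selection here (and in ccv_nnc_graph_exec_set_io above) works on OR-ed
 * bitmasks: the memory types, tensor formats and datatypes of every non-NULL
 * operand are combined and handed to ccv_nnc_cmd_find_backend, which looks up a
 * backend for that combination. Multiview operands are first reduced to a
 * representative concrete tensor. */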

void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to)
{
	ccv_nnc_graph_tensor_carry_over_t carry_over = {
		.from = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)from),
		.to = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)to)
	};
	if (!graph->carry_overs)
		graph->carry_overs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_carry_over_t), 0, 0);
	ccv_array_push(graph->carry_overs, &carry_over);
}
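
/* Both `from` and `to` are wrapped as multiview tensors here, so a carry-over is
 * presumably a pair of tensors whose contents have to be propagated from one loop
 * iteration to the next when this graph is executed repeatedly. */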

int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
{
	assert(graph == source.graph);
	assert(graph == destination.graph);
	assert(source.d < graph->exec_info->rnum);
	assert(destination.d < graph->exec_info->rnum);
	ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
	if (src_info->outgoings == 0)
		src_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
	else {
		int i;
		// Check if this is already connected, if so, skip.
		for (i = 0; i < src_info->outgoings->rnum; i++)
			if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
				return -1;
	}
	ccv_array_push(src_info->outgoings, &destination.d);
	graph->topsorted = 0;
	return 0;
}

int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
{
	assert(graph == source.graph);
	assert(graph == destination.graph);
	assert(source.d < graph->exec_info->rnum);
	assert(destination.d < graph->exec_info->rnum);
	ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
	if (!src_info->outgoings)
		return -1;
	int i;
	// Check if this is connected, if so, remove the connection.
	for (i = 0; i < src_info->outgoings->rnum; i++)
		if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
		{
			if (i < src_info->outgoings->rnum - 1)
				*(int*)ccv_array_get(src_info->outgoings, i) = *(int*)ccv_array_get(src_info->outgoings, src_info->outgoings->rnum - 1);
			--src_info->outgoings->rnum;
			graph->topsorted = 0;
			return 0;
		}
	return -1;
}
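
/* Removal swaps the matched edge with the last element and shrinks rnum, so edge
 * order in outgoings is not preserved. Both concat and disjoin clear the topsorted
 * flag, so a fresh ccv_nnc_graph_topsort is needed before the ordering is relied
 * on again. */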

int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph)
{
	return graph->exec_info ? graph->exec_info->rnum : 0;
}

void* ccv_nnc_graph_buffer(ccv_nnc_graph_t* const graph, int size)
{
	if (graph->buffer_size >= size)
		return graph->buffer;
	graph->buffer_size = size;
	graph->buffer = (graph->buffer) ? ccrealloc(graph->buffer, size) : ccmalloc(size);
	return graph->buffer;
}
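
/* ccv_nnc_graph_buffer is a grow-only scratch buffer: it only reallocates when the
 * requested size exceeds what is already held, and never shrinks, so callers can
 * treat repeated requests of the same or smaller size as essentially free. */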

void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size)
{
	if (exec_cvt_size == 0 && graph->exec_info->rnum == 0)
	{
		graph->topsorted = 1;
		return;
	}
	assert(exec_cvt_size == graph->exec_info->rnum);
	assert(graph->sources && graph->sources->rnum);
	assert(graph->destinations && graph->destinations->rnum);
	int i, j;
	for (i = 0; i < exec_cvt_size; i++)
		exec_cvt[i] = -1;
	ccv_array_t* exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), graph->exec_info->rnum, 0);
	// If there are breakpoints, it is more complicated, we first start to the breakpoints, and then continue from the breakpoints to the destinations.
	if (graph->breakpoint_size)
	{
		ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, graph->breakpoints, graph->breakpoint_size, 0);
		for (i = 0; i < graph->breakpoint_size; i++)
			exec_cvt[graph->breakpoints[i].d] = -2; // Mark this as breakpoints, so we will skip the first round.
		ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx) {
			assert(!node->pair_ref); // If node has a pair ref, we cannot fix it up.
			if (exec_cvt[idx] == -2) // Skip breakpoint.
				continue;
			// Loop over node and push to the array.
			ccv_array_push(exec_info, node);
			// Go to its sub-graph to fix exec_idx
			for (i = 0; i < node->graph_ref_size; i++)
			{
				const int graph_ref = CCV_NNC_GRAPH_REF(node)[i] - 1;
				if (graph_ref >= 0)
				{
					ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref);
					sub_graph->exec_idx = exec_info->rnum;
				}
			}
			exec_cvt[idx] = exec_info->rnum - 1;
		} ccv_nnc_graph_visit_endfor
		ccv_nnc_graph_visit_free(visit);
		graph->breakpoint_offset = exec_info->rnum;
		visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0);
1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void *)(((char*)((graph->destinations)->data)) + (size_t)(graph ->destinations)->rsize * (size_t)(0))))[_i_].graph == graph ) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char *)((graph->destinations)->data)) + (size_t)(graph->destinations )->rsize * (size_t)(0))))[_i_].d].r == 7) continue; if (!( 0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)(( void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d]. c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d]. c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0" , "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*) (((char*)((graph->destinations)->data)) + (size_t)(graph ->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d)) ; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d]. d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_) ; } while (0);; ((void) sizeof ((_visit_->size <= (graph ->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (graph->exec_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c" , 542, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); | |||
543 | ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))) const node __attribute__ ((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(( graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0)))) + idx; { | |||
544 | assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ ( { if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref" , "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__); })); // If node has a pair ref, we cannot fix it up. | |||
545 | // Loop over node and push to the array. | |||
546 | ccv_array_push(exec_info, node); | |||
547 | // Go to its sub-graph to fix exec_idx | |||
548 | for (i = 0; i < node->graph_ref_size; i++) | |||
549 | { | |||
550 | const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node )->_inline_graph_ref)[i] - 1; | |||
551 | if (graph_ref >= 0) | |||
552 | { | |||
553 | ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t )(graph->sub_graphs)->rsize * (size_t)(graph_ref))); | |||
554 | sub_graph->exec_idx = exec_info->rnum; | |||
555 | } | |||
556 | } | |||
557 | exec_cvt[idx] = exec_info->rnum - 1; | |||
558 | } ccv_nnc_graph_visit_endfor} } | |||
559 | ccv_nnc_graph_visit_free(visit); | |||
560 | for (i = 0; i < graph->breakpoint_size; i++) | |||
561 | { assert(exec_cvt[graph->breakpoints[i].d] >= 0)((void) sizeof ((exec_cvt[graph->breakpoints[i].d] >= 0 ) ? 1 : 0), __extension__ ({ if (exec_cvt[graph->breakpoints [i].d] >= 0) ; else __assert_fail ("exec_cvt[graph->breakpoints[i].d] >= 0" , "ccv_nnc_graph.c", 561, __extension__ __PRETTY_FUNCTION__); })); } // All breakpoints should be assigned. | |||
562 | } else { | |||
563 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((graph->exec_info->rnum) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_ ++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((graph->exec_info-> rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_ ; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof (ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof (int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph ->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t* )(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info ->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum ); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*) (((char*)((graph->sources)->data)) + (size_t)(graph-> sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 
1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)((( char*)((graph->sources)->data)) + (size_t)(graph->sources )->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)(( graph->sources)->data)) + (size_t)(graph->sources)-> rsize * (size_t)(0))))[_i_].d].r = 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->sources)->data)) + (size_t) (graph->sources)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_ [2] = { (graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue ; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t*) ((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_ [_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum) ) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (graph->sources->rnum); _i_++) { ((void ) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph-> sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 
1 : 0), __extension__ ( { if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources )->data)) + (size_t)(graph->sources)->rsize * (size_t )(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)(( graph->sources)->data)) + (size_t)(graph->sources)-> rsize * (size_t)(0))))[_i_].d].r = 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->sources)->data)) + (size_t) (graph->sources)->rsize * (size_t)(0))))[_i_].d; } _exist_size_ [0] = (graph->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 3) continue; _incomings_[_idx_].r = 4 ; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < (( ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int *)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data )) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)( (graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t )(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_ ; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue; _incomings_ [d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph-> exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)", "ccv_nnc_graph.c" , 563, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_] [_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_ ), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph ->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph) ? 
1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)(( graph->destinations)->data)) + (size_t)(graph->destinations )->rsize * (size_t)(0))))[_i_].d].r = 5; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations )->data)) + (size_t)(graph->destinations)->rsize * ( size_t)(0))))[_i_].d; } _exist_size_[0] = (graph->destinations ->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum) ) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (graph->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)(( graph->destinations)->data)) + (size_t)(graph->destinations )->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph-> destinations)->data)) + (size_t)(graph->destinations)-> rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)(( graph->destinations)->data)) + (size_t)(graph->destinations )->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_ < (graph->sources->rnum); _i_++) { ((void) sizeof ( (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources )->data)) + (size_t)(graph->sources)->rsize * (size_t )(0))))[_i_].graph == graph) ? 
1 : 0), __extension__ ({ if (( (ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)-> data)) + (size_t)(graph->sources)->rsize * (size_t)(0)) ))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char *)((graph->sources)->data)) + (size_t)(graph->sources )->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_ [0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0 ; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_ [_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph ->destinations->rnum)) { _exists_[_p_][_i_] = d; continue ; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((( (ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph->destinations->rnum)) { ((void) sizeof ((_exist_size_ [_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum) ) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_ )); } for (_i_ = 0; _i_ < (graph->destinations->rnum ); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*) (((char*)((graph->destinations)->data)) + (size_t)(graph ->destinations)->rsize * (size_t)(0))))[_i_].graph == graph ) ? 
1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void *)(((char*)((graph->destinations)->data)) + (size_t)(graph ->destinations)->rsize * (size_t)(0))))[_i_].graph == graph ) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char *)((graph->destinations)->data)) + (size_t)(graph->destinations )->rsize * (size_t)(0))))[_i_].d].r == 7) continue; if (!( 0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)(( void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d]. c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d]. c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0" , "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*) (((char*)((graph->destinations)->data)) + (size_t)(graph ->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d)) ; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))))[_i_].d]. d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_) ; } while (0);; ((void) sizeof ((_visit_->size <= (graph ->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (graph->exec_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c" , 563, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); | |||
564 | ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))) const node __attribute__ ((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(( graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0)))) + idx; { | |||
565 | assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ ( { if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref" , "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__); })); // If node has a pair ref, we cannot fix it up. | |||
566 | // Loop over node and push to the array. | |||
567 | ccv_array_push(exec_info, node); | |||
568 | // Go to its sub-graph to fix exec_idx | |||
569 | for (i = 0; i < node->graph_ref_size; i++) | |||
570 | { | |||
571 | const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node )->_inline_graph_ref)[i] - 1; | |||
572 | if (graph_ref >= 0) | |||
573 | { | |||
574 | ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t )(graph->sub_graphs)->rsize * (size_t)(graph_ref))); | |||
575 | sub_graph->exec_idx = exec_info->rnum; | |||
576 | } | |||
577 | } | |||
578 | exec_cvt[idx] = exec_info->rnum - 1; | |||
579 | } ccv_nnc_graph_visit_endfor} } | |||
580 | ccv_nnc_graph_visit_free(visit); | |||
581 | } | |||
582 | assert(graph->exec_info->rnum == exec_info->rnum)((void) sizeof ((graph->exec_info->rnum == exec_info-> rnum) ? 1 : 0), __extension__ ({ if (graph->exec_info-> rnum == exec_info->rnum) ; else __assert_fail ("graph->exec_info->rnum == exec_info->rnum" , "ccv_nnc_graph.c", 582, __extension__ __PRETTY_FUNCTION__); })); | |||
583 | ccv_array_free(graph->exec_info); | |||
584 | graph->exec_info = exec_info; | |||
585 | for (i = 0; i < graph->sources->rnum; i++) | |||
586 | { | |||
587 | ccv_nnc_graph_exec_t* const source = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, i)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph ->sources)->rsize * (size_t)(i))); | |||
588 | source->d = exec_cvt[source->d]; | |||
589 | } | |||
590 | for (i = 0; i < graph->destinations->rnum; i++) | |||
591 | { | |||
592 | ccv_nnc_graph_exec_t* const destination = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(i))); | |||
593 | destination->d = exec_cvt[destination->d]; | |||
594 | } | |||
595 | // Update all outgoings to reflect the latest. | |||
596 | for (i = 0; i < exec_info->rnum; i++) | |||
597 | { | |||
598 | ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(exec_info, i)((void*)(((char*)((exec_info)->data)) + (size_t)(exec_info )->rsize * (size_t)(i))); | |||
599 | if (info->outgoings) | |||
600 | for (j = 0; j < info->outgoings->rnum; j++) | |||
601 | *(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)( info->outgoings)->rsize * (size_t)(j))) = exec_cvt[*(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)( info->outgoings)->rsize * (size_t)(j)))]; | |||
602 | } | |||
603 | graph->topsorted = 1; | |||
604 | } | |||
605 | ||||
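// Per-stream bookkeeping used while building a static schedule: the device the stream runs on,
// an associated exec index, the signals already waited on through this stream (signal_set), and
// the commands issued on it (command_set, used to break ties on rank when reusing streams).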
606 | typedef struct { | |||
607 | int device_id; | |||
608 | int exec_idx; | |||
609 | ccv_array_t* signal_set; | |||
610 | ccv_array_t* command_set; // The set of commands executed in this stream. In case there is a tie (on rank), we will check this. | |||
611 | } ccv_nnc_stream_data_t; | |||
612 | ||||
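// Assign wait / signal pairs for one node: for every incoming exec whose streams do not already
// cover this node's streams, allocate (or reuse) a signal on each incoming stream that is not
// shared, record it in this node's wait list, and remember in signal_set which signals each of
// this node's streams has seen so the same signal is never waited on twice.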
613 | static void _ccv_nnc_graph_schedule_assign_signals(ccv_array_t* const incoming, ccv_nnc_graph_exec_schedule_t* const node, ccv_array_t* const stream_data, int* const signal_size, ccv_nnc_graph_exec_schedule_t* const exec_info, const int exec_info_size) | |||
614 | { | |||
615 | assert(incoming->rnum > 0)((void) sizeof ((incoming->rnum > 0) ? 1 : 0), __extension__ ({ if (incoming->rnum > 0) ; else __assert_fail ("incoming->rnum > 0" , "ccv_nnc_graph.c", 615, __extension__ __PRETTY_FUNCTION__); })); | |||
616 | int i, j, k; | |||
617 | int wait_size = 0, max_wait_size = 0; | |||
618 | for (i = 0; i < incoming->rnum; i++) | |||
619 | { | |||
620 | const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)-> rsize * (size_t)(i))); | |||
621 | ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx; | |||
622 | assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ? 1 : 0), __extension__ ({ if (incoming_exec_info->stream_size > 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0" , "ccv_nnc_graph.c", 622, __extension__ __PRETTY_FUNCTION__); })); | |||
623 | max_wait_size += incoming_exec_info->stream_size; | |||
624 | } | |||
625 | int waits[ccv_max(1, max_wait_size)({ typeof (1) _a = (1); typeof (max_wait_size) _b = (max_wait_size ); (_a > _b) ? _a : _b; })]; | |||
626 | assert(node->stream_size > 0)((void) sizeof ((node->stream_size > 0) ? 1 : 0), __extension__ ({ if (node->stream_size > 0) ; else __assert_fail ("node->stream_size > 0" , "ccv_nnc_graph.c", 626, __extension__ __PRETTY_FUNCTION__); })); | |||
627 | for (i = 0; i < incoming->rnum; i++) | |||
628 | { | |||
629 | const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)-> rsize * (size_t)(i))); | |||
630 | assert(incoming_idx < exec_info_size)((void) sizeof ((incoming_idx < exec_info_size) ? 1 : 0), __extension__ ({ if (incoming_idx < exec_info_size) ; else __assert_fail ("incoming_idx < exec_info_size", "ccv_nnc_graph.c", 630, __extension__ __PRETTY_FUNCTION__); })); | |||
631 | assert(incoming_idx >= 0)((void) sizeof ((incoming_idx >= 0) ? 1 : 0), __extension__ ({ if (incoming_idx >= 0) ; else __assert_fail ("incoming_idx >= 0" , "ccv_nnc_graph.c", 631, __extension__ __PRETTY_FUNCTION__); })); | |||
632 | ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx; | |||
633 | assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ? 1 : 0), __extension__ ({ if (incoming_exec_info->stream_size > 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0" , "ccv_nnc_graph.c", 633, __extension__ __PRETTY_FUNCTION__); })); | |||
634 | int stream_synced = 1; | |||
635 | // If the current node's streams are a subset of the incoming node's streams, there | |||
636 | // is no need to sync with a signal, because we are already synced with the incoming node. | |||
637 | for (j = 0; stream_synced && j < node->stream_size; j++) | |||
638 | { | |||
639 | const int s = SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node )._heap_streams)[j]; | |||
640 | assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >= 0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 640 , __extension__ __PRETTY_FUNCTION__); })); | |||
641 | int flag = 0; | |||
642 | for (k = 0; !flag && k < incoming_exec_info->stream_size; k++) | |||
643 | flag = (SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info )._inline_streams : (*incoming_exec_info)._heap_streams)[k] == s); | |||
644 | stream_synced = flag; | |||
645 | } | |||
646 | if (stream_synced) | |||
647 | continue; | |||
648 | // Otherwise, find the streams we need to sync with, and create signals for these. | |||
649 | for (j = 0; j < incoming_exec_info->stream_size; j++) | |||
650 | { | |||
651 | const int s = SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info )._inline_streams : (*incoming_exec_info)._heap_streams)[j]; | |||
652 | assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >= 0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 652 , __extension__ __PRETTY_FUNCTION__); })); | |||
653 | int flag = 0; | |||
654 | for (k = 0; !flag && k < node->stream_size; k++) | |||
655 | flag = (SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node )._heap_streams)[k] == s); | |||
656 | if (!flag) // Need to have a signal. | |||
657 | { | |||
658 | if (SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info )._inline_signals : (*incoming_exec_info)._heap_signals)[j] < 0) | |||
659 | SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info )._inline_signals : (*incoming_exec_info)._heap_signals)[j] = (*signal_size)++; | |||
660 | else { | |||
661 | int flag = 0; | |||
662 | // If any of the streams on the current node has already seen this signal, we are good already. | |||
663 | for (k = 0; !flag && k < node->stream_size; k++) | |||
664 | { | |||
665 | assert(SCHEDULE_STREAMS(*node)[k] >= 0)((void) sizeof ((((*node).stream_size <= 1 ? (*node)._inline_streams : (*node)._heap_streams)[k] >= 0) ? 1 : 0), __extension__ ({ if (((*node).stream_size <= 1 ? (*node)._inline_streams : (*node)._heap_streams)[k] >= 0) ; else __assert_fail ("SCHEDULE_STREAMS(*node)[k] >= 0" , "ccv_nnc_graph.c", 665, __extension__ __PRETTY_FUNCTION__); })); | |||
666 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(((*node).stream_size <= 1 ? (*node) ._inline_streams : (*node)._heap_streams)[k]))); | |||
667 | flag = (data->signal_set && ccv_array_find_int(data->signal_set, SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info )._inline_signals : (*incoming_exec_info)._heap_signals)[j])); | |||
668 | } | |||
669 | if (flag) | |||
670 | continue; | |||
671 | } | |||
672 | // Otherwise, we need to wait for this. Currently, our granularity is to wait on all of this node's streams. | |||
673 | waits[wait_size++] = SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info )._inline_signals : (*incoming_exec_info)._heap_signals)[j]; | |||
674 | // All streams on this node have seen this signal. | |||
675 | for (k = 0; k < node->stream_size; k++) | |||
676 | { | |||
677 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(((*node).stream_size <= 1 ? (*node) ._inline_streams : (*node)._heap_streams)[k]))); | |||
678 | if (!data->signal_set) | |||
679 | data->signal_set = ccv_array_new(sizeof(int), 0, 0); | |||
680 | ccv_array_push(data->signal_set, &SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info )._inline_signals : (*incoming_exec_info)._heap_signals)[j]); | |||
681 | } | |||
682 | } | |||
683 | } | |||
684 | } | |||
685 | node->wait_size = wait_size; | |||
686 | if (wait_size > 0) | |||
687 | { | |||
688 | node->waits = node->waits ? ccreallocrealloc(node->waits, sizeof(int) * wait_size) : ccmallocmalloc(sizeof(int) * wait_size); | |||
689 | memcpy(node->waits, waits, sizeof(int) * wait_size); | |||
690 | } | |||
691 | } | |||
692 | ||||
693 | typedef struct { | |||
694 | int rank; | |||
695 | ccv_array_t* outgoings; | |||
696 | } ccv_nnc_incoming_t; | |||
697 | ||||
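// Pick the device ids a node should run on from its GPU inputs / outputs; if the node touches no
// GPU tensors, fall back to the device of the default stream data (index 0) or, failing that, to
// the device id passed in.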
698 | static int _ccv_nnc_device_ids_for_stream_data(ccv_nnc_graph_exec_info_t* const node, const int device_id, ccv_array_t* const stream_data, int* const device_ids, const int max_device_id_size) | |||
699 | { | |||
700 | // TODO: I need to re-think whether this is GPU only or not. | |||
701 | int device_id_size = ccv_nnc_device_ids_for_io(node->inputs, node->input_size, node->outputs, node->output_size, CCV_TENSOR_GPU_MEMORY, device_ids, max_device_id_size); | |||
702 | if (device_id_size == 0) | |||
703 | { | |||
704 | // If there is default stream data, use its device id. Otherwise, use the device id passed in (this will become the default stream data's device id). | |||
705 | if (stream_data->rnum > 0) | |||
706 | { | |||
707 | ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(0))); | |||
708 | device_ids[0] = default_data->device_id; | |||
709 | } else | |||
710 | device_ids[0] = device_id >= 0 ? device_id : 0; | |||
711 | device_id_size = 1; | |||
712 | } | |||
713 | return device_id_size; | |||
714 | } | |||
715 | ||||
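// Release everything a static schedule owns: per-exec heap stream / wait arrays, the stream and
// wait lists of the schedule itself, the partial order (psort), and the begin / end signals.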
716 | void ccv_nnc_graph_static_schedule_free(ccv_nnc_graph_static_schedule_t* const schedule) | |||
717 | { | |||
718 | int i; | |||
719 | ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info; | |||
720 | for (i = 0; i < schedule->exec_info_size; i++) | |||
721 | { | |||
722 | if (schd_info[i].stream_size > 1) | |||
723 | ccfreefree(schd_info[i]._heap_streams); | |||
724 | if (schd_info[i].waits) | |||
725 | ccfreefree(schd_info[i].waits); | |||
726 | } | |||
727 | if (schedule->stream_1s) | |||
728 | ccfreefree(schedule->stream_1s); | |||
729 | if (schedule->waits) | |||
730 | ccfreefree(schedule->waits); | |||
731 | if (schedule->psort) | |||
732 | ccfreefree(schedule->psort); | |||
733 | if (schedule->begin) | |||
734 | ccv_nnc_stream_signal_free(schedule->begin); | |||
735 | if (schedule->end) | |||
736 | ccv_nnc_stream_signal_free(schedule->end); | |||
737 | ccfreefree(schedule); | |||
738 | } | |||
739 | ||||
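// Build a static schedule over the topsorted graph: compute exec dependencies as a sparse matrix
// (longest path between dependent execs), assign each exec to streams keyed by device id, and
// connect execs on different streams with signal / wait pairs. When explicit sources or
// destinations are given, a partial order (psort) is recorded instead of the root schedule.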
740 | static ccv_nnc_graph_static_schedule_t* _ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int device_id, const int max_stream_count, ccv_nnc_stream_context_t* const stream_context, const ccv_nnc_graph_exec_t* const _sources, const int _source_size, const ccv_nnc_graph_exec_t* const _destinations, const int _destination_size) | |||
741 | { | |||
742 | assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources ->rnum) ? 1 : 0), __extension__ ({ if (graph->sources && graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum" , "ccv_nnc_graph.c", 742, __extension__ __PRETTY_FUNCTION__); })); | |||
| ||||
743 | assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph-> destinations->rnum) ? 1 : 0), __extension__ ({ if (graph-> destinations && graph->destinations->rnum) ; else __assert_fail ("graph->destinations && graph->destinations->rnum" , "ccv_nnc_graph.c", 743, __extension__ __PRETTY_FUNCTION__); })); | |||
744 | assert(graph->topsorted)((void) sizeof ((graph->topsorted) ? 1 : 0), __extension__ ({ if (graph->topsorted) ; else __assert_fail ("graph->topsorted" , "ccv_nnc_graph.c", 744, __extension__ __PRETTY_FUNCTION__); })); // Only support this on a topsorted graph. | |||
745 | const int exec_info_size = graph->exec_info->rnum; | |||
746 | assert(exec_info_size > 0)((void) sizeof ((exec_info_size > 0) ? 1 : 0), __extension__ ({ if (exec_info_size > 0) ; else __assert_fail ("exec_info_size > 0" , "ccv_nnc_graph.c", 746, __extension__ __PRETTY_FUNCTION__); })); | |||
747 | const ccv_nnc_graph_exec_t* const sources = _sources == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph ->sources)->rsize * (size_t)(0))) : _sources; | |||
748 | const int source_size = _sources == 0 ? graph->sources->rnum : _source_size; | |||
749 | if (!_sources) | |||
750 | { assert(_source_size == 0)((void) sizeof ((_source_size == 0) ? 1 : 0), __extension__ ( { if (_source_size == 0) ; else __assert_fail ("_source_size == 0" , "ccv_nnc_graph.c", 750, __extension__ __PRETTY_FUNCTION__); })); } | |||
751 | const ccv_nnc_graph_exec_t* const destinations = _destinations == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))) : _destinations; | |||
752 | const int destination_size = _destinations == 0 ? graph->destinations->rnum : _destination_size; | |||
753 | if (!_destinations) | |||
754 | { assert(_destination_size == 0)((void) sizeof ((_destination_size == 0) ? 1 : 0), __extension__ ({ if (_destination_size == 0) ; else __assert_fail ("_destination_size == 0" , "ccv_nnc_graph.c", 754, __extension__ __PRETTY_FUNCTION__); })); } | |||
755 | const int root_schedule = (_sources == 0 && _destinations == 0); | |||
756 | ccv_nnc_graph_static_schedule_t* const schedule = cccalloccalloc(1, sizeof(ccv_nnc_graph_static_schedule_t) + sizeof(ccv_nnc_graph_exec_schedule_t) * (exec_info_size - 1)); | |||
757 | schedule->exec_info_size = exec_info_size; | |||
758 | ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info; | |||
759 | ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0)((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(0))); | |||
760 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ( (sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0 , }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_ ]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_ [_idx_].r != 1) continue; _incomings_[_idx_].r = 2; if ((exec_info )[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_] .outgoings->rnum; _j_++) { const int d = *(int*)((void*)(( (char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t)( (exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); ++ _incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_ [d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size )) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources )[_i_].graph == graph) ? 
1 : 0), __extension__ ({ if ((sources )[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 760 , __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations )[_i_].d].r = 5; _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_ [0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)", "ccv_nnc_graph.c" , 760, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_] [_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_ ), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size ); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph ) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0 ; _i_ < (source_size); _i_++) { ((void) sizeof (((sources) [_i_].graph == graph) ? 
1 : 0), __extension__ ({ if ((sources )[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1 ; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0 ; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_ [_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings ) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size) ) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_ )); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void ) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 760 , __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[(destinations )[_i_].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_ [(destinations)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0", "ccv_nnc_graph.c" , 760, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_ [(destinations)[_i_].d].c > 0) continue; _visit_->node[ _visit_->size].index = (((destinations)[_i_].d)); _visit_-> node[_visit_->size].term = ((_incomings_[(destinations)[_i_ ].d].d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_ ); } while (0);; ((void) sizeof ((_visit_->size <= (exec_info_size )) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size )) ; else __assert_fail ("_visit_->size <= (exec_info_size)" , "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); | |||
761 | if (!root_schedule) | |||
762 | { | |||
763 | // If this is not a root schedule, we need to do partial topsort. | |||
764 | int psort_size = 0; | |||
765 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
766 | ++psort_size; | |||
767 | } ccv_nnc_graph_visit_endfor} } | |||
768 | schedule->psort = (int*)ccmallocmalloc(sizeof(int) * psort_size); | |||
769 | schedule->psort_size = psort_size; | |||
770 | psort_size = 0; | |||
771 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
772 | schedule->psort[psort_size++] = idx; | |||
773 | } ccv_nnc_graph_visit_endfor} } | |||
774 | } | |||
775 | int i, j, k; | |||
776 | // Generate exec dependencies (or, in other words, partial ordering of executions). | |||
777 | ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(exec_info_size, exec_info_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0); | |||
778 | int* buf = (int*)ccmallocmalloc(sizeof(int) * exec_info_size * 2); | |||
779 | int buf_size; | |||
780 | #define for_block(x, val) \ | |||
781 | do { \ | |||
782 | if (((int32_t*)val)[0] > 0) \ | |||
783 | { \ | |||
784 | buf[buf_size * 2] = x; \ | |||
785 | buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \ | |||
786 | ++buf_size; \ | |||
787 | } \ | |||
788 | } while (0) | |||
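// for_block gathers, into buf, every (ancestor index, path length + 1) pair recorded in the
// exec_dep row of the exec currently being visited; the loop below then propagates these pairs
// to each outgoing exec, keeping the longest path, so exec_dep ends up holding the transitive
// dependencies of every exec.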
789 | for (i = 0; i < exec_info_size; i++) | |||
790 | schd_info[i].stream_size = -1; | |||
791 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int term __attribute__((unused)) = (visit)->node[_i_ ].term; typeof ((exec_info)) const node __attribute__((unused )) = (exec_info) + idx; { | |||
792 | buf_size = 0; /* save all its parent deps to this buffer */ | |||
793 | ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx); | |||
794 | schd_info[idx].stream_size = 0; | |||
795 | if (vector) | |||
796 | CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S : { do { int _i_; __attribute__((unused)) const size_t _c_ = ( ((exec_dep)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size ; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_ ))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t ) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000 ) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t * const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_-> i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F: { do { int _i_; __attribute__((unused)) const size_t _c_ = ( ((exec_dep)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size ; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_ ))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t ) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000 ) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t * const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_-> i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S: { do { int _i_; __attribute__((unused)) const size_t _c_ = ( ((exec_dep)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size ; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_ ))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t ) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000 ) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t * const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_-> i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F: { do { int _i_; __attribute__((unused)) const size_t _c_ = ( ((exec_dep)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size ; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_ ))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t ) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000 ) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t * const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_-> i), (_d_.f64 + (0))); } } } while (0); break; } default: { do { int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep )->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR ) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block ((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const size_t _idx_size_ = 
sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size [(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep )->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector )->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_-> i), (_d_.u8 + (0))); } } } while (0); } } } while (0); | |||
797 | if (!node->outgoings) | |||
798 | continue; | |||
799 | for (i = 0; i < node->outgoings->rnum; i++) | |||
800 | { | |||
801 | int outgoing = *(int*)ccv_array_get(node->outgoings, i); | |||
802 | const int32_t one = 1; | |||
803 | ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx); | |||
804 | /* If not found, set it. If the current node is the destination node, there is no need | |||
805 | * to set itself as the parent of subsequent nodes because of its terminal nature. */ | |||
806 | if (!term && (!cell.i32 || cell.i32[0] == 0)) | |||
807 | ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one); | |||
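// Note: buf holds the pairs collected by for_block above; buf[j * 2] is an ancestor exec index of this node, and buf[j * 2 + 1] is the dependency count to propagate to the outgoing node. | |||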
808 | for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */ | |||
809 | { | |||
810 | ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]); | |||
811 | /* If not found, set */ | |||
812 | if (!cell.i32 || cell.i32[0] == 0) | |||
813 | ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]); | |||
814 | else { | |||
815 | /* Otherwise, set to the longest one */ | |||
816 | int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1]); | |||
817 | ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep); | |||
818 | } | |||
819 | } | |||
820 | } | |||
821 | } ccv_nnc_graph_visit_endfor | |||
822 | #undef for_block | |||
823 | ccfree(buf); | |||
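// At this point a nonzero exec_dep cell (a, b) records that exec b must run before exec a; its value is the length of the longest dependency chain between the two. | |||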
824 | // Algorithm to allocate signals and streams for this graph. | |||
825 | ccv_array_t* const stream_data = ccv_array_new(sizeof(ccv_nnc_stream_data_t), 0, 0); | |||
826 | ccv_array_t** const outgoings = cccalloc(exec_info_size, sizeof(ccv_array_t*)); | |||
827 | ccv_nnc_incoming_t* const incomings = cccalloc(exec_info_size, sizeof(ccv_nnc_incoming_t)); | |||
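// Scratch structures for the scheduler: stream_data holds the streams as they are assigned, outgoings[idx] the reduced outgoing edges of exec idx, and incomings[di] the matching reverse edges plus a rank computed further below. | |||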
828 | int max_device_id_size = 1; | |||
829 | // Filter out outgoing nodes that we will be able to reach afterwards anyway. | |||
830 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx) { | |||
831 | max_device_id_size = ccv_max(node->input_size + node->output_size, max_device_id_size); | |||
832 | if (node->outgoings) | |||
833 | { | |||
834 | outgoings[idx] = ccv_array_new(sizeof(int), 0, 0); | |||
835 | for (i = 0; i < node->outgoings->rnum; i++) | |||
836 | { | |||
837 | const int di = *(int*)ccv_array_get(node->outgoings, i); | |||
838 | // Skip if we haven't accessed this exec. | |||
839 | if (schd_info[di].stream_size < 0) | |||
840 | continue; | |||
841 | int flag = 0; | |||
842 | for (j = 0; !flag && j < node->outgoings->rnum; j++) | |||
843 | { | |||
844 | if (j != i) | |||
845 | { | |||
846 | const int dj = *(int*)ccv_array_get(node->outgoings, j); | |||
847 | ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, di, dj); | |||
848 | flag = (cell.i32 && cell.i32[0]); | |||
849 | } | |||
850 | } | |||
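// flag != 0 means another outgoing node dj already depends on di, so the direct edge from this node to di is redundant. | |||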
851 | if (!flag) | |||
852 | { | |||
853 | ccv_array_push(outgoings[idx], &di); | |||
854 | if (!incomings[di].outgoings) | |||
855 | incomings[di].outgoings = ccv_array_new(sizeof(int), 1, 0); | |||
856 | ccv_array_push(incomings[di].outgoings, &idx); | |||
857 | } | |||
858 | } | |||
859 | } | |||
860 | } ccv_nnc_graph_visit_endfor | |||
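// outgoings[] and incomings[] now describe a reduced dependency graph: edges already implied through one of a node's other outgoings have been dropped. | |||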
861 | #define visitor(node, idx, _) \ | |||
862 | if (node->outgoings) \ | |||
863 | for (i = 0; i < node->outgoings->rnum; i++) \ | |||
864 | { \ | |||
865 | const int d = *(int*)ccv_array_get(node->outgoings, i); \ | |||
866 | node->rank = ccv_max(incomings[d].rank + 1, node->rank); \ | |||
867 | } | |||
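// visitor raises each node's rank to one more than the highest rank among the entries in its outgoings list, i.e. a longest-path rank over the reduced graph. | |||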
868 |