Bug Summary

File:nnc/ccv_nnc_graph.c
Warning:line 352, column 7
Branch condition evaluates to a garbage value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_graph.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/14.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/buildslave/public_html/analyze/2022-06-22-151334-490440-1 -x c ccv_nnc_graph.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_graph.h"
6
7// MARK - Level-2 API
8
9ccv_nnc_graph_t* ccv_nnc_graph_new(void)
10{
11 ccv_nnc_graph_t* graph = (ccv_nnc_graph_t*)cccalloccalloc(1, sizeof(ccv_nnc_graph_t));
12 graph->exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), 5, 0);
13 return graph;
14}
15
16void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size)
17{
18 if (!graph->sources)
19 graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), source_size, 0);
20 else
21 ccv_array_clear(graph->sources);
22 int i;
23 for (i = 0; i < source_size; i++)
24 ccv_array_push(graph->sources, sources + i);
25 graph->topsorted = 0;
26}
27
28ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph)
29{
30 return graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: 0;
31}
32
33int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph)
34{
35 return graph->sources ? graph->sources->rnum : 0;
36}
37
38void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
39{
40 if (!graph->destinations)
41 graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), destination_size, 0);
42 else
43 ccv_array_clear(graph->sources);
44 int i;
45 for (i = 0; i < destination_size; i++)
46 ccv_array_push(graph->destinations, destinations + i);
47 graph->topsorted = 0;
48}
49
50ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph)
51{
52 return graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: 0;
53}
54
55int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph)
56{
57 return graph->destinations ? graph->destinations->rnum : 0;
58}
59
60void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd)
61{
62 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 62, __extension__ __PRETTY_FUNCTION__); }
))
;
63 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 63, __extension__ __PRETTY_FUNCTION__); }
))
;
64 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
65 exec_info->cmd = cmd;
66}
67
68ccv_nnc_cmd_t ccv_nnc_graph_exec_cmd(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
69{
70 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 70, __extension__ __PRETTY_FUNCTION__); }
))
;
71 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 71, __extension__ __PRETTY_FUNCTION__); }
))
;
72 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
73 return exec_info->cmd;
74}
75
76void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint)
77{
78 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 78, __extension__ __PRETTY_FUNCTION__); }
))
;
79 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 79, __extension__ __PRETTY_FUNCTION__); }
))
;
80 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
81 exec_info->hint = hint;
82}
83
84static int _ccv_nnc_tensor_multiview_level_count(const ccv_nnc_tensor_multiview_t* const mv)
85{
86 if (!CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
87 return 1;
88 const int count = mv->kind + mv->repeat;
89 int i, c = 0;
90 for (i = 0; i < count; i++)
91 {
92 ccv_nnc_tensor_t* tv = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i];
93 if (tv == CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10)))
94 c = ccv_max(c, 1)({ typeof (c) _a = (c); typeof (1) _b = (1); (_a > _b) ? _a
: _b; })
;
95 else
96 c = ccv_max(c, _ccv_nnc_tensor_multiview_level_count((ccv_nnc_tensor_multiview_t*)tv))({ typeof (c) _a = (c); typeof (_ccv_nnc_tensor_multiview_level_count
((ccv_nnc_tensor_multiview_t*)tv)) _b = (_ccv_nnc_tensor_multiview_level_count
((ccv_nnc_tensor_multiview_t*)tv)); (_a > _b) ? _a : _b; }
)
;
97 }
98 return c + 1;
99}
100
101static ccv_nnc_graph_tensor_wrap_t* _ccv_nnc_graph_tensor_wrap_new(const ccv_nnc_tensor_multiview_t* const mv)
102{
103 const int level_count = _ccv_nnc_tensor_multiview_level_count(mv);
104 ccv_nnc_graph_tensor_wrap_t* tensor_wrap = (ccv_nnc_graph_tensor_wrap_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_tensor_wrap_t) + sizeof(ccv_nnc_tensor_t*) * (level_count - 1));
105 tensor_wrap->update_required = 0;
106 tensor_wrap->count = level_count;
107 tensor_wrap->index = 0;
108 tensor_wrap->tensors[0] = (ccv_nnc_tensor_t*)mv;
109 return tensor_wrap;
110}
111
112static void _ccv_nnc_graph_exec_rewind(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
113{
114 if (!info->tensor_wraps_ref)
115 return;
116 int i;
117 assert(info->tensor_wraps_ref <= graph->tensor_wraps->rnum)((void) sizeof ((info->tensor_wraps_ref <= graph->tensor_wraps
->rnum) ? 1 : 0), __extension__ ({ if (info->tensor_wraps_ref
<= graph->tensor_wraps->rnum) ; else __assert_fail (
"info->tensor_wraps_ref <= graph->tensor_wraps->rnum"
, "ccv_nnc_graph.c", 117, __extension__ __PRETTY_FUNCTION__);
}))
;
118 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(info->tensor_wraps_ref
- 1)))
;;
119 // Rewind from tensor wraps.
120 for (i = 0; i < info->input_size; i++)
121 if (tensor_wrap_array->tensor_wraps[i])
122 info->inputs[i] = tensor_wrap_array->tensor_wraps[i]->tensors[0];
123 const int d = info->input_size;
124 for (i = 0; i < info->output_size; i++)
125 if (tensor_wrap_array->tensor_wraps[d + i])
126 info->outputs[i] = tensor_wrap_array->tensor_wraps[d + i]->tensors[0];
127 const int dd = info->input_size + info->output_size;
128 for (i = 0; i < info->update_size; i++)
129 if (tensor_wrap_array->tensor_wraps[dd + i])
130 info->updates[i] = tensor_wrap_array->tensor_wraps[dd + i]->tensors[0];
131}
132
133static void _ccv_nnc_graph_tensor_wrap_free(ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
134{
135 ccfreefree(tensor_wrap);
136}
137
138ccv_nnc_graph_tensor_wrap_array_t* ccv_nnc_get_tensor_wrap_array(ccv_nnc_graph_t* const graph, const int tensor_wrap_size, int* const tensor_wraps_ref)
139{
140 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = *tensor_wraps_ref ? (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, *tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(*tensor_wraps_ref
- 1)))
: 0;
141 // Otherwise, find an open slot.
142 if (!tensor_wrap_array_ref)
143 {
144 if (!graph->tensor_wraps)
145 graph->tensor_wraps = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wrap_array_t*), 0, 0);
146 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = 0;
147 ccv_array_push(graph->tensor_wraps, &tensor_wrap_array);
148 tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, graph->tensor_wraps->rnum - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(graph->tensor_wraps
->rnum - 1)))
;
149 *tensor_wraps_ref = graph->tensor_wraps->rnum;
150 }
151 int i;
152 if (*tensor_wrap_array_ref)
153 {
154 if ((*tensor_wrap_array_ref)->size != tensor_wrap_size)
155 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)ccreallocrealloc(*tensor_wrap_array_ref, sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1));
156 for (i = (*tensor_wrap_array_ref)->size; i < tensor_wrap_size; i++)
157 (*tensor_wrap_array_ref)->tensor_wraps[i] = 0;
158 } else
159 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)cccalloccalloc(sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1), 1);
160 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
161 tensor_wrap_array->size = tensor_wrap_size;
162 return tensor_wrap_array;
163}
164
165void ccv_nnc_set_tensor_wraps(ccv_nnc_graph_tensor_wrap_t** const tensor_wraps, ccv_nnc_tensor_t* const* const tensors, const int tensor_size)
166{
167 int i;
168 for (i = 0; i < tensor_size; i++)
169 if (tensors[i])
170 {
171 if (CCV_IS_TENSOR_MULTIVIEW(tensors[i])((*(int*)(tensors[i])) & CCV_TENSOR_MULTIVIEW) &&
172 ((ccv_nnc_tensor_multiview_t*)tensors[i])->anchor != CCV_NNC_MULTIVIEW_PHI(intptr_t)0x1)
173 {
174 if (!tensor_wraps[i] || tensors[i] != tensor_wraps[i]->tensors[0])
175 {
176 if (tensor_wraps[i])
177 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
178 tensor_wraps[i] = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)tensors[i]);
179 }
180 } else {
181 if (tensor_wraps[i])
182 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
183 tensor_wraps[i] = 0;
184 }
185 }
186}
187
188void ccv_nnc_graph_register_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
189{
190 ccv_nnc_graph_t* p = graph;
191 const ccv_nnc_graph_tensor_wraps_ref_t tensor_wraps_ref = {
192 .d = tensor_wraps_ref_d,
193 .graph = graph,
194 };
195 do {
196 if (!p->tensor_wraps_refs)
197 {
198 p->tensor_wraps_refs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wraps_ref_t), 0, 0);
199 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
200 } else {
201 int i;
202 int has_tensor_wraps_ref = 0;
203 for (i = 0; !has_tensor_wraps_ref && i < p->tensor_wraps_refs->rnum; i++)
204 {
205 ccv_nnc_graph_tensor_wraps_ref_t* tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i)((void*)(((char*)((p->tensor_wraps_refs)->data)) + (size_t
)(p->tensor_wraps_refs)->rsize * (size_t)(i)))
;
206 has_tensor_wraps_ref = (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph);
207 }
208 if (!has_tensor_wraps_ref)
209 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
210 }
211 p = p->p;
212 } while (p);
213}
214
215static void _ccv_nnc_graph_redo_tensor_wraps(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
216{
217 int i;
218 const int has_wrap = ccv_nnc_tensors_have_wraps(info->inputs, info->input_size) ||
219 ccv_nnc_tensors_have_wraps(info->outputs, info->output_size) ||
220 ccv_nnc_tensors_have_wraps(info->updates, info->update_size);
221 if (has_wrap)
222 {
223 const int tensor_wrap_size = info->input_size + info->output_size + info->update_size;
224 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = ccv_nnc_get_tensor_wrap_array(graph, tensor_wrap_size, &info->tensor_wraps_ref);
225 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps, info->inputs, info->input_size);
226 const int d = info->input_size;
227 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + d, info->outputs, info->output_size);
228 const int dd = info->input_size + info->output_size;
229 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + dd, info->updates, info->update_size);
230 } else if (info->tensor_wraps_ref) {
231 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(info->tensor_wraps_ref
- 1)))
;
232 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
233 if (tensor_wrap_array)
234 {
235 for (i = 0; i < tensor_wrap_array->size; i++)
236 if (tensor_wrap_array->tensor_wraps[i])
237 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[i]);
238 ccfreefree(tensor_wrap_array);
239 *tensor_wrap_array_ref = 0;
240 info->tensor_wraps_ref = 0;
241 }
242 }
243}
244
245static void _ccv_nnc_graph_deregister_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
246{
247 ccv_nnc_graph_t* p = graph;
248 do {
249 int i;
250 // Remove from the array.
251 if (p->tensor_wraps_refs)
252 for (i = 0; i < p->tensor_wraps_refs->rnum; i++)
253 {
254 ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i)((void*)(((char*)((p->tensor_wraps_refs)->data)) + (size_t
)(p->tensor_wraps_refs)->rsize * (size_t)(i)))
;
255 if (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph)
256 {
257 --p->tensor_wraps_refs->rnum;
258 if (i < p->tensor_wraps_refs->rnum)
259 memcpy(tensor_wraps_ref, tensor_wraps_ref + 1, sizeof(ccv_nnc_graph_exec_t) * (p->tensor_wraps_refs->rnum - i));
260 break;
261 }
262 }
263 p = p->p;
264 } while (p);
265}
266
267void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size)
268{
269 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 269, __extension__ __PRETTY_FUNCTION__);
}))
;
270 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 270, __extension__ __PRETTY_FUNCTION__);
}))
;
271 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
272 assert(input_flag_size <= info->input_size)((void) sizeof ((input_flag_size <= info->input_size) ?
1 : 0), __extension__ ({ if (input_flag_size <= info->
input_size) ; else __assert_fail ("input_flag_size <= info->input_size"
, "ccv_nnc_graph.c", 272, __extension__ __PRETTY_FUNCTION__);
}))
;
273 assert(output_flag_size <= info->output_size)((void) sizeof ((output_flag_size <= info->output_size)
? 1 : 0), __extension__ ({ if (output_flag_size <= info->
output_size) ; else __assert_fail ("output_flag_size <= info->output_size"
, "ccv_nnc_graph.c", 273, __extension__ __PRETTY_FUNCTION__);
}))
;
274 if (info->input_size + info->output_size == 0)
275 return;
276 if (!info->input_flags)
277 {
278 info->input_flags = (int*)cccalloccalloc(info->input_size + info->output_size, sizeof(int));
279 info->output_flags = info->input_flags + info->input_size;
280 }
281 if (input_flag_size > 0)
282 memcpy(info->input_flags, input_flags, sizeof(int) * input_flag_size);
283 if (output_flag_size > 0)
284 memcpy(info->output_flags, output_flags, sizeof(int) * output_flag_size);
285}
286
287void ccv_nnc_graph_exec_pair_with(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t pair_exec)
288{
289 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 289, __extension__ __PRETTY_FUNCTION__);
}))
;
290 assert(exec.d >= 0)((void) sizeof ((exec.d >= 0) ? 1 : 0), __extension__ ({ if
(exec.d >= 0) ; else __assert_fail ("exec.d >= 0", "ccv_nnc_graph.c"
, 290, __extension__ __PRETTY_FUNCTION__); }))
;
291 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 291, __extension__ __PRETTY_FUNCTION__);
}))
;
292 assert(pair_exec.graph == graph || pair_exec.graph == graph->pair)((void) sizeof ((pair_exec.graph == graph || pair_exec.graph ==
graph->pair) ? 1 : 0), __extension__ ({ if (pair_exec.graph
== graph || pair_exec.graph == graph->pair) ; else __assert_fail
("pair_exec.graph == graph || pair_exec.graph == graph->pair"
, "ccv_nnc_graph.c", 292, __extension__ __PRETTY_FUNCTION__);
}))
;
293 assert(pair_exec.d >= 0)((void) sizeof ((pair_exec.d >= 0) ? 1 : 0), __extension__
({ if (pair_exec.d >= 0) ; else __assert_fail ("pair_exec.d >= 0"
, "ccv_nnc_graph.c", 293, __extension__ __PRETTY_FUNCTION__);
}))
;
294 if (pair_exec.graph == graph)
295 { assert(pair_exec.d < graph->exec_info->rnum)((void) sizeof ((pair_exec.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (pair_exec.d < graph->exec_info
->rnum) ; else __assert_fail ("pair_exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 295, __extension__ __PRETTY_FUNCTION__);
}))
; }
296 else
297 { assert(pair_exec.d < graph->pair->exec_info->rnum)((void) sizeof ((pair_exec.d < graph->pair->exec_info
->rnum) ? 1 : 0), __extension__ ({ if (pair_exec.d < graph
->pair->exec_info->rnum) ; else __assert_fail ("pair_exec.d < graph->pair->exec_info->rnum"
, "ccv_nnc_graph.c", 297, __extension__ __PRETTY_FUNCTION__);
}))
; }
298 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
299 exec_info->pair_ref = pair_exec.d + 1;
300}
301
302static ccv_nnc_tensor_t* _ccv_nnc_any_tensor_from_tensor_multiview(ccv_nnc_tensor_multiview_t* const mv)
303{
304 ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
305 while (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
306 {
307 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
308 const int count = 0;
309 const int off = mv->kind;
310 const int mod = mv->repeat;
311 // If reached the root.
312 tensor = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
313 }
314 return tensor;
315}
316
317void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
318{
319 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 319, __extension__ __PRETTY_FUNCTION__);
}))
;
1
Assuming field 'd' is < field 'rnum'
2
Taking true branch
320 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 320, __extension__ __PRETTY_FUNCTION__);
}))
;
3
Assuming 'graph' is equal to field 'graph'
4
Taking true branch
321 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
322 // De-register from the graph if it contains multiview tensors.
323 if (info->tensor_wraps_ref)
5
Assuming field 'tensor_wraps_ref' is 0
6
Taking false branch
324 _ccv_nnc_graph_deregister_tensor_wraps(graph, info->tensor_wraps_ref - 1);
325 // In case it is already executed, rewind.
326 _ccv_nnc_graph_exec_rewind(info, graph);
327 if (input_size == 0 && output_size == 0)
7
Assuming 'input_size' is not equal to 0
328 {
329 if (info->input_size > 0 || info->output_size > 0)
330 ccfreefree(info->inputs);
331 info->inputs = 0;
332 info->outputs = 0;
333 info->input_size = 0;
334 info->output_size = 0;
335 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
336 if (info->tensor_wraps_ref)
337 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
338 return;
339 }
340 if (info->inputs)
8
Assuming field 'inputs' is null
9
Taking false branch
341 info->inputs = (ccv_nnc_tensor_t**)ccreallocrealloc(info->inputs, sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
342 else
343 info->inputs = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
10
Storing uninitialized value
344 info->outputs = info->inputs + input_size;
345 if (inputs)
11
Assuming 'inputs' is null
12
Taking false branch
346 memcpy(info->inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
347 if (outputs)
13
Assuming 'outputs' is null
14
Taking false branch
348 memcpy(info->outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
349 int i;
350 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
351 for (i = 0; i < input_size + output_size; i++)
15
The value 0 is assigned to 'i'
16
Assuming the condition is true
17
Loop condition is true. Entering loop body
352 if (info->inputs[i])
18
Branch condition evaluates to a garbage value
353 {
354 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i])((*(int*)(info->inputs[i])) & CCV_TENSOR_MULTIVIEW) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i];
355 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
356 }
357 info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes);
358 info->input_size = input_size;
359 info->output_size = output_size;
360 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
361 // Register again if the tensor wraps exist.
362 if (info->tensor_wraps_ref)
363 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
364 // Free flags.
365 if (info->input_flags)
366 {
367 ccfreefree(info->input_flags);
368 info->input_flags = info->output_flags = 0;
369 }
370}
371
372void ccv_nnc_graph_exec_add_as_affected(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update)
373{
374 assert(CCV_IS_TENSOR_MULTIVIEW(update))((void) sizeof ((((*(int*)(update)) & CCV_TENSOR_MULTIVIEW
)) ? 1 : 0), __extension__ ({ if (((*(int*)(update)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(update)", "ccv_nnc_graph.c"
, 374, __extension__ __PRETTY_FUNCTION__); }))
;
375 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 375, __extension__ __PRETTY_FUNCTION__);
}))
;
376 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 376, __extension__ __PRETTY_FUNCTION__);
}))
;
377 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
378 const int register_tensor_wraps = !info->tensor_wraps_ref;
379 const int update_index = info->update_size;
380 ++info->update_size;
381 if (info->updates)
382 info->updates = (ccv_nnc_tensor_t**)ccreallocrealloc(info->updates, sizeof(ccv_nnc_tensor_t*) * info->update_size);
383 else
384 info->updates = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * info->update_size);
385 info->updates[update_index] = update;
386 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
387 if (register_tensor_wraps)
388 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
389}
390
391ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
392{
393 int d = graph->exec_info->rnum;
394 ccv_nnc_graph_exec_info_t info = {
395 .cmd = cmd,
396 .hint = hint,
397 .input_size = input_size,
398 .output_size = output_size,
399 };
400 assert(inputs || input_size == 0)((void) sizeof ((inputs || input_size == 0) ? 1 : 0), __extension__
({ if (inputs || input_size == 0) ; else __assert_fail ("inputs || input_size == 0"
, "ccv_nnc_graph.c", 400, __extension__ __PRETTY_FUNCTION__);
}))
;
401 assert(outputs || output_size == 0)((void) sizeof ((outputs || output_size == 0) ? 1 : 0), __extension__
({ if (outputs || output_size == 0) ; else __assert_fail ("outputs || output_size == 0"
, "ccv_nnc_graph.c", 401, __extension__ __PRETTY_FUNCTION__);
}))
;
402 if (input_size > 0 || output_size > 0)
403 {
404 info.inputs = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
405 info.outputs = info.inputs + input_size;
406 if (inputs)
407 memcpy(info.inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
408 if (outputs)
409 memcpy(info.outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
410 info.input_size = input_size;
411 info.output_size = output_size;
412 int i;
413 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
414 for (i = 0; i < input_size + output_size; i++)
415 if (info.inputs[i])
416 {
417 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i])((*(int*)(info.inputs[i])) & CCV_TENSOR_MULTIVIEW) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i];
418 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
419 }
420 info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes);
421 }
422 _ccv_nnc_graph_redo_tensor_wraps(&info, graph);
423 // Add itself to the graph's wraps array, this will help the run time when we run the graph and do unwrapping.
424 if (info.tensor_wraps_ref)
425 ccv_nnc_graph_register_tensor_wraps(graph, info.tensor_wraps_ref - 1);
426 ccv_array_push(graph->exec_info, &info);
427 return (ccv_nnc_graph_exec_t){
428 .d = d,
429 .graph = graph,
430 };
431}
432
433void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to)
434{
435 ccv_nnc_graph_tensor_carry_over_t carry_over = {
436 .from = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)from),
437 .to = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)to)
438 };
439 if (!graph->carry_overs)
440 graph->carry_overs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_carry_over_t), 0, 0);
441 ccv_array_push(graph->carry_overs, &carry_over);
442}
443
444int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
445{
446 assert(graph == source.graph)((void) sizeof ((graph == source.graph) ? 1 : 0), __extension__
({ if (graph == source.graph) ; else __assert_fail ("graph == source.graph"
, "ccv_nnc_graph.c", 446, __extension__ __PRETTY_FUNCTION__);
}))
;
447 assert(graph == destination.graph)((void) sizeof ((graph == destination.graph) ? 1 : 0), __extension__
({ if (graph == destination.graph) ; else __assert_fail ("graph == destination.graph"
, "ccv_nnc_graph.c", 447, __extension__ __PRETTY_FUNCTION__);
}))
;
448 assert(source.d < graph->exec_info->rnum)((void) sizeof ((source.d < graph->exec_info->rnum) ?
1 : 0), __extension__ ({ if (source.d < graph->exec_info
->rnum) ; else __assert_fail ("source.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 448, __extension__ __PRETTY_FUNCTION__);
}))
;
449 assert(destination.d < graph->exec_info->rnum)((void) sizeof ((destination.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (destination.d < graph->
exec_info->rnum) ; else __assert_fail ("destination.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 449, __extension__ __PRETTY_FUNCTION__);
}))
;
450 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(source.d)))
;
451 if (src_info->outgoings == 0)
452 src_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
453 else {
454 int i;
455 // Check if this is already connected, if so, skip.
456 for (i = 0; i < src_info->outgoings->rnum; i++)
457 if (*(int*)ccv_array_get(src_info->outgoings, i)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(i)))
== destination.d)
458 return -1;
459 }
460 ccv_array_push(src_info->outgoings, &destination.d);
461 graph->topsorted = 0;
462 return 0;
463}
464
465int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
466{
467 assert(graph == source.graph)((void) sizeof ((graph == source.graph) ? 1 : 0), __extension__
({ if (graph == source.graph) ; else __assert_fail ("graph == source.graph"
, "ccv_nnc_graph.c", 467, __extension__ __PRETTY_FUNCTION__);
}))
;
468 assert(graph == destination.graph)((void) sizeof ((graph == destination.graph) ? 1 : 0), __extension__
({ if (graph == destination.graph) ; else __assert_fail ("graph == destination.graph"
, "ccv_nnc_graph.c", 468, __extension__ __PRETTY_FUNCTION__);
}))
;
469 assert(source.d < graph->exec_info->rnum)((void) sizeof ((source.d < graph->exec_info->rnum) ?
1 : 0), __extension__ ({ if (source.d < graph->exec_info
->rnum) ; else __assert_fail ("source.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 469, __extension__ __PRETTY_FUNCTION__);
}))
;
470 assert(destination.d < graph->exec_info->rnum)((void) sizeof ((destination.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (destination.d < graph->
exec_info->rnum) ; else __assert_fail ("destination.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 470, __extension__ __PRETTY_FUNCTION__);
}))
;
471 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(source.d)))
;
472 if (!src_info->outgoings)
473 return -1;
474 int i, j = -1;
475 // Check if this is already connected, if so, skip.
476 for (i = 0; i < src_info->outgoings->rnum; i++)
477 if (*(int*)ccv_array_get(src_info->outgoings, i)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(i)))
== destination.d)
478 {
479 j = i;
480 break;
481 }
482 if (j < 0)
483 return -1;
484 if (j < src_info->outgoings->rnum - 1)
485 *(int*)ccv_array_get(src_info->outgoings, j)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(j)))
= *(int*)ccv_array_get(src_info->outgoings, src_info->outgoings->rnum - 1)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(src_info->outgoings
->rnum - 1)))
;
486 --src_info->outgoings->rnum;
487 ccv_nnc_graph_exec_info_t* dest_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, destination.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(destination.d)))
;
488 if (dest_info->outgoings)
489 for (i = 0; i < dest_info->outgoings->rnum; i++)
490 ccv_array_add_unique_int(src_info->outgoings, *(int*)ccv_array_get(dest_info->outgoings, i)((void*)(((char*)((dest_info->outgoings)->data)) + (size_t
)(dest_info->outgoings)->rsize * (size_t)(i)))
);
491 graph->topsorted = 0;
492 return 0;
493}
494
495int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph)
496{
497 return graph->exec_info ? graph->exec_info->rnum : 0;
498}
499
500void* ccv_nnc_graph_buffer(ccv_nnc_graph_t* const graph, int size)
501{
502 if (graph->buffer_size >= size)
503 return graph->buffer;
504 graph->buffer_size = size;
505 graph->buffer = (graph->buffer) ? ccreallocrealloc(graph->buffer, size) : ccmallocmalloc(size);
506 return graph->buffer;
507}
508
509void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size)
510{
511 assert(exec_cvt_size == graph->exec_info->rnum)((void) sizeof ((exec_cvt_size == graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (exec_cvt_size == graph->exec_info
->rnum) ; else __assert_fail ("exec_cvt_size == graph->exec_info->rnum"
, "ccv_nnc_graph.c", 511, __extension__ __PRETTY_FUNCTION__);
}))
;
512 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 512, __extension__ __PRETTY_FUNCTION__);
}))
;
513 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 513, __extension__ __PRETTY_FUNCTION__);
}))
;
514 int i, j;
515 for (i = 0; i < exec_cvt_size; i++)
516 exec_cvt[i] = -1;
517 ccv_array_t* exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), graph->exec_info->rnum, 0);
518 // If there are breakpoints, it is more complicated, we first start to the breakpoints, and then continue from the breakpoints to the destinations.
519 if (graph->breakpoint_size)
520 {
521 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, graph->breakpoints, graph->breakpoint_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 :
0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((
char*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->sources
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->sources)->data)) + (size_t)(
graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_
[0] = (graph->breakpoint_size); _exist_size_[1] = 0; _p_ =
0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[
_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const
int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r
!= 2) continue; _incomings_[_idx_].r = 3; if (_incomings_[_idx_
].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_
++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_
]; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_];
} } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_
= 0; _i_ < (graph->breakpoint_size); _i_++) { ((void) sizeof
(((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail
("(graph->breakpoints)[_i_].graph == graph", "ccv_nnc_graph.c"
, 521, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(
graph->breakpoints)[_i_].d].d = 1; } for (_i_ = 0; _i_ <
(graph->sources->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->breakpoint_size)) { _exists_[_p_][_i_] = d; continue; } }
else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void
*)(((char*)((graph->exec_info)->data)) + (size_t)(graph
->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->
rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->breakpoint_size)) { _exists_[_q_][_exist_size_[_q_]] = d
; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) =
(_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[(graph->breakpoints)[_i_].d].r == 4)
continue; if (!(0)) { ((void) sizeof ((_incomings_[(graph->
breakpoints)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_
[(graph->breakpoints)[_i_].d].c == 0) ; else __assert_fail
("_incomings_[(graph->breakpoints)[_i_].d].c == 0", "ccv_nnc_graph.c"
, 521, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_
[(graph->breakpoints)[_i_].d].c > 0) continue; _visit_->
node[_visit_->size].index = (((graph->breakpoints)[_i_]
.d)); _visit_->node[_visit_->size].term = ((_incomings_
[(graph->breakpoints)[_i_].d].d)); ++_visit_->size;; } if
(_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof
((_visit_->size <= (graph->exec_info->rnum)) ? 1
: 0), __extension__ ({ if (_visit_->size <= (graph->
exec_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
522 for (i = 0; i < graph->breakpoint_size; i++)
523 exec_cvt[graph->breakpoints[i].d] = -2; // Mark this as breakpoints, so we will skip the first round.
524 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
525 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 525, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
526 if (exec_cvt[idx] == -2) // Skip breakpoint.
527 continue;
528 // Loop over node and push to the array.
529 ccv_array_push(exec_info, node);
530 // Go to its sub-graph to fix exec_idx
531 for (i = 0; i < node->graph_ref_size; i++)
532 {
533 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
534 if (graph_ref >= 0)
535 {
536 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
537 sub_graph->exec_idx = exec_info->rnum;
538 }
539 }
540 exec_cvt[idx] = exec_info->rnum - 1;
541 } ccv_nnc_graph_visit_endfor} }
542 ccv_nnc_graph_visit_free(visit);
543 graph->breakpoint_offset = exec_info->rnum;
544 visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (graph->breakpoints)[_i_].d; } int
_exist_size_[2] = { (graph->breakpoint_size), 0, }; int _p_
= 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 1) continue; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_
[0] = (graph->breakpoint_size); _exist_size_[1] = 0; _p_ =
0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r == 2) continue; _incomings_[_idx_].r = 2
; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->
exec_info)->data)) + (size_t)(graph->exec_info)->rsize
* (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((
ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int
*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((
char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data
)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(
(graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t
)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges
= _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; }
_edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_
; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_]] = d; ++
_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) =
(_i_)); } for (_i_ = 0; _i_ < (graph->destinations->
rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->destinations->rnum); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->breakpoint_size); _i_++) { ((void) sizeof ((
(graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail
("(graph->breakpoints)[_i_].graph == graph", "ccv_nnc_graph.c"
, 544, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (graph->breakpoints)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->breakpoint_size); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->destinations->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(graph->destinations->rnum)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 4) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 544, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
545 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
546 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 546, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
547 // Loop over node and push to the array.
548 ccv_array_push(exec_info, node);
549 // Go to its sub-graph to fix exec_idx
550 for (i = 0; i < node->graph_ref_size; i++)
551 {
552 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
553 if (graph_ref >= 0)
554 {
555 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
556 sub_graph->exec_idx = exec_info->rnum;
557 }
558 }
559 exec_cvt[idx] = exec_info->rnum - 1;
560 } ccv_nnc_graph_visit_endfor} }
561 ccv_nnc_graph_visit_free(visit);
562 for (i = 0; i < graph->breakpoint_size; i++)
563 { assert(exec_cvt[graph->breakpoints[i].d] >= 0)((void) sizeof ((exec_cvt[graph->breakpoints[i].d] >= 0
) ? 1 : 0), __extension__ ({ if (exec_cvt[graph->breakpoints
[i].d] >= 0) ; else __assert_fail ("exec_cvt[graph->breakpoints[i].d] >= 0"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
}))
; } // All breakpoints should be assigned.
564 } else {
565 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 :
0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((
char*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->sources
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->sources)->data)) + (size_t)(
graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->destinations
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->destinations->rnum); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->sources->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->
data)) + (size_t)(graph->sources)->rsize * (size_t)(0))
))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->destinations->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(graph->destinations->rnum)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 4) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 565, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
566 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
567 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 567, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
568 // Loop over node and push to the array.
569 ccv_array_push(exec_info, node);
570 // Go to its sub-graph to fix exec_idx
571 for (i = 0; i < node->graph_ref_size; i++)
572 {
573 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
574 if (graph_ref >= 0)
575 {
576 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
577 sub_graph->exec_idx = exec_info->rnum;
578 }
579 }
580 exec_cvt[idx] = exec_info->rnum - 1;
581 } ccv_nnc_graph_visit_endfor} }
582 ccv_nnc_graph_visit_free(visit);
583 }
584 assert(graph->exec_info->rnum == exec_info->rnum)((void) sizeof ((graph->exec_info->rnum == exec_info->
rnum) ? 1 : 0), __extension__ ({ if (graph->exec_info->
rnum == exec_info->rnum) ; else __assert_fail ("graph->exec_info->rnum == exec_info->rnum"
, "ccv_nnc_graph.c", 584, __extension__ __PRETTY_FUNCTION__);
}))
;
585 ccv_array_free(graph->exec_info);
586 graph->exec_info = exec_info;
587 for (i = 0; i < graph->sources->rnum; i++)
588 {
589 ccv_nnc_graph_exec_t* const source = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, i)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(i)))
;
590 source->d = exec_cvt[source->d];
591 }
592 for (i = 0; i < graph->destinations->rnum; i++)
593 {
594 ccv_nnc_graph_exec_t* const destination = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(i)))
;
595 destination->d = exec_cvt[destination->d];
596 }
597 // Update all outgoings to reflect the latest.
598 for (i = 0; i < exec_info->rnum; i++)
599 {
600 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(exec_info, i)((void*)(((char*)((exec_info)->data)) + (size_t)(exec_info
)->rsize * (size_t)(i)))
;
601 if (info->outgoings)
602 for (j = 0; j < info->outgoings->rnum; j++)
603 *(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
= exec_cvt[*(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
];
604 }
605 graph->topsorted = 1;
606}
607
608typedef struct {
609 int device_id;
610 int exec_idx;
611 ccv_array_t* signal_set;
612 ccv_array_t* command_set; // The set of command executed in this stream. In case there is a tie (on rank). We will check this.
613} ccv_nnc_stream_data_t;
614
615static void _ccv_nnc_graph_schedule_assign_signals(ccv_array_t* const incoming, ccv_nnc_graph_exec_schedule_t* const node, ccv_array_t* const stream_data, int* const signal_size, ccv_nnc_graph_exec_schedule_t* const exec_info, const int exec_info_size)
616{
617 assert(incoming->rnum > 0)((void) sizeof ((incoming->rnum > 0) ? 1 : 0), __extension__
({ if (incoming->rnum > 0) ; else __assert_fail ("incoming->rnum > 0"
, "ccv_nnc_graph.c", 617, __extension__ __PRETTY_FUNCTION__);
}))
;
618 int i, j, k;
619 int wait_size = 0, max_wait_size = 0;
620 for (i = 0; i < incoming->rnum; i++)
621 {
622 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
623 ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx;
624 assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ?
1 : 0), __extension__ ({ if (incoming_exec_info->stream_size
> 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0"
, "ccv_nnc_graph.c", 624, __extension__ __PRETTY_FUNCTION__);
}))
;
625 max_wait_size += incoming_exec_info->stream_size;
626 }
627 int waits[ccv_max(1, max_wait_size)({ typeof (1) _a = (1); typeof (max_wait_size) _b = (max_wait_size
); (_a > _b) ? _a : _b; })
];
628 assert(node->stream_size > 0)((void) sizeof ((node->stream_size > 0) ? 1 : 0), __extension__
({ if (node->stream_size > 0) ; else __assert_fail ("node->stream_size > 0"
, "ccv_nnc_graph.c", 628, __extension__ __PRETTY_FUNCTION__);
}))
;
629 for (i = 0; i < incoming->rnum; i++)
630 {
631 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
632 assert(incoming_idx < exec_info_size)((void) sizeof ((incoming_idx < exec_info_size) ? 1 : 0), __extension__
({ if (incoming_idx < exec_info_size) ; else __assert_fail
("incoming_idx < exec_info_size", "ccv_nnc_graph.c", 632,
__extension__ __PRETTY_FUNCTION__); }))
;
633 assert(incoming_idx >= 0)((void) sizeof ((incoming_idx >= 0) ? 1 : 0), __extension__
({ if (incoming_idx >= 0) ; else __assert_fail ("incoming_idx >= 0"
, "ccv_nnc_graph.c", 633, __extension__ __PRETTY_FUNCTION__);
}))
;
634 ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx;
635 assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ?
1 : 0), __extension__ ({ if (incoming_exec_info->stream_size
> 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0"
, "ccv_nnc_graph.c", 635, __extension__ __PRETTY_FUNCTION__);
}))
;
636 int stream_synced = 1;
637 // If the current node's stream is a subset of the incoming node's stream, there
638 // is no need to sync with signal, because we are already synced with the incoming.
639 for (j = 0; stream_synced && j < node->stream_size; j++)
640 {
641 const int s = SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node
)._heap_streams)
[j];
642 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 642
, __extension__ __PRETTY_FUNCTION__); }))
;
643 int flag = 0;
644 for (k = 0; !flag && k < incoming_exec_info->stream_size; k++)
645 flag = (SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_streams : (*incoming_exec_info)._heap_streams)
[k] == s);
646 stream_synced = flag;
647 }
648 if (stream_synced)
649 continue;
650 // Otherwise, find the streams we need to sync with, and create signals for these.
651 for (j = 0; j < incoming_exec_info->stream_size; j++)
652 {
653 const int s = SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_streams : (*incoming_exec_info)._heap_streams)
[j];
654 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 654
, __extension__ __PRETTY_FUNCTION__); }))
;
655 int flag = 0;
656 for (k = 0; !flag && k < node->stream_size; k++)
657 flag = (SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node
)._heap_streams)
[k] == s);
658 if (!flag) // Need to have a signal.
659 {
660 if (SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j] < 0)
661 SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j] = (*signal_size)++;
662 else {
663 int flag = 0;
664 // If any of the stream the current node has already seen this signal, we are good already.
665 for (k = 0; !flag && k < node->stream_size; k++)
666 {
667 assert(SCHEDULE_STREAMS(*node)[k] >= 0)((void) sizeof ((((*node).stream_size <= 1 ? (*node)._inline_streams
: (*node)._heap_streams)[k] >= 0) ? 1 : 0), __extension__
({ if (((*node).stream_size <= 1 ? (*node)._inline_streams
: (*node)._heap_streams)[k] >= 0) ; else __assert_fail ("SCHEDULE_STREAMS(*node)[k] >= 0"
, "ccv_nnc_graph.c", 667, __extension__ __PRETTY_FUNCTION__);
}))
;
668 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((*node).stream_size <= 1 ? (*node)
._inline_streams : (*node)._heap_streams)[k])))
;
669 flag = (data->signal_set && ccv_array_find_int(data->signal_set, SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j]));
670 }
671 if (flag)
672 continue;
673 }
674 // Otherwise, we need to wait for this. Currently, our granularity is about wait on all streams.
675 waits[wait_size++] = SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j];
676 // All streams on this node have seen this signal.
677 for (k = 0; k < node->stream_size; k++)
678 {
679 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((*node).stream_size <= 1 ? (*node)
._inline_streams : (*node)._heap_streams)[k])))
;
680 if (!data->signal_set)
681 data->signal_set = ccv_array_new(sizeof(int), 0, 0);
682 ccv_array_push(data->signal_set, &SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j]);
683 }
684 }
685 }
686 }
687 node->wait_size = wait_size;
688 if (wait_size > 0)
689 {
690 node->waits = node->waits ? ccreallocrealloc(node->waits, sizeof(int) * wait_size) : ccmallocmalloc(sizeof(int) * wait_size);
691 memcpy(node->waits, waits, sizeof(int) * wait_size);
692 }
693}
694
695typedef struct {
696 int rank;
697 ccv_array_t* outgoings;
698} ccv_nnc_incoming_t;
699
700static int _ccv_nnc_device_ids_for_stream_data(ccv_nnc_graph_exec_info_t* const node, const int device_id, ccv_array_t* const stream_data, int* const device_ids, const int max_device_id_size)
701{
702 // TODO: I need to re-think whether this is GPU only or not.
703 int device_id_size = ccv_nnc_device_ids_for_io(node->inputs, node->input_size, node->outputs, node->output_size, CCV_TENSOR_GPU_MEMORY, device_ids, max_device_id_size);
704 if (device_id_size == 0)
705 {
706 // If there is a default data, use that device id. Otherwise, use the device id passed in (this will be the default data device id).
707 if (stream_data->rnum > 0)
708 {
709 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
710 device_ids[0] = default_data->device_id;
711 } else
712 device_ids[0] = device_id >= 0 ? device_id : 0;
713 device_id_size = 1;
714 }
715 return device_id_size;
716}
717
718void ccv_nnc_graph_static_schedule_free(ccv_nnc_graph_static_schedule_t* const schedule)
719{
720 int i;
721 ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info;
722 for (i = 0; i < schedule->exec_info_size; i++)
723 {
724 if (schd_info[i].stream_size > 1)
725 ccfreefree(schd_info[i]._heap_streams);
726 if (schd_info[i].waits)
727 ccfreefree(schd_info[i].waits);
728 }
729 if (schedule->stream_1s)
730 ccfreefree(schedule->stream_1s);
731 if (schedule->waits)
732 ccfreefree(schedule->waits);
733 if (schedule->psort)
734 ccfreefree(schedule->psort);
735 if (schedule->begin)
736 ccv_nnc_stream_signal_free(schedule->begin);
737 if (schedule->end)
738 ccv_nnc_stream_signal_free(schedule->end);
739 ccfreefree(schedule);
740}
741
742static ccv_nnc_graph_static_schedule_t* _ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int device_id, ccv_nnc_stream_context_t* const stream_context, const ccv_nnc_graph_exec_t* const _sources, const int _source_size, const ccv_nnc_graph_exec_t* const _destinations, const int _destination_size)
743{
744 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 744, __extension__ __PRETTY_FUNCTION__);
}))
;
745 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 745, __extension__ __PRETTY_FUNCTION__);
}))
;
746 assert(graph->topsorted)((void) sizeof ((graph->topsorted) ? 1 : 0), __extension__
({ if (graph->topsorted) ; else __assert_fail ("graph->topsorted"
, "ccv_nnc_graph.c", 746, __extension__ __PRETTY_FUNCTION__);
}))
; // Only support this on a topsorted graph.
747 const int exec_info_size = graph->exec_info->rnum;
748 assert(exec_info_size > 0)((void) sizeof ((exec_info_size > 0) ? 1 : 0), __extension__
({ if (exec_info_size > 0) ; else __assert_fail ("exec_info_size > 0"
, "ccv_nnc_graph.c", 748, __extension__ __PRETTY_FUNCTION__);
}))
;
749 const ccv_nnc_graph_exec_t* const sources = _sources == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: _sources;
750 const int source_size = _sources == 0 ? graph->sources->rnum : _source_size;
751 if (!_sources)
752 { assert(_source_size == 0)((void) sizeof ((_source_size == 0) ? 1 : 0), __extension__ (
{ if (_source_size == 0) ; else __assert_fail ("_source_size == 0"
, "ccv_nnc_graph.c", 752, __extension__ __PRETTY_FUNCTION__);
}))
; }
753 const ccv_nnc_graph_exec_t* const destinations = _destinations == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: _destinations;
754 const int destination_size = _destinations == 0 ? graph->destinations->rnum : _destination_size;
755 if (!_destinations)
756 { assert(_destination_size == 0)((void) sizeof ((_destination_size == 0) ? 1 : 0), __extension__
({ if (_destination_size == 0) ; else __assert_fail ("_destination_size == 0"
, "ccv_nnc_graph.c", 756, __extension__ __PRETTY_FUNCTION__);
}))
; }
757 const int root_schedule = (_sources == 0 && _destinations == 0);
758 ccv_nnc_graph_static_schedule_t* const schedule = cccalloccalloc(1, sizeof(ccv_nnc_graph_static_schedule_t) + sizeof(ccv_nnc_graph_exec_schedule_t) * (exec_info_size - 1));
759 schedule->exec_info_size = exec_info_size;
760 ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info;
761 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0)))
;
762 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = (exec_info_size + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (
(sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_
[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r == 1) continue; _incomings_[
_idx_].r = 1; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size);
_i_++) { ((void) sizeof (((sources)[_i_].graph == graph) ? 1
: 0), __extension__ ({ if ((sources)[_i_].graph == graph) ; else
__assert_fail ("(sources)[_i_].graph == graph", "ccv_nnc_graph.c"
, 762, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r == 2) continue; _incomings_[
_idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size
); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph ==
graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0
; _i_ < (source_size); _i_++) { ((void) sizeof (((sources)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1
; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 3 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 3 && _d_ < (destination_size
)) { _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void
) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 762
, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[(destinations
)[_i_].d].r == 4) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(destinations)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if
(_incomings_[(destinations)[_i_].d].c == 0) ; else __assert_fail
("_incomings_[(destinations)[_i_].d].c == 0", "ccv_nnc_graph.c"
, 762, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_
[(destinations)[_i_].d].c > 0) continue; _visit_->node[
_visit_->size].index = (((destinations)[_i_].d)); _visit_->
node[_visit_->size].term = ((_incomings_[(destinations)[_i_
].d].d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_
); } while (0);; ((void) sizeof ((_visit_->size <= (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size
)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
763 if (!root_schedule)
764 {
765 // If this is not a root schedule, we need to do partial topsort.
766 int psort_size = 0;
767 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
768 ++psort_size;
769 } ccv_nnc_graph_visit_endfor} }
770 schedule->psort = (int*)ccmallocmalloc(sizeof(int) * psort_size);
771 schedule->psort_size = psort_size;
772 psort_size = 0;
773 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
774 schedule->psort[psort_size++] = idx;
775 } ccv_nnc_graph_visit_endfor} }
776 }
777 int i, j, k;
778 // Generate exec dependencies (or, in other words, partial ordering of executions).
779 ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(exec_info_size, exec_info_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
780 int* buf = (int*)ccmallocmalloc(sizeof(int) * exec_info_size * 2);
781 int buf_size;
782#define for_block(x, val) \
783 do { \
784 if (((int32_t*)val)[0] > 0) \
785 { \
786 buf[buf_size * 2] = x; \
787 buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \
788 ++buf_size; \
789 } \
790 } while (0)
791 for (i = 0; i < exec_info_size; i++)
792 schd_info[i].stream_size = -1;
793 ccv_nnc_graph_visit_for(visit, exec_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_info)) const node __attribute__((unused
)) = (exec_info) + idx;
{
794 buf_size = 0; /* save all its parent deps to this buffer */
795 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
796 schd_info[idx].stream_size = 0;
797 if (vector)
798 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
799 if (!node->outgoings)
800 continue;
801 for (i = 0; i < node->outgoings->rnum; i++)
802 {
803 int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
804 const int32_t one = 1;
805 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx);
806 /* If not found, set, if the current node is the destination node, no need
807 * set itself as parent of subsequent nodes because its terminal nature. */
808 if (!term && (!cell.i32 || cell.i32[0] == 0))
809 ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one);
810 for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */
811 {
812 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]);
813 /* If not found, set */
814 if (!cell.i32 || cell.i32[0] == 0)
815 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]);
816 else {
817 /* Otherwise, set to the longest one */
818 int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1])({ typeof (cell.i32[0]) _a = (cell.i32[0]); typeof (buf[j * 2
+ 1]) _b = (buf[j * 2 + 1]); (_a > _b) ? _a : _b; })
;
819 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep);
820 }
821 }
822 }
823 } ccv_nnc_graph_visit_endfor} }
824#undef for_block
825 ccfreefree(buf);
826 // Algorithm to allocate signals and streams for this graph.
827 ccv_array_t* const stream_data = ccv_array_new(sizeof(ccv_nnc_stream_data_t), 0, 0);
828 ccv_array_t** const outgoings = cccalloccalloc(exec_info_size, sizeof(ccv_array_t*));
829 ccv_nnc_incoming_t* const incomings = cccalloccalloc(exec_info_size, sizeof(ccv_nnc_incoming_t));
830 int max_device_id_size = 1;
831 // Filter out outgoing nodes that we will be able to access it afterwards anyway.
832 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
833 max_device_id_size = ccv_max(node->input_size + node->output_size, max_device_id_size)({ typeof (node->input_size + node->output_size) _a = (
node->input_size + node->output_size); typeof (max_device_id_size
) _b = (max_device_id_size); (_a > _b) ? _a : _b; })
;
834 if (node->outgoings)
835 {
836 outgoings[idx] = ccv_array_new(sizeof(int), 0, 0);
837 for (i = 0; i < node->outgoings->rnum; i++)
838 {
839 const int di = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
840 // Skip if we haven't accessed this exec.
841 if (schd_info[di].stream_size < 0)
842 continue;
843 int flag = 0;
844 for (j = 0; !flag && j < node->outgoings->rnum; j++)
845 {
846 if (j != i)
847 {
848 const int dj = *(int*)ccv_array_get(node->outgoings, j)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(j)))
;
849 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, di, dj);
850 flag = (cell.i32 && cell.i32[0]);
851 }
852 }
853 if (!flag)
854 {
855 ccv_array_push(outgoings[idx], &di);
856 if (!incomings[di].outgoings)
857 incomings[di].outgoings = ccv_array_new(sizeof(int), 1, 0);
858 ccv_array_push(incomings[di].outgoings, &idx);
859 }
860 }
861 }
862 } ccv_nnc_graph_visit_endfor} }
863#define visitor(node, idx, _) \
864 if (node->outgoings) \
865 for (i = 0; i < node->outgoings->rnum; i++) \
866 { \
867 const int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
; \
868 node->rank = ccv_max(incomings[d].rank + 1, node->rank)({ typeof (incomings[d].rank + 1) _a = (incomings[d].rank + 1
); typeof (node->rank) _b = (node->rank); (_a > _b) ?
_a : _b; })
; \
869 }
870 CCV_NNC_GRAPH_VISIT(graph, incomings, exec_info_size, destinations, destination_size, sources, source_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_
+= ((incomings)[_i_].outgoings) ? (incomings)[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (exec_info_size + _incoming_edges_
> 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_)
_incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t
) * (exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2
+ _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_info_size
) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size
)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size
)), (int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size
), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size)
; for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void)
sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 870
, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_] =
(destinations)[_i_].d; } int _exist_size_[2] = { (destination_size
), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0
) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r == 1) continue; _incomings_[_idx_].r = 1
; if ((incomings)[_idx_].outgoings) for (_j_ = 0; _j_ < (incomings
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((incomings)[_idx_].outgoings)->data)) + (
size_t)((incomings)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_]] = d
; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_
) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++
) { ((void) sizeof (((destinations)[_i_].graph == graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == graph)
; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c"
, 870, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 2) continue
; _incomings_[_idx_].r = 2; if ((incomings)[_idx_].outgoings)
for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((incomings
)[_idx_].outgoings)->data)) + (size_t)((incomings)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; _exists_[
_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) =
(_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(source_size); _i_++) { ((void) sizeof (((sources)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph
== graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] =
(source_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (
_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ =
0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ =
_exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue;
_incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges > 0
) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(source_size); _i_++) { ((void) sizeof (((sources)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph
== graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(sources)[_i_].d].d = 1; } for (_i_ = 0; _i_
< (destination_size); _i_++) { ((void) sizeof (((destinations
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (destinations)[_i_].d; } _p_ = 0; _q_
= 1; _exist_size_[0] = (destination_size); _exist_size_[1] =
0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor(((incomings) + _idx_
), (_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d
) { ++_d_; _incomings_[_idx_].r = 4; } if ((incomings)[_idx_]
.outgoings) { if ((incomings)[_idx_].outgoings->rnum == 1)
{ const int d = *(int*)((void*)(((char*)(((incomings)[_idx_]
.outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(source_size)) { _exists_[_p_][_i_] = d; continue; } } else for
(_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum; _j_
++) { const int d = *(int*)((void*)(((char*)(((incomings)[_idx_
].outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(source_size)) { _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_
[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void
) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == graph) ; else __assert_fail (
"(sources)[_i_].graph == graph", "ccv_nnc_graph.c", 870, __extension__
__PRETTY_FUNCTION__); })); if (_incomings_[(sources)[_i_].d]
.r == 4) continue; if (!(0)) { ((void) sizeof ((_incomings_[(
sources)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_
[(sources)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(sources)[_i_].d].c == 0"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[(sources)[_i_].d].c > 0) continue
; visitor(((incomings) + (sources)[_i_].d), ((sources)[_i_].d
), (_incomings_[(sources)[_i_].d].d)); } if (_heap_mem_) free
(_incomings_); } while (0);
;
871#undef visitor
872 int device_ids[max_device_id_size];
873 int outgoing_device_ids[max_device_id_size];
874 int signal_size = 0;
875 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
876 // Go through the incomings.
877 const int device_id_size = _ccv_nnc_device_ids_for_stream_data(node, device_id, stream_data, device_ids, max_device_id_size);
878 if (schd_info[idx].stream_size == 0)
879 {
880 schd_info[idx].stream_size = device_id_size; // At least at the same size as the device_id_size.
881 if (device_id_size > 1)
882 {
883 schd_info[idx]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * device_id_size * 2);
884 schd_info[idx]._heap_signals = (schd_info[idx]._heap_streams + device_id_size);
885 }
886 for (i = 0; i < device_id_size; i++)
887 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = -1, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] = -1;
888 }
889 for (i = 0; i < device_id_size; i++)
890 // Go through until the end to assign streams.
891 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] < 0)
892 {
893 int stream_idx = -1;
894 int stream_has_command = 0;
895 // First, find a good stream in stream data (the stream is good if it can be recycled, and it has the same command).
896 // Otherwise, we prefer a usable stream (it doesn't have the command, but it can be recycled).
897 for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++)
898 {
899 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(j)))
;
900 if (data->device_id == device_ids[i])
901 {
902 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, data->exec_idx);
903 // If there is a path to conclude that exec_idx is before idx, then we can reuse
904 // this stream. Otherwise the work in this "empty stream" could still be ongoing,
905 // and we may delay the following work unnecessarily.
906 if (cell.i32 && cell.i32[0] > 0)
907 {
908 if (ccv_array_find_uint(data->command_set, node->cmd.cmd))
909 stream_idx = j, stream_has_command = 1;
910 else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet.
911 stream_idx = j;
912 }
913 }
914 }
915 if (stream_idx < 0)
916 {
917 stream_idx = stream_data->rnum;
918 const ccv_nnc_stream_data_t data = {
919 .device_id = device_ids[i],
920 };
921 ccv_array_push(stream_data, &data);
922 }
923 assert(stream_idx >= 0)((void) sizeof ((stream_idx >= 0) ? 1 : 0), __extension__ (
{ if (stream_idx >= 0) ; else __assert_fail ("stream_idx >= 0"
, "ccv_nnc_graph.c", 923, __extension__ __PRETTY_FUNCTION__);
}))
;
924 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
925 if (!data->command_set)
926 data->command_set = ccv_array_new(sizeof(uint32_t), 1, 0);
927 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = stream_idx;
928 ccv_array_add_unique_uint(data->command_set, node->cmd.cmd);
929 // Assign all subsequent node to use this stream.
930 int outgoing_idx = idx;
931 while (outgoings[outgoing_idx] && outgoings[outgoing_idx]->rnum)
932 {
933 int highest_rank = -1;
934 int highest_idx = -1;
935 int stream_n = -1;
936 int stream_has_command = 0;
937 for (j = 0; j < outgoings[outgoing_idx]->rnum; j++)
938 {
939 const int d = *(int*)ccv_array_get(outgoings[outgoing_idx], j)((void*)(((char*)((outgoings[outgoing_idx])->data)) + (size_t
)(outgoings[outgoing_idx])->rsize * (size_t)(j)))
;
940 // This is not outside of our scope at this point.
941 assert(schd_info[d].stream_size >= 0)((void) sizeof ((schd_info[d].stream_size >= 0) ? 1 : 0), __extension__
({ if (schd_info[d].stream_size >= 0) ; else __assert_fail
("schd_info[d].stream_size >= 0", "ccv_nnc_graph.c", 941,
__extension__ __PRETTY_FUNCTION__); }))
;
942 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + d;
943 const int outgoing_device_id_size = _ccv_nnc_device_ids_for_stream_data(outgoing_node, device_id, stream_data, outgoing_device_ids, max_device_id_size);
944 if (schd_info[d].stream_size == 0)
945 {
946 schd_info[d].stream_size = outgoing_device_id_size; // At least at the same size as the device_id_size.
947 if (outgoing_device_id_size > 1)
948 {
949 schd_info[d]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * outgoing_device_id_size * 2);
950 schd_info[d]._heap_signals = (schd_info[d]._heap_streams + outgoing_device_id_size);
951 }
952 for (k = 0; k < outgoing_device_id_size; k++)
953 SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams
: (schd_info[d])._heap_streams)
[k] = -1, SCHEDULE_SIGNALS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_signals
: (schd_info[d])._heap_signals)
[k] = -1;
954 }
955 assert(schd_info[d].stream_size == outgoing_device_id_size)((void) sizeof ((schd_info[d].stream_size == outgoing_device_id_size
) ? 1 : 0), __extension__ ({ if (schd_info[d].stream_size == outgoing_device_id_size
) ; else __assert_fail ("schd_info[d].stream_size == outgoing_device_id_size"
, "ccv_nnc_graph.c", 955, __extension__ __PRETTY_FUNCTION__);
}))
;
956 for (k = 0; k < outgoing_device_id_size; k++)
957 // If it should be on the same device and the stream is not assign, potentially.
958 if (outgoing_device_ids[k] == device_ids[i] &&
959 SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams
: (schd_info[d])._heap_streams)
[k] < 0 &&
960 (incomings[d].rank > highest_rank ||
961 (incomings[d].rank == highest_rank &&
962 !stream_has_command && ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd))))
963 {
964 highest_rank = incomings[d].rank;
965 highest_idx = d;
966 stream_n = k;
967 // This is 1 if rank is the same (thus, I must break the tie already), if the rank is not the same, we need to compute this.
968 stream_has_command = (incomings[d].rank == highest_rank || ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd));
969 }
970 }
971 if (highest_idx >= 0)
972 {
973 outgoing_idx = highest_idx;
974 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + outgoing_idx;
975 assert(stream_n >= 0)((void) sizeof ((stream_n >= 0) ? 1 : 0), __extension__ ({
if (stream_n >= 0) ; else __assert_fail ("stream_n >= 0"
, "ccv_nnc_graph.c", 975, __extension__ __PRETTY_FUNCTION__);
}))
;
976 SCHEDULE_STREAMS(schd_info[outgoing_idx])((schd_info[outgoing_idx]).stream_size <= 1 ? (schd_info[outgoing_idx
])._inline_streams : (schd_info[outgoing_idx])._heap_streams)
[stream_n] = stream_idx;
977 ccv_array_add_unique_uint(data->command_set, outgoing_node->cmd.cmd);
978 } else
979 break;
980 }
981 data->exec_idx = outgoing_idx;
982 }
983 } ccv_nnc_graph_visit_endfor} }
984 // Go through to assign signals when necessary.
985 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
986 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
987 _ccv_nnc_graph_schedule_assign_signals(incomings[idx].outgoings, schd_info + idx, stream_data, &signal_size, schd_info, exec_info_size);
988 } ccv_nnc_graph_visit_endfor} }
989 for (i = 0; i < exec_info_size; i++)
990 if (outgoings[i])
991 ccv_array_free(outgoings[i]);
992 ccfreefree(outgoings);
993 ccv_matrix_free(exec_dep);
994 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
995 if (device_id >= 0)
996 {
997 // If the default stream (stream 0) is not the same as desired stream, swap with the one that is.
998 if (default_data->device_id != device_id)
999 {
1000 int exchange_stream_idx = -1;
1001 // Find the stream idx to exchange.
1002 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1003 int flag = 0;
1004 for(i = 0; !flag && i < schd_info[idx].stream_size; i++)
1005 {
1006 const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i];
1007 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
1008 if (data->device_id == device_id)
1009 {
1010 exchange_stream_idx = stream_idx;
1011 flag = 1;
1012 }
1013 }
1014 if (flag)
1015 break;
1016 } ccv_nnc_graph_visit_endfor} }
1017 assert(exchange_stream_idx >= 0)((void) sizeof ((exchange_stream_idx >= 0) ? 1 : 0), __extension__
({ if (exchange_stream_idx >= 0) ; else __assert_fail ("exchange_stream_idx >= 0"
, "ccv_nnc_graph.c", 1017, __extension__ __PRETTY_FUNCTION__)
; }))
;
1018 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1019 for (i = 0; i < schd_info[idx].stream_size; i++)
1020 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == 0)
1021 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = -1;
1022 } ccv_nnc_graph_visit_endfor} }
1023 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1024 for (i = 0; i < schd_info[idx].stream_size; i++)
1025 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == exchange_stream_idx)
1026 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = 0;
1027 } ccv_nnc_graph_visit_endfor} }
1028 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1029 for (i = 0; i < schd_info[idx].stream_size; i++)
1030 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == -1)
1031 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = exchange_stream_idx;
1032 } ccv_nnc_graph_visit_endfor} }
1033 ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, exchange_stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(exchange_stream_idx)))
)->device_id = default_data->device_id;
1034 default_data->device_id = device_id;
1035 }
1036 }
1037 int graph_stream_1_size = 0;
1038 for (i = 0; i < source_size; i++)
1039 {
1040 const int idx = sources[i].d;
1041 // If it has incoming nodes, check whether these are on stream 0.
1042 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
1043 {
1044 int flag = 0;
1045 const ccv_array_t* const incoming = incomings[idx].outgoings;
1046 for (j = 0; !flag && j < incoming->rnum; j++)
1047 {
1048 const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(j)))
;
1049 for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++)
1050 flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx
])._inline_streams : (schd_info[incoming_idx])._heap_streams)
[k] == 0); // If this is the default stream, we already have a good start.
1051 }
1052 if (flag)
1053 continue;
1054 }
1055 for (j = 0; j < schd_info[idx].stream_size; j++)
1056 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this is not the default stream, we need explicit begin signal to start.
1057 ++graph_stream_1_size;
1058 }
1059 if (graph_stream_1_size > 0)
1060 {
1061 schedule->stream_1s = ccmallocmalloc(sizeof(int) * graph_stream_1_size);
1062 graph_stream_1_size = 0;
1063 for (i = 0; i < source_size; i++)
1064 {
1065 const int idx = sources[i].d;
1066 // If it has incoming nodes, check whether these are on stream 0.
1067 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
1068 {
1069 int flag = 0;
1070 const ccv_array_t* const incoming = incomings[idx].outgoings;
1071 for (j = 0; !flag && j < incoming->rnum; j++)
1072 {
1073 const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(j)))
;
1074 for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++)
1075 flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx
])._inline_streams : (schd_info[incoming_idx])._heap_streams)
[k] == 0); // If this is the default stream, we already have a good start.
1076 }
1077 if (flag)
1078 continue;
1079 }
1080 for (j = 0; j < schd_info[idx].stream_size; j++)
1081 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this is not the default stream, we need explicit begin signal to start.
1082 {
1083 const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j];
1084 int flag = 0;
1085 for (k = 0; !flag && k < graph_stream_1_size; k++)
1086 flag = (stream_idx == schedule->stream_1s[k]);
1087 if (!flag)
1088 schedule->stream_1s[graph_stream_1_size++] = stream_idx;
1089 }
1090 }
1091 schedule->stream_1_size = graph_stream_1_size;
1092 }
1093 for (i = 0; i < exec_info_size; i++)
1094 if (incomings[i].outgoings)
1095 ccv_array_free(incomings[i].outgoings);
1096 ccfreefree(incomings);
1097 int graph_wait_size = 0;
1098 for (i = 0; i < destination_size; i++)
1099 {
1100 const int idx = destinations[i].d;
1101 for (j = 0; j < schd_info[idx].stream_size; j++)
1102 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
1103 ++graph_wait_size;
1104 }
1105 if (graph_wait_size > 0)
1106 {
1107 schedule->waits = ccmallocmalloc(sizeof(int) * graph_wait_size);
1108 graph_wait_size = 0;
1109 for (i = 0; i < destination_size; i++)
1110 {
1111 const int idx = destinations[i].d;
1112 for (j = 0; j < schd_info[idx].stream_size; j++)
1113 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
1114 {
1115 ccv_nnc_stream_data_t* const default_stream_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
1116 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j] < 0)
1117 SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j] = signal_size++;
1118 else if (default_stream_data->signal_set && ccv_array_find_int(default_stream_data->signal_set, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j]))
1119 continue;
1120 schedule->waits[graph_wait_size++] = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j];
1121 }
1122 }
1123 schedule->wait_size = graph_wait_size;
1124 }
1125 for (i = 0; i < stream_data->rnum; i++)
1126 {
1127 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1128 if (data->signal_set)
1129 ccv_array_free(data->signal_set);
1130 assert(data->command_set)((void) sizeof ((data->command_set) ? 1 : 0), __extension__
({ if (data->command_set) ; else __assert_fail ("data->command_set"
, "ccv_nnc_graph.c", 1130, __extension__ __PRETTY_FUNCTION__)
; }))
;
1131 ccv_array_free(data->command_set);
1132 }
1133 // Allocate streams & signals
1134 int default_stream_type = stream_type;
1135 CCV_STREAM_SET_DEVICE_ID(default_stream_type, default_data->device_id)(default_stream_type) = (((default_stream_type) & ~0xfff00
) | (((default_data->device_id) & 0xfff) << 8))
;
1136 if (root_schedule)
1137 {
1138 assert(!graph->streams)((void) sizeof ((!graph->streams) ? 1 : 0), __extension__ (
{ if (!graph->streams) ; else __assert_fail ("!graph->streams"
, "ccv_nnc_graph.c", 1138, __extension__ __PRETTY_FUNCTION__)
; }))
;
1139 graph->stream_size = stream_data->rnum;
1140 graph->streams = (ccv_nnc_stream_context_t**)ccmallocmalloc(sizeof(ccv_nnc_stream_context_t*) * graph->stream_size);
1141 graph->block_stream_tasks = (co_routine_t**)cccalloccalloc(graph->stream_size, sizeof(co_routine_t*));
1142 if (stream_context)
1143 graph->streams[0] = stream_context;
1144 for (i = (stream_context ? 1 : 0); i < stream_data->rnum; i++)
1145 {
1146 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1147 int type = stream_type;
1148 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1149 graph->streams[i] = ccv_nnc_stream_context_new(type);
1150 }
1151 graph->signal_size = signal_size;
1152 graph->signals = (ccv_nnc_stream_signal_t**)cccalloccalloc(signal_size, sizeof(ccv_nnc_stream_signal_t*));
1153 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1154 for (i = 0; i < schd_info[idx].stream_size; i++)
1155 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1156 {
1157 const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i];
1158 if (!graph->signals[signal])
1159 {
1160 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ?
(schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams
)[i])))
;
1161 int type = stream_type;
1162 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1163 graph->signals[signal] = ccv_nnc_stream_signal_new(type);
1164 }
1165 }
1166 } ccv_nnc_graph_visit_endfor} }
1167 } else {
1168 assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ (
{ if (graph->streams) ; else __assert_fail ("graph->streams"
, "ccv_nnc_graph.c", 1168, __extension__ __PRETTY_FUNCTION__)
; }))
;
1169 assert(graph->stream_size >= stream_data->rnum)((void) sizeof ((graph->stream_size >= stream_data->
rnum) ? 1 : 0), __extension__ ({ if (graph->stream_size >=
stream_data->rnum) ; else __assert_fail ("graph->stream_size >= stream_data->rnum"
, "ccv_nnc_graph.c", 1169, __extension__ __PRETTY_FUNCTION__)
; }))
;
1170 // Find streams to proper allocated stream based on the type we need.
1171 int* const stream_idxs = (int*)ccmallocmalloc(sizeof(int) * (stream_data->rnum + signal_size));
1172 uint64_t* const stream_used = (uint64_t*)cccalloccalloc(((graph->stream_size + 63) >> 6) + ((graph->signal_size + 63) >> 6), sizeof(uint64_t));
1173 for (i = 0; i < stream_data->rnum; i++)
1174 {
1175 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1176 int type = stream_type;
1177 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1178 for (j = 0; j < graph->stream_size; j++)
1179 if (!(stream_used[j >> 6] & ((uint64_t)1 << (j & 63))))
1180 {
1181 const int stream_type = ccv_nnc_stream_context_type(graph->streams[j]);
1182 if (stream_type == type)
1183 {
1184 stream_idxs[i] = j;
1185 stream_used[j >> 6] |= ((uint64_t)1 << (j & 63));
1186 break;
1187 }
1188 }
1189 }
1190 assert(graph->signal_size >= signal_size)((void) sizeof ((graph->signal_size >= signal_size) ? 1
: 0), __extension__ ({ if (graph->signal_size >= signal_size
) ; else __assert_fail ("graph->signal_size >= signal_size"
, "ccv_nnc_graph.c", 1190, __extension__ __PRETTY_FUNCTION__)
; }))
;
1191 // Find signals to proper allocated signal based on the type we need.
1192 int* const signal_idxs = stream_idxs + stream_data->rnum;
1193 uint64_t* const signal_used = stream_used + ((graph->stream_size + 63) >> 6);
1194 for (i = 0; i < signal_size; i++)
1195 signal_idxs[i] = -1;
1196 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1197 for (i = 0; i < schd_info[idx].stream_size; i++)
1198 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1199 {
1200 const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i];
1201 if (signal_idxs[signal] < 0)
1202 {
1203 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ?
(schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams
)[i])))
;
1204 int type = stream_type;
1205 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1206 for (j = 0; j < graph->signal_size; j++)
1207 if (!(signal_used[j >> 6] & ((uint64_t)1 << (j & 63))))
1208 {
1209 const int signal_type = ccv_nnc_stream_signal_type(graph->signals[j]);
1210 if (signal_type == type)
1211 {
1212 signal_idxs[signal] = j;
1213 signal_used[j >> 6] |= ((uint64_t)1 << (j & 63));
1214 break;
1215 }
1216 }
1217 }
1218 }
1219 } ccv_nnc_graph_visit_endfor} }
1220 // Now rebind streams and signals from the schedule.
1221 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1222 for (i = 0; i < schd_info[idx].stream_size; i++)
1223 {
1224 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = stream_idxs[SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i]];
1225 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1226 SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] = signal_idxs[SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i]];
1227 }
1228 for (i = 0; i < schd_info[idx].wait_size; i++)
1229 schd_info[idx].waits[i] = signal_idxs[schd_info[idx].waits[i]];
1230 } ccv_nnc_graph_visit_endfor} }
1231 for (i = 0; i < schedule->stream_1_size; i++)
1232 schedule->stream_1s[i] = stream_idxs[schedule->stream_1s[i]];
1233 for (i = 0; i < schedule->wait_size; i++)
1234 schedule->waits[i] = signal_idxs[schedule->waits[i]];
1235 // Rebind who is the stream 0 (default stream).
1236 schedule->stream_0 = stream_idxs[0];
1237 ccfreefree(stream_used);
1238 ccfreefree(stream_idxs);
1239 }
1240 assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ (
{ if (graph->streams) ; else __assert_fail ("graph->streams"
, "ccv_nnc_graph.c", 1240, __extension__ __PRETTY_FUNCTION__)
; }))
;
1241 ccv_nnc_graph_visit_free(visit);
1242 for (i = 0; i < signal_size; i++)
1243 { assert(graph->signals[i])((void) sizeof ((graph->signals[i]) ? 1 : 0), __extension__
({ if (graph->signals[i]) ; else __assert_fail ("graph->signals[i]"
, "ccv_nnc_graph.c", 1243, __extension__ __PRETTY_FUNCTION__)
; }))
; }
1244 if (schedule->stream_1_size)
1245 schedule->begin = ccv_nnc_stream_signal_new(default_stream_type);
1246 schedule->end = ccv_nnc_stream_signal_new(default_stream_type);
1247 // Do this recursively for its sub graphs.
1248 if (graph->sub_graphs)
1249 for (i = 0; i < graph->sub_graphs->rnum; i++)
1250 {
1251 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
;
1252 if (sub_graph && !sub_graph->default_schedule)
1253 {
1254 const int exec_idx = sub_graph->exec_idx - 1;
1255 assert(schd_info[exec_idx].stream_size == 1)((void) sizeof ((schd_info[exec_idx].stream_size == 1) ? 1 : 0
), __extension__ ({ if (schd_info[exec_idx].stream_size == 1)
; else __assert_fail ("schd_info[exec_idx].stream_size == 1"
, "ccv_nnc_graph.c", 1255, __extension__ __PRETTY_FUNCTION__)
; }))
;
1256 const int stream_idx = SCHEDULE_STREAMS(schd_info[exec_idx])((schd_info[exec_idx]).stream_size <= 1 ? (schd_info[exec_idx
])._inline_streams : (schd_info[exec_idx])._heap_streams)
[0];
1257 const int device_id = ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
)->device_id;
1258 sub_graph->default_schedule = _ccv_nnc_graph_static_schedule_new(sub_graph, stream_type, device_id, graph->streams[stream_idx], 0, 0, 0, 0);
1259 }
1260 }
1261 ccv_array_free(stream_data);
1262 return schedule;
1263}
1264void ccv_nnc_graph_set_default_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type)
1265{
1266 assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({
if (graph->p == 0) ; else __assert_fail ("graph->p == 0"
, "ccv_nnc_graph.c", 1266, __extension__ __PRETTY_FUNCTION__)
; }))
;
1267 if (graph->default_schedule)
1268 ccv_nnc_graph_static_schedule_free(graph->default_schedule);
1269 graph->default_schedule = _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, 0, 0, 0, 0, 0);
1270}
1271
1272ccv_nnc_graph_static_schedule_t* ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1273{
1274 assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({
if (graph->p == 0) ; else __assert_fail ("graph->p == 0"
, "ccv_nnc_graph.c", 1274, __extension__ __PRETTY_FUNCTION__)
; }))
;
1275 return _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, 0, sources, source_size, destinations, destination_size);
1276}
1277
1278ccv_nnc_stream_context_t* ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph)
1279{
1280 if (graph->streams && graph->stream_size > 0)
1281 return graph->streams[0];
1282 return 0;
1283}
1284
1285static void _ccv_nnc_graph_dot_exec(const int index, const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, ccv_nnc_stream_context_t** const streams, const int flags, FILE* out)
1286{
1287 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1288 fputc('{', out);
1289 fprintf(out, "node%d", index);
1290 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1291 {
1292 fputs("|Command: ", out);
1293 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1294 if (schd_info)
1295 {
1296 if (schd_info->stream_size > 0)
1297 {
1298 int i, flag = 0;
1299 fputs("|Stream: ", out);
1300 for (i = 0; i < schd_info->stream_size; i++)
1301 {
1302 const int device_id = streams ? CCV_TENSOR_GET_DEVICE_ID(streams[SCHEDULE_STREAMS(*schd_info)[i]]->type)(((streams[((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)[i]]->type) & 0xfff00) >>
8)
: 0;
1303 if (i == 0)
1304 fprintf(out, "%d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)
[i], device_id);
1305 else
1306 fprintf(out, ", %d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)
[i], device_id);
1307 }
1308 for (i = 0; i < schd_info->stream_size; i++)
1309 if (SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i] >= 0)
1310 {
1311 if (!flag)
1312 {
1313 flag = 1;
1314 fprintf(out, "|Signal: %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i]);
1315 } else
1316 fprintf(out, ", %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i]);
1317 }
1318 }
1319 if (schd_info->wait_size > 0)
1320 {
1321 fputs("|Wait: ", out);
1322 int i;
1323 for (i = 0; i < schd_info->wait_size - 1; i++)
1324 fprintf(out, "%d, ", schd_info->waits[i]);
1325 fprintf(out, "%d", schd_info->waits[schd_info->wait_size - 1]);
1326 }
1327 }
1328 fputc('}', out);
1329 }
1330}
1331
1332static void _ccv_nnc_graph_dot_tensor(const int index, const ccv_nnc_tensor_t* const tensor, const int zone, const int flags, const int depth, FILE* out)
1333{
1334 // if it has an alias pointer, or, it is a long form.
1335 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1336 fputc('{', out);
1337 const int is_tensor_view = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW);
1338 if (is_tensor_view)
1339 fprintf(out, "tensorview%d", index);
1340 else
1341 fprintf(out, "tensor%d", index);
1342 int i;
1343 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1344 fputc('\'', out);
1345 if (CCV_GET_TAPE_ALLOC(tensor->type)((tensor->type) & CCV_TAPE_ALLOC))
1346 fputs(" (t)", out);
1347 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1348 {
1349 const int device_id = CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)(((tensor->info.type) & 0xfff00) >> 8);
1350 fprintf(out, "|d%d|zone%d", device_id, zone);
1351 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1352 fputc('\'', out);
1353 uintptr_t aptr = (uintptr_t)tensor->data.u8;
1354 const int* ainc = is_tensor_view ? ((ccv_nnc_tensor_view_t*)(tensor))->inc : tensor->info.dim;
1355 // For the last one, we don't extend to full ainc.
1356 size_t ainc_size = (ccv_nnc_dimension_count(ainc) - ainc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1357 // Print out the range as well.
1358 fprintf(out, "|{%#010x|%#010x}|%d", (uint32_t)aptr, (uint32_t)(aptr + ainc_size - 1), tensor->info.dim[0]);
1359 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor->info.dim[i]; i++)
1360 fprintf(out, "x%d", tensor->info.dim[i]);
1361 fputc('}', out);
1362 }
1363}
1364
1365typedef struct {
1366 int index;
1367 int name;
1368 int zone;
1369 uintptr_t tensor_ref;
1370 uintptr_t start_ptr;
1371 uintptr_t end_ptr;
1372} ccv_nnc_tensor_dot_t;
1373
1374typedef struct {
1375 ccv_nnc_tensor_dot_t* dots;
1376 int* remap;
1377 int* rename_zone;
1378 int* rename_index;
1379} ccv_nnc_tensor_dot_recovery_t;
1380
1381// First sort by start_ptr, then sort by tensor ptr (so that we will have the same tensor sorted to one cluster).
1382#define less_than(i1, i2, aux) ((i1).start_ptr < (i2).start_ptr || ((i1).start_ptr == (i2).start_ptr && (i1).tensor_ref < (i2).tensor_ref))
1383static CCV_IMPLEMENT_QSORT(_ccv_nnc_tensor_dot_sort_by_ptr, ccv_nnc_tensor_dot_t, less_than)void _ccv_nnc_tensor_dot_sort_by_ptr(ccv_nnc_tensor_dot_t *array
, size_t total, int aux) { int isort_thresh = 7; ccv_nnc_tensor_dot_t
t; int sp = 0; struct { ccv_nnc_tensor_dot_t *lb; ccv_nnc_tensor_dot_t
*ub; } stack[48]; if( total <= 1 ) return; stack[0].lb = array
; stack[0].ub = array + (total - 1); while( sp >= 0 ) { ccv_nnc_tensor_dot_t
* left = stack[sp].lb; ccv_nnc_tensor_dot_t* right = stack[sp
--].ub; for(;;) { int i, n = (int)(right - left) + 1, m; ccv_nnc_tensor_dot_t
* ptr; ccv_nnc_tensor_dot_t* ptr2; if( n <= isort_thresh )
{ insert_sort: for( ptr = left + 1; ptr <= right; ptr++ )
{ for( ptr2 = ptr; ptr2 > left && less_than(ptr2[
0],ptr2[-1], aux); ptr2--) (((t)) = ((ptr2[0])), ((ptr2[0])) =
((ptr2[-1])), ((ptr2[-1])) = ((t))); } break; } else { ccv_nnc_tensor_dot_t
* left0; ccv_nnc_tensor_dot_t* left1; ccv_nnc_tensor_dot_t* right0
; ccv_nnc_tensor_dot_t* right1; ccv_nnc_tensor_dot_t* pivot; ccv_nnc_tensor_dot_t
* a; ccv_nnc_tensor_dot_t* b; ccv_nnc_tensor_dot_t* c; int swap_cnt
= 0; left0 = left; right0 = right; pivot = left + (n/2); if(
n > 40 ) { int d = n / 8; a = left, b = left + d, c = left
+ 2*d; left = less_than(*a, *b, aux) ? (less_than(*b, *c, aux
) ? b : (less_than(*a, *c, aux) ? c : a)) : (less_than(*c, *b
, aux) ? b : (less_than(*a, *c, aux) ? a : c)); a = pivot - d
, b = pivot, c = pivot + d; pivot = less_than(*a, *b, aux) ? (
less_than(*b, *c, aux) ? b : (less_than(*a, *c, aux) ? c : a)
) : (less_than(*c, *b, aux) ? b : (less_than(*a, *c, aux) ? a
: c)); a = right - 2*d, b = right - d, c = right; right = less_than
(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than(*a, *
c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than(
*a, *c, aux) ? a : c)); } a = left, b = pivot, c = right; pivot
= less_than(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than
(*a, *c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than
(*a, *c, aux) ? a : c)); if( pivot != left0 ) { (((t)) = ((*pivot
)), ((*pivot)) = ((*left0)), ((*left0)) = ((t))); pivot = left0
; } left = left1 = left0 + 1; right = right1 = right0; for(;;
) { while( left <= right && !less_than(*pivot, *left
, aux) ) { if( !less_than(*left, *pivot, aux) ) { if( left >
left1 ) (((t)) = ((*left1)), ((*left1)) = ((*left)), ((*left
)) = ((t))); swap_cnt = 1; left1++; } left++; } while( left <=
right && !less_than(*right, *pivot, aux) ) { if( !less_than
(*pivot, *right, aux) ) { if( right < right1 ) (((t)) = ((
*right1)), ((*right1)) = ((*right)), ((*right)) = ((t))); swap_cnt
= 1; right1--; } right--; } if( left > right ) break; (((
t)) = ((*left)), ((*left)) = ((*right)), ((*right)) = ((t)));
swap_cnt = 1; left++; right--; } if( swap_cnt == 0 ) { left =
left0, right = right0; goto insert_sort; } n = ({ typeof ((int
)(left1 - left0)) _a = ((int)(left1 - left0)); typeof ((int)(
left - left1)) _b = ((int)(left - left1)); (_a < _b) ? _a :
_b; }); for( i = 0; i < n; i++ ) (((t)) = ((left0[i])), (
(left0[i])) = ((left[i-n])), ((left[i-n])) = ((t))); n = ({ typeof
((int)(right0 - right1)) _a = ((int)(right0 - right1)); typeof
((int)(right1 - right)) _b = ((int)(right1 - right)); (_a <
_b) ? _a : _b; }); for( i = 0; i < n; i++ ) (((t)) = ((left
[i])), ((left[i])) = ((right0[i-n+1])), ((right0[i-n+1])) = (
(t))); n = (int)(left - left1); m = (int)(right1 - right); if
( n > 1 ) { if( m > 1 ) { if( n > m ) { stack[++sp].
lb = left0; stack[sp].ub = left0 + n - 1; left = right0 - m +
1, right = right0; } else { stack[++sp].lb = right0 - m + 1;
stack[sp].ub = right0; left = left0, right = left0 + n - 1; }
} else left = left0, right = left0 + n - 1; } else if( m >
1 ) left = right0 - m + 1, right = right0; else break; } } }
}
1384#undef less_than
1385
1386static int _ccv_nnc_graph_dot_tensor_multiview_count(const ccv_nnc_tensor_multiview_t* const mv)
1387{
1388 if (!CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
1389 return 1;
1390 const int count = mv->kind + mv->repeat;
1391 int i, c = 0;
1392 for (i = 0; i < count; i++)
1393 c += _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
1394 return c;
1395}
1396
1397static void _ccv_nnc_graph_dot_tensor_multiview_tensor_dots(const ccv_nnc_tensor_multiview_t* const mv, ccv_nnc_tensor_dot_t* const tensor_dots, int* tensor_index)
1398{
1399 const int count = mv->kind + mv->repeat;
1400 int i;
1401 for (i = 0; i < count; i++)
1402 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1403 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], tensor_dots, tensor_index);
1404 else {
1405 tensor_dots[*tensor_index].name = *tensor_index;
1406 tensor_dots[*tensor_index].start_ptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1407 // Because tv's pointer will get updated, it is not correct in this case to have one tensor_ref.
1408 tensor_dots[*tensor_index].tensor_ref = tensor_dots[*tensor_index].start_ptr;
1409 const size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1410 tensor_dots[*tensor_index].end_ptr = tensor_dots[*tensor_index].start_ptr + dim_size - 1;
1411 ++(*tensor_index);
1412 }
1413}
1414
1415static ccv_nnc_tensor_dot_recovery_t _ccv_nnc_graph_tensor_dot_recovery(const ccv_nnc_graph_t* const graph)
1416{
1417 int i, j;
1418 // Recover tensor relationships for all tensors referenced in the graph.
1419 // Most notably, we have to give these indexes, and find if they point to
1420 // the same memory region, and whether they overlap. These information
1421 // are lost since we converted from symbolic form to the execution form.
1422 // and here we do our best to recover because that is easier to understand
1423 // if we want to present the graph visually (also, we don't want to put this
1424 // information into the tensor or execution graph to avoid overhead, thus,
1425 // recovering is the best we can do).
1426 int tensor_count = 0;
1427 for (i = 0; i < graph->exec_info->rnum; i++)
1428 {
1429 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1430 for (j = 0; j < exec_info->input_size; j++)
1431 if (exec_info->inputs[j])
1432 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[j])((*(int*)(exec_info->inputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->inputs[j]) : 1;
1433 for (j = 0; j < exec_info->output_size; j++)
1434 if (exec_info->outputs[j])
1435 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[j])((*(int*)(exec_info->outputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->outputs[j]) : 1;
1436 }
1437 ccv_nnc_tensor_dot_t* tensor_dots = tensor_count > 0 ? (ccv_nnc_tensor_dot_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_dot_t) * tensor_count) : 0;
1438 int k = 0;
1439 for (i = 0; i < graph->exec_info->rnum; i++)
1440 {
1441 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1442 for (j = 0; j < exec_info->input_size; j++)
1443 {
1444 ccv_nnc_tensor_t* tensor = exec_info->inputs[j];
1445 if (!tensor)
1446 continue;
1447 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1448 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1449 else {
1450 tensor_dots[k].name = k;
1451 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1452 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1453 const int* inc = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1454 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1455 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1456 ++k;
1457 }
1458 }
1459 for (j = 0; j < exec_info->output_size; j++)
1460 {
1461 ccv_nnc_tensor_t* tensor = exec_info->outputs[j];
1462 if (!tensor)
1463 continue;
1464 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1465 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1466 else {
1467 tensor_dots[k].name = k;
1468 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1469 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1470 const int* inc = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1471 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1472 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1473 ++k;
1474 }
1475 }
1476 }
1477 tensor_count = k; // We may over count, now shrink.
1478 // To group overlap memory into one zone, we sort it by start ptr first (secondary by the tensor pointer).
1479 _ccv_nnc_tensor_dot_sort_by_ptr(tensor_dots, tensor_count, 0);
1480 int index = 0, zone = 0;
1481 uintptr_t tensor_ref = tensor_count > 0 ? tensor_dots[0].tensor_ref : 0;
1482 uintptr_t end_ptr = tensor_count > 0 ? tensor_dots[0].end_ptr : 0;
1483 // Then, it is trivial, we go by end ptr. If the next start ptr is still within the end ptr (start ptr <= end ptr),
1484 // they are the same zone.
1485 for (i = 0; i < tensor_count; i++)
1486 {
1487 if (tensor_dots[i].tensor_ref != tensor_ref)
1488 {
1489 tensor_ref = tensor_dots[i].tensor_ref;
1490 ++index;
1491 }
1492 if (tensor_dots[i].start_ptr > end_ptr)
1493 {
1494 end_ptr = ccv_max(end_ptr, tensor_dots[i].end_ptr)({ typeof (end_ptr) _a = (end_ptr); typeof (tensor_dots[i].end_ptr
) _b = (tensor_dots[i].end_ptr); (_a > _b) ? _a : _b; })
;
1495 ++zone;
1496 }
1497 tensor_dots[i].index = index;
1498 tensor_dots[i].zone = zone;
1499 }
1500 // We already have index and zone assigned, but the problem is that these are not very human interpretable (because
1501 // it follows the pointer from low to high, not the tensor creation order). The following code renamed both the index
1502 // and the zone so that it is much more understandable.
1503 const int index_count = index + 1;
1504 const int zone_count = zone + 1;
1505 int* remap = (int*)ccmallocmalloc(sizeof(int) * (tensor_count + index_count + zone_count));
1506 int* rename_index = remap + tensor_count;
1507 int* rename_zone = rename_index + index_count;
1508 for (i = 0; i < tensor_count; i++)
1509 remap[tensor_dots[i].name] = i;
1510 for (i = 0; i < index_count; i++)
1511 rename_index[i] = -1;
1512 for (i = 0; i < zone_count; i++)
1513 rename_zone[i] = -1;
1514 index = 0;
1515 zone = 0;
1516 for (i = 0; i < tensor_count; i++)
1517 {
1518 ccv_nnc_tensor_dot_t* tensor_dot = tensor_dots + remap[i];
1519 if (rename_index[tensor_dot->index] == -1)
1520 rename_index[tensor_dot->index] = index++;
1521 if (rename_zone[tensor_dot->zone] == -1)
1522 rename_zone[tensor_dot->zone] = zone++;
1523 }
1524 ccv_nnc_tensor_dot_recovery_t recovery = {
1525 .dots = tensor_dots,
1526 .remap = remap,
1527 .rename_index = rename_index,
1528 .rename_zone = rename_zone,
1529 };
1530 return recovery;
1531}
1532
1533static void _ccv_nnc_graph_tensor_dot_recovery_free(const ccv_nnc_tensor_dot_recovery_t recovery)
1534{
1535 ccfreefree(recovery.dots);
1536 ccfreefree(recovery.remap);
1537}
1538
1539static void _ccv_nnc_graph_dot_tensor_multiview_one(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int depth, int* tensor_index, FILE* out)
1540{
1541 const int count = mv->kind + mv->repeat;
1542 int i, j;
1543 fputs("|{", out);
1544 for (i = 0; i < count; i++)
1545 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1546 {
1547 fprintf(out, "{%d", i);
1548 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1549 fputc('*', out); // Denotes that we loop on this.
1550 _ccv_nnc_graph_dot_tensor_multiview_one((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], recovery, depth, tensor_index, out);
1551 if (i == count - 1)
1552 fputc('}', out);
1553 else
1554 fputs("}|", out);
1555 } else {
1556 fprintf(out, "{%d", i);
1557 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1558 fputc('*', out); // Denotes that we loop on this.
1559 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1560 fprintf(out, "|zone%d", recovery.rename_zone[tensor_dot->zone]);
1561 for (j = 0; j < depth; j++)
1562 fputc('\'', out);
1563 uintptr_t aptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1564 // For the last one, we don't extend to full ainc.
1565 size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1566 // Print out the range as well.
1567 fprintf(out, "|{%#010x|%#010x}", (uint32_t)aptr, (uint32_t)(aptr + dim_size - 1));
1568 ++(*tensor_index);
1569 if (i == count - 1)
1570 fputc('}', out);
1571 else
1572 fputs("}|", out);
1573 }
1574 fputc('}', out);
1575}
1576
1577static void _ccv_nnc_graph_dot_tensor_multiview(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, int* tensor_index, FILE* out)
1578{
1579 // if it has an alias pointer, or, it is a long form.
1580 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1581 fputc('{', out);
1582 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1583 fprintf(out, "multiview%d", recovery.rename_index[tensor_dot->index]);
1584 int i;
1585 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1586 fputc('\'', out);
1587 if (CCV_GET_TAPE_ALLOC(mv->type)((mv->type) & CCV_TAPE_ALLOC))
1588 fputs(" (t)", out);
1589 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1590 {
1591 _ccv_nnc_graph_dot_tensor_multiview_one(mv, recovery, depth, tensor_index, out);
1592 const ccv_nnc_tensor_t* root = (ccv_nnc_tensor_t*)mv;
1593 while (CCV_IS_TENSOR_MULTIVIEW(root)((*(int*)(root)) & CCV_TENSOR_MULTIVIEW))
1594 root = CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)root)(((ccv_nnc_tensor_multiview_t*)root)->_heap_data ? ((ccv_nnc_tensor_multiview_t
*)root)->_heap_data : ((ccv_nnc_tensor_multiview_t*)root)->
_inline_data)
[0];
1595 fprintf(out, "|%d", root->info.dim[0]);
1596 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && root->info.dim[i]; i++)
1597 fprintf(out, "x%d", root->info.dim[i]);
1598 fputc('}', out);
1599 } else
1600 *tensor_index += _ccv_nnc_graph_dot_tensor_multiview_count(mv);
1601}
1602
1603static void _ccv_nnc_graph_dot_node(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int exec_index, ccv_nnc_stream_context_t** const streams, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* const tensor_index)
1604{
1605 fprintf(out, "node%d [shape=record,label=\"", exec_index);
1606 _ccv_nnc_graph_dot_exec(exec_index, exec_info, schd_info, streams, flags, out);
1607 int i;
1608 int k = *tensor_index;
1609 if (exec_info->input_size > 0)
1610 {
1611 fputs("|{Input", out);
1612 for (i = 0; i < exec_info->input_size; i++)
1613 if (exec_info->inputs[i])
1614 {
1615 fputc('|', out);
1616 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1617 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1618 else {
1619 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1620 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1621 ++k;
1622 }
1623 } else
1624 fputs("|-", out);
1625 fputc('}', out);
1626 }
1627 if (exec_info->output_size > 0)
1628 {
1629 fputs("|{Output", out);
1630 for (i = 0; i < exec_info->output_size; i++)
1631 if (exec_info->outputs[i])
1632 {
1633 fputc('|', out);
1634 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1635 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1636 else {
1637 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1638 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1639 ++k;
1640 }
1641 } else
1642 fputs("|-", out);
1643 fputc('}', out);
1644 }
1645 fputs("\"];\n", out);
1646 *tensor_index = k;
1647}
1648
1649static void _ccv_nnc_graph_dot_while_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const ccv_nnc_graph_t* const while_graph, const int flags, const int depth, FILE* out, int* tensor_index)
1650{
1651 int i;
1652 fprintf(out, "label=<<b>while%d </b>Command: ", exec_index);
1653 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1654 fputs(">;\n", out);
1655 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1656 int k = *tensor_index;
1657 if (exec_info->input_size > 0)
1658 {
1659 fputs("{Input|{", out);
1660 for (i = 0; i < exec_info->input_size; i++)
1661 {
1662 if (i > 0)
1663 fputc('|', out);
1664 if (exec_info->inputs[i])
1665 {
1666 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1667 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1668 else {
1669 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1670 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1671 ++k;
1672 }
1673 } else
1674 fputc('-', out);
1675 }
1676 fputs("}}", out);
1677 }
1678 if (exec_info->output_size > 0)
1679 {
1680 if (exec_info->input_size > 0)
1681 fputs("|", out);
1682 fputs("{Output|{", out);
1683 for (i = 0; i < exec_info->output_size; i++)
1684 {
1685 if (i > 0)
1686 fputc('|', out);
1687 if (exec_info->outputs[i])
1688 {
1689 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1690 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1691 else {
1692 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1693 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1694 ++k;
1695 }
1696 } else
1697 fputc('-', out);
1698 }
1699 fputs("}}", out);
1700 }
1701 fputs("}\"];\n", out);
1702 *tensor_index = k;
1703}
1704
1705static void _ccv_nnc_graph_dot_case_of_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* tensor_index)
1706{
1707 int i;
1708 fprintf(out, "label=<<b>caseof%d </b>Command: ", exec_index);
1709 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1710 fputs(">;\n", out);
1711 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1712 int k = *tensor_index;
1713 if (exec_info->input_size > 0)
1714 {
1715 fputs("{Input|{", out);
1716 for (i = 0; i < exec_info->input_size; i++)
1717 {
1718 if (i > 0)
1719 fputc('|', out);
1720 if (exec_info->inputs[i])
1721 {
1722 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1723 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1724 else {
1725 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1726 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1727 ++k;
1728 }
1729 } else
1730 fputc('-', out);
1731 }
1732 fputs("}}", out);
1733 }
1734 if (exec_info->output_size > 0)
1735 {
1736 if (exec_info->input_size > 0)
1737 fputs("|", out);
1738 fputs("{Output|{", out);
1739 for (i = 0; i < exec_info->output_size; i++)
1740 {
1741 if (i > 0)
1742 fputc('|', out);
1743 if (exec_info->outputs[i])
1744 {
1745 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1746 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1747 else {
1748 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1749 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1750 ++k;
1751 }
1752 } else
1753 fputc('-', out);
1754 }
1755 fputs("}}", out);
1756 }
1757 fputs("}\"];\n", out);
1758 *tensor_index = k;
1759}
1760
1761static void _ccv_nnc_graph_dot_sub_graphs(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_tensor_dot_recovery_t p_recovery, const ccv_array_t* const sub_graphs, const int flags, const int depth, FILE* out, int* tensor_index, int* exec_index)
1762{
1763 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
1764 {
1765 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1766 const ccv_nnc_graph_t* const while_graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[0] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[0]
- 1)))
;
1767 // Output this node info within this subgraph.
1768 _ccv_nnc_graph_dot_while_label(exec_info, *exec_index, p_recovery, while_graph, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1769 } else if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
1770 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1771 _ccv_nnc_graph_dot_case_of_label(exec_info, *exec_index, p_recovery, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1772 }
1773 ++(*exec_index);
1774 int p;
1775 for (p = 0; p < exec_info->graph_ref_size; p++)
1776 {
1777 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1778 {
1779 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=\"\"\n", *exec_index, *exec_index);
1780 ++(*exec_index);
1781 }
1782 const ccv_nnc_graph_t* const graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[p] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[p]
- 1)))
;
1783 const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule;
1784 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1785 int i, j;
1786 int k = 0;
1787 int* node_id = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
1788 // Output styles.
1789 for (i = 0; i < graph->exec_info->rnum; i++)
1790 {
1791 node_id[i] = *exec_index;
1792 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1793 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0])
1794 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, depth + 1, out, &k, exec_index);
1795 else {
1796 _ccv_nnc_graph_dot_node(exec_info,
1797 schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0,
1798 *exec_index, graph->streams, recovery, flags, depth, out, &k);
1799 ++(*exec_index);
1800 }
1801 }
1802 // Output connections.
1803 for (i = 0; i < graph->exec_info->rnum; i++)
1804 {
1805 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1806 if (exec_info->outgoings)
1807 for (j = 0; j < exec_info->outgoings->rnum; j++)
1808 {
1809 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j)((void*)(((char*)((exec_info->outgoings)->data)) + (size_t
)(exec_info->outgoings)->rsize * (size_t)(j)))
;
1810 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(outgoing_idx)))
;
1811 // If both are sub-graphs, have both tail and head specified.
1812 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1813 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1814 else if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && !CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1815 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1816 else if (!CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1817 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1818 else
1819 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1820 }
1821 }
1822 fputs("}\n", out);
1823 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1824 ccfreefree(node_id);
1825 }
1826 // Extra subgraph cluster.
1827 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1828 fputs("}\n", out);
1829}
1830
1831void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out)
1832{
1833 fputs("digraph G {\ncompound=true;\n", out);
1834 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1835 int i, j;
1836 int k = 0, c = 0;
1837 int* node_id = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
1838 const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule;
1839 // Output styles.
1840 for (i = 0; i < graph->exec_info->rnum; i++)
1841 {
1842 node_id[i] = c;
1843 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1844 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0])
1845 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, 1, out, &k, &c);
1846 else {
1847 _ccv_nnc_graph_dot_node(exec_info,
1848 schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0,
1849 c, graph->streams, recovery, flags, 0, out, &k);
1850 ++c;
1851 }
1852 }
1853 // Output connections.
1854 for (i = 0; i < graph->exec_info->rnum; i++)
1855 {
1856 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1857 if (exec_info->outgoings)
1858 for (j = 0; j < exec_info->outgoings->rnum; j++)
1859 {
1860 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j)((void*)(((char*)((exec_info->outgoings)->data)) + (size_t
)(exec_info->outgoings)->rsize * (size_t)(j)))
;
1861 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(outgoing_idx)))
;
1862 // If both are sub-graphs, have both tail and head specified.
1863 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1864 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1865 else if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && !CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1866 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1867 else if (!CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1868 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1869 else
1870 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1871 }
1872 }
1873 fputs("}\n", out);
1874 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1875 ccfreefree(node_id);
1876}
1877
1878void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1879{
1880 // exec current node, for synchronous CPU execution, no stream unit.
1881 int i;
1882#define visitor(node, idx, ...) \
1883 do { \
1884 if (node->cmd.cmd == CCV_NNC_NOOP) \
1885 continue; \
1886 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) \
1887 for (i = 0; i < node->graph_ref_size; i++) \
1888 { \
1889 ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[i] - 1)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(((node)->_heap_graph_ref
? (node)->_heap_graph_ref : (node)->_inline_graph_ref)
[i] - 1)))
; \
1890 ccv_nnc_graph_autotune(sub_graph, max_workspace_size, flags, 0, 0, 0, 0); \
1891 } \
1892 else { \
1893 /* Need to unwrap these tensors */ \
1894 for (i = 0; i < node->input_size + node->output_size; i++) \
1895 if (node->inputs[i] && CCV_IS_TENSOR_MULTIVIEW(node->inputs[i])((*(int*)(node->inputs[i])) & CCV_TENSOR_MULTIVIEW)) \
1896 node->inputs[i] = _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)node->inputs[i]); \
1897 PRINT(CCV_CLI_VERBOSE, "%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node
->cmd.cmd), idx, node->input_size, node->output_size
); fflush(stdout); } } while (0)
; \
1898 for (i = 0; i < node->input_size; i++) \
1899 PRINT(CCV_CLI_VERBOSE, "|-> %d. %p (%p)\n", i + 1, node->inputs[i], (node->inputs[i] ? node->inputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-> %d. %p (%p)\n", i + 1, node->inputs[i], (
node->inputs[i] ? node->inputs[i]->data.u8 : 0)); fflush
(stdout); } } while (0)
; \
1900 for (i = 0; i < node->output_size; i++) \
1901 PRINT(CCV_CLI_VERBOSE, "|<- %d. %p (%p)\n", i + 1, node->outputs[i], (node->outputs[i] ? node->outputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|<- %d. %p (%p)\n", i + 1, node->outputs[i],
(node->outputs[i] ? node->outputs[i]->data.u8 : 0))
; fflush(stdout); } } while (0)
; \
1902 node->cmd = ccv_nnc_cmd_autotune(node->cmd, max_workspace_size, node->hint, flags, node->inputs, node->input_size, node->outputs, node->output_size, 0); \
1903 } \
1904 } while (0)
1905 const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: 0);
1906 const int graph_source_size = source_size ? source_size : (graph->sources ? graph->sources->rnum : 0);
1907 const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: 0);
1908 const int graph_destination_size = destination_size ? destination_size : (graph->destinations ? graph->destinations->rnum : 0);
1909 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph_source_size); _i_++
) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1
: 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph
) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_sources)[_i_].d; } int _exist_size_
[2] = { (graph_source_size), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_source_size
); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph ==
graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_sources)[_i_].d; } _exist_size_
[0] = (graph_source_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size
); _i_++) { ((void) sizeof (((graph_destinations)[_i_].graph ==
graph) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_
].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_destinations)[_i_].d; } _exist_size_
[0] = (graph_destination_size); _exist_size_[1] = 0; _p_ = 0,
_q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const
int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r
!= 2) continue; _incomings_[_idx_].r = 3; if (_incomings_[_idx_
].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_
++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_
]; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_];
} } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_
= 0; _i_ < (graph_destination_size); _i_++) { ((void) sizeof
(((graph_destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail
("(graph_destinations)[_i_].graph == graph", "ccv_nnc_graph.c"
, 1909, __extension__ __PRETTY_FUNCTION__); })); _incomings_[
(graph_destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (
graph_source_size); _i_++) { ((void) sizeof (((graph_sources)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources
)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_sources)[_i_].d; } _p_ = 0; _q_
= 1; _exist_size_[0] = (graph_source_size); _exist_size_[1] =
0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0)))) + _idx_), (
_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d) { ++
_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph_destination_size
)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0;
_j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph_destination_size
)) { _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++)
{ ((void) sizeof (((graph_destinations)[_i_].graph == graph)
? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph
== graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); if (_incomings_[(graph_destinations)[_i_].d].r == 4) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(graph_destinations
)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[
(graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); } else if (_incomings_[(graph_destinations)[_i_].d].c >
0) continue; visitor((((ccv_nnc_graph_exec_info_t*)((void*)(
((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0)))) + (graph_destinations)[
_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations
)[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while (
0);
;
1910#undef visitor
1911}
1912
1913void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph)
1914{
1915 int i, j;
1916 for (i = 0; i < graph->exec_info->rnum; i++)
1917 {
1918 ccv_nnc_graph_exec_info_t *info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1919 if (info->_heap_graph_ref)
1920 ccfreefree(info->_heap_graph_ref);
1921 ccv_array_t* outgoings = info->outgoings;
1922 if (outgoings)
1923 ccv_array_free(outgoings);
1924 // We allocate inputs & outputs in continuous fashion, therefore, only need to free the input array.
1925 if (info->inputs)
1926 ccfreefree(info->inputs);
1927 if (info->input_flags)
1928 ccfreefree(info->input_flags);
1929 if (info->updates)
1930 ccfreefree(info->updates);
1931 if ((info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) && info->p_while.inputs)
1932 ccfreefree(info->p_while.inputs);
1933 }
1934 if (graph->tensor_wraps)
1935 {
1936 for (i = 0; i < graph->tensor_wraps->rnum; i++)
1937 {
1938 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, i)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(i)))
;
1939 if (tensor_wrap_array)
1940 {
1941 for (j = 0; j < tensor_wrap_array->size; j++)
1942 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[j]);
1943 ccfreefree(tensor_wrap_array);
1944 }
1945 }
1946 ccv_array_free(graph->tensor_wraps);
1947 }
1948 if (graph->tensor_wraps_refs)
1949 ccv_array_free(graph->tensor_wraps_refs);
1950 if (graph->breakpoints)
1951 ccfreefree(graph->breakpoints);
1952 if (graph->sources)
1953 ccv_array_free(graph->sources);
1954 if (graph->destinations)
1955 ccv_array_free(graph->destinations);
1956 if (graph->default_schedule)
1957 ccv_nnc_graph_static_schedule_free(graph->default_schedule);
1958 if (graph->streams)
1959 {
1960 // If the graph has parent graph, the default stream is allocated by the parent graph, we need to skip.
1961 if (!graph->p)
1962 ccv_nnc_stream_context_free(graph->streams[0]);
1963 for (i = 1; i < graph->stream_size; i++)
1964 ccv_nnc_stream_context_free(graph->streams[i]);
1965 ccfreefree(graph->streams);
1966 }
1967 if (graph->block_stream_tasks)
1968 ccfreefree(graph->block_stream_tasks);
1969 if (graph->signals)
1970 {
1971 for (i = 0; i < graph->signal_size; i++)
1972 ccv_nnc_stream_signal_free(graph->signals[i]);
1973 ccfreefree(graph->signals);
1974 }
1975 if (graph->carry_overs)
1976 {
1977 for (i = 0; i < graph->carry_overs->rnum; i++)
1978 {
1979 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i)((void*)(((char*)((graph->carry_overs)->data)) + (size_t
)(graph->carry_overs)->rsize * (size_t)(i)))
;
1980 _ccv_nnc_graph_tensor_wrap_free(carry_over->from);
1981 _ccv_nnc_graph_tensor_wrap_free(carry_over->to);
1982 }
1983 ccv_array_free(graph->carry_overs);
1984 }
1985 if (graph->sub_graphs)
1986 {
1987 for (i = 0; i < graph->sub_graphs->rnum; i++)
1988 ccv_nnc_graph_free(*(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
);
1989 ccv_array_free(graph->sub_graphs);
1990 }
1991 ccv_array_free(graph->exec_info);
1992 if (graph->buffer)
1993 ccfreefree(graph->buffer);
1994 ccfreefree(graph);
1995}