Bug Summary

File: nnc/ccv_nnc_graph.c
Warning: line 1523, column 44 — Dereference of null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_graph.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/18 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -D HAVE_CUDA_SM80 -I /usr/local/include -internal-isystem /usr/local/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-08-20-112342-102351-1 -x c ccv_nnc_graph.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_graph.h"
6
7// MARK - Level-2 API
8
9ccv_nnc_graph_t* ccv_nnc_graph_new(void)
10{
11 ccv_nnc_graph_t* graph = (ccv_nnc_graph_t*)cccalloccalloc(1, sizeof(ccv_nnc_graph_t));
12 graph->exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), 5, 0);
13 return graph;
14}
15
16void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size)
17{
18 if (!graph->sources)
19 graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), source_size, 0);
20 else
21 ccv_array_clear(graph->sources);
22 int i;
23 for (i = 0; i < source_size; i++)
24 ccv_array_push(graph->sources, sources + i);
25 graph->topsorted = 0;
26}
27
28ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph)
29{
30 return graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: 0;
31}
32
33int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph)
34{
35 return graph->sources ? graph->sources->rnum : 0;
36}
37
38void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
39{
40 if (!graph->destinations)
41 graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), destination_size, 0);
42 else
43 ccv_array_clear(graph->sources);
44 int i;
45 for (i = 0; i < destination_size; i++)
46 ccv_array_push(graph->destinations, destinations + i);
47 graph->topsorted = 0;
48}
49
50ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph)
51{
52 return graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: 0;
53}
54
55int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph)
56{
57 return graph->destinations ? graph->destinations->rnum : 0;
58}
59
60void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd)
61{
62 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 62, __extension__ __PRETTY_FUNCTION__); }
))
;
63 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 63, __extension__ __PRETTY_FUNCTION__); }
))
;
64 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
65 exec_info->cmd = cmd;
66}
67
68ccv_nnc_cmd_t ccv_nnc_graph_exec_cmd(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
69{
70 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 70, __extension__ __PRETTY_FUNCTION__); }
))
;
71 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 71, __extension__ __PRETTY_FUNCTION__); }
))
;
72 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
73 return exec_info->cmd;
74}
75
76void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint)
77{
78 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 78, __extension__ __PRETTY_FUNCTION__); }
))
;
79 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 79, __extension__ __PRETTY_FUNCTION__); }
))
;
80 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
81 exec_info->hint = hint;
82}
83
84static int _ccv_nnc_tensor_multiview_level_count(const ccv_nnc_tensor_multiview_t* const mv)
85{
86 if (!CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
87 return 1;
88 const int count = mv->kind + mv->repeat;
89 int i, c = 0;
90 for (i = 0; i < count; i++)
91 {
92 ccv_nnc_tensor_t* tv = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i];
93 if (tv == CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10)))
94 c = ccv_max(c, 1)({ typeof (c) _a = (c); typeof (1) _b = (1); (_a > _b) ? _a
: _b; })
;
95 else
96 c = ccv_max(c, _ccv_nnc_tensor_multiview_level_count((ccv_nnc_tensor_multiview_t*)tv))({ typeof (c) _a = (c); typeof (_ccv_nnc_tensor_multiview_level_count
((ccv_nnc_tensor_multiview_t*)tv)) _b = (_ccv_nnc_tensor_multiview_level_count
((ccv_nnc_tensor_multiview_t*)tv)); (_a > _b) ? _a : _b; }
)
;
97 }
98 return c + 1;
99}
100
101static ccv_nnc_graph_tensor_wrap_t* _ccv_nnc_graph_tensor_wrap_new(const ccv_nnc_tensor_multiview_t* const mv)
102{
103 const int level_count = _ccv_nnc_tensor_multiview_level_count(mv);
104 ccv_nnc_graph_tensor_wrap_t* tensor_wrap = (ccv_nnc_graph_tensor_wrap_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_tensor_wrap_t) + sizeof(ccv_nnc_tensor_t*) * (level_count - 1));
105 tensor_wrap->update_required = 0;
106 tensor_wrap->count = level_count;
107 tensor_wrap->index = 0;
108 tensor_wrap->tensors[0] = (ccv_nnc_tensor_t*)mv;
109 return tensor_wrap;
110}
111
112static void _ccv_nnc_graph_exec_rewind(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
113{
114 if (!info->tensor_wraps_ref)
115 return;
116 int i;
117 assert(info->tensor_wraps_ref <= graph->tensor_wraps->rnum)((void) sizeof ((info->tensor_wraps_ref <= graph->tensor_wraps
->rnum) ? 1 : 0), __extension__ ({ if (info->tensor_wraps_ref
<= graph->tensor_wraps->rnum) ; else __assert_fail (
"info->tensor_wraps_ref <= graph->tensor_wraps->rnum"
, "ccv_nnc_graph.c", 117, __extension__ __PRETTY_FUNCTION__);
}))
;
118 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(info->tensor_wraps_ref
- 1)))
;;
119 // Rewind from tensor wraps.
120 for (i = 0; i < info->input_size; i++)
121 if (tensor_wrap_array->tensor_wraps[i])
122 info->inputs[i] = tensor_wrap_array->tensor_wraps[i]->tensors[0];
123 const int d = info->input_size;
124 for (i = 0; i < info->output_size; i++)
125 if (tensor_wrap_array->tensor_wraps[d + i])
126 info->outputs[i] = tensor_wrap_array->tensor_wraps[d + i]->tensors[0];
127 const int dd = info->input_size + info->output_size;
128 for (i = 0; i < info->update_size; i++)
129 if (tensor_wrap_array->tensor_wraps[dd + i])
130 info->updates[i] = tensor_wrap_array->tensor_wraps[dd + i]->tensors[0];
131}
132
133static void _ccv_nnc_graph_tensor_wrap_free(ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
134{
135 ccfreefree(tensor_wrap);
136}
137
138ccv_nnc_graph_tensor_wrap_array_t* ccv_nnc_get_tensor_wrap_array(ccv_nnc_graph_t* const graph, const int tensor_wrap_size, int* const tensor_wraps_ref)
139{
140 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = *tensor_wraps_ref ? (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, *tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(*tensor_wraps_ref
- 1)))
: 0;
141 // Otherwise, find an open slot.
142 if (!tensor_wrap_array_ref)
143 {
144 if (!graph->tensor_wraps)
145 graph->tensor_wraps = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wrap_array_t*), 0, 0);
146 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = 0;
147 ccv_array_push(graph->tensor_wraps, &tensor_wrap_array);
148 tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, graph->tensor_wraps->rnum - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(graph->tensor_wraps
->rnum - 1)))
;
149 *tensor_wraps_ref = graph->tensor_wraps->rnum;
150 }
151 int i;
152 if (*tensor_wrap_array_ref)
153 {
154 if ((*tensor_wrap_array_ref)->size != tensor_wrap_size)
155 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)ccreallocrealloc(*tensor_wrap_array_ref, sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1));
156 for (i = (*tensor_wrap_array_ref)->size; i < tensor_wrap_size; i++)
157 (*tensor_wrap_array_ref)->tensor_wraps[i] = 0;
158 } else
159 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)cccalloccalloc(sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1), 1);
160 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
161 tensor_wrap_array->size = tensor_wrap_size;
162 return tensor_wrap_array;
163}
164
165void ccv_nnc_set_tensor_wraps(ccv_nnc_graph_tensor_wrap_t** const tensor_wraps, ccv_nnc_tensor_t* const* const tensors, const int tensor_size)
166{
167 int i;
168 for (i = 0; i < tensor_size; i++)
169 if (tensors[i])
170 {
171 if (CCV_IS_TENSOR_MULTIVIEW(tensors[i])((*(int*)(tensors[i])) & CCV_TENSOR_MULTIVIEW) &&
172 ((ccv_nnc_tensor_multiview_t*)tensors[i])->anchor != CCV_NNC_MULTIVIEW_PHI(intptr_t)0x1)
173 {
174 if (!tensor_wraps[i] || tensors[i] != tensor_wraps[i]->tensors[0])
175 {
176 if (tensor_wraps[i])
177 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
178 tensor_wraps[i] = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)tensors[i]);
179 }
180 } else {
181 if (tensor_wraps[i])
182 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
183 tensor_wraps[i] = 0;
184 }
185 }
186}
187
188void ccv_nnc_graph_register_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
189{
190 ccv_nnc_graph_t* p = graph;
191 const ccv_nnc_graph_tensor_wraps_ref_t tensor_wraps_ref = {
192 .d = tensor_wraps_ref_d,
193 .graph = graph,
194 };
195 do {
196 if (!p->tensor_wraps_refs)
197 {
198 p->tensor_wraps_refs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wraps_ref_t), 0, 0);
199 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
200 } else {
201 int i;
202 int has_tensor_wraps_ref = 0;
203 for (i = 0; !has_tensor_wraps_ref && i < p->tensor_wraps_refs->rnum; i++)
204 {
205 ccv_nnc_graph_tensor_wraps_ref_t* tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i)((void*)(((char*)((p->tensor_wraps_refs)->data)) + (size_t
)(p->tensor_wraps_refs)->rsize * (size_t)(i)))
;
206 has_tensor_wraps_ref = (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph);
207 }
208 if (!has_tensor_wraps_ref)
209 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
210 }
211 p = p->p;
212 } while (p);
213}
214
215static void _ccv_nnc_graph_redo_tensor_wraps(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
216{
217 int i;
218 const int has_wrap = ccv_nnc_tensors_have_wraps(info->inputs, info->input_size) ||
219 ccv_nnc_tensors_have_wraps(info->outputs, info->output_size) ||
220 ccv_nnc_tensors_have_wraps(info->updates, info->update_size);
221 if (has_wrap)
222 {
223 const int tensor_wrap_size = info->input_size + info->output_size + info->update_size;
224 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = ccv_nnc_get_tensor_wrap_array(graph, tensor_wrap_size, &info->tensor_wraps_ref);
225 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps, info->inputs, info->input_size);
226 const int d = info->input_size;
227 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + d, info->outputs, info->output_size);
228 const int dd = info->input_size + info->output_size;
229 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + dd, info->updates, info->update_size);
230 } else if (info->tensor_wraps_ref) {
231 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(info->tensor_wraps_ref
- 1)))
;
232 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
233 if (tensor_wrap_array)
234 {
235 for (i = 0; i < tensor_wrap_array->size; i++)
236 if (tensor_wrap_array->tensor_wraps[i])
237 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[i]);
238 ccfreefree(tensor_wrap_array);
239 *tensor_wrap_array_ref = 0;
240 info->tensor_wraps_ref = 0;
241 }
242 }
243}
244
245static void _ccv_nnc_graph_deregister_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
246{
247 ccv_nnc_graph_t* p = graph;
248 do {
249 int i;
250 // Remove from the array.
251 if (p->tensor_wraps_refs)
252 for (i = 0; i < p->tensor_wraps_refs->rnum; i++)
253 {
254 ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i)((void*)(((char*)((p->tensor_wraps_refs)->data)) + (size_t
)(p->tensor_wraps_refs)->rsize * (size_t)(i)))
;
255 if (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph)
256 {
257 --p->tensor_wraps_refs->rnum;
258 if (i < p->tensor_wraps_refs->rnum)
259 memcpy(tensor_wraps_ref, tensor_wraps_ref + 1, sizeof(ccv_nnc_graph_exec_t) * (p->tensor_wraps_refs->rnum - i));
260 break;
261 }
262 }
263 p = p->p;
264 } while (p);
265}
266
267void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size)
268{
269 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 269, __extension__ __PRETTY_FUNCTION__);
}))
;
270 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 270, __extension__ __PRETTY_FUNCTION__);
}))
;
271 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
272 assert(input_flag_size <= info->input_size)((void) sizeof ((input_flag_size <= info->input_size) ?
1 : 0), __extension__ ({ if (input_flag_size <= info->
input_size) ; else __assert_fail ("input_flag_size <= info->input_size"
, "ccv_nnc_graph.c", 272, __extension__ __PRETTY_FUNCTION__);
}))
;
273 assert(output_flag_size <= info->output_size)((void) sizeof ((output_flag_size <= info->output_size)
? 1 : 0), __extension__ ({ if (output_flag_size <= info->
output_size) ; else __assert_fail ("output_flag_size <= info->output_size"
, "ccv_nnc_graph.c", 273, __extension__ __PRETTY_FUNCTION__);
}))
;
274 if (info->input_size + info->output_size == 0)
275 return;
276 if (!info->input_flags)
277 {
278 info->input_flags = (int*)cccalloccalloc(info->input_size + info->output_size, sizeof(int));
279 info->output_flags = info->input_flags + info->input_size;
280 }
281 if (input_flag_size > 0)
282 memcpy(info->input_flags, input_flags, sizeof(int) * input_flag_size);
283 if (output_flag_size > 0)
284 memcpy(info->output_flags, output_flags, sizeof(int) * output_flag_size);
285}
286
287void ccv_nnc_graph_exec_pair_with(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t pair_exec)
288{
289 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 289, __extension__ __PRETTY_FUNCTION__);
}))
;
290 assert(exec.d >= 0)((void) sizeof ((exec.d >= 0) ? 1 : 0), __extension__ ({ if
(exec.d >= 0) ; else __assert_fail ("exec.d >= 0", "ccv_nnc_graph.c"
, 290, __extension__ __PRETTY_FUNCTION__); }))
;
291 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 291, __extension__ __PRETTY_FUNCTION__);
}))
;
292 assert(pair_exec.graph == graph || pair_exec.graph == graph->pair)((void) sizeof ((pair_exec.graph == graph || pair_exec.graph ==
graph->pair) ? 1 : 0), __extension__ ({ if (pair_exec.graph
== graph || pair_exec.graph == graph->pair) ; else __assert_fail
("pair_exec.graph == graph || pair_exec.graph == graph->pair"
, "ccv_nnc_graph.c", 292, __extension__ __PRETTY_FUNCTION__);
}))
;
293 assert(pair_exec.d >= 0)((void) sizeof ((pair_exec.d >= 0) ? 1 : 0), __extension__
({ if (pair_exec.d >= 0) ; else __assert_fail ("pair_exec.d >= 0"
, "ccv_nnc_graph.c", 293, __extension__ __PRETTY_FUNCTION__);
}))
;
294 if (pair_exec.graph == graph)
295 { assert(pair_exec.d < graph->exec_info->rnum)((void) sizeof ((pair_exec.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (pair_exec.d < graph->exec_info
->rnum) ; else __assert_fail ("pair_exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 295, __extension__ __PRETTY_FUNCTION__);
}))
; }
296 else
297 { assert(pair_exec.d < graph->pair->exec_info->rnum)((void) sizeof ((pair_exec.d < graph->pair->exec_info
->rnum) ? 1 : 0), __extension__ ({ if (pair_exec.d < graph
->pair->exec_info->rnum) ; else __assert_fail ("pair_exec.d < graph->pair->exec_info->rnum"
, "ccv_nnc_graph.c", 297, __extension__ __PRETTY_FUNCTION__);
}))
; }
298 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
299 exec_info->pair_ref = pair_exec.d + 1;
300}
301
302static ccv_nnc_tensor_t* _ccv_nnc_any_tensor_from_tensor_multiview(ccv_nnc_tensor_multiview_t* const mv)
303{
304 ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
305 while (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
306 {
307 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
308 const int count = 0;
309 const int off = mv->kind;
310 const int mod = mv->repeat;
311 // If reached the root.
312 tensor = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
313 }
314 return tensor;
315}
316
317void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
318{
319 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 319, __extension__ __PRETTY_FUNCTION__);
}))
;
320 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 320, __extension__ __PRETTY_FUNCTION__);
}))
;
321 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
322 // De-register from the graph if it contains multiview tensors.
323 if (info->tensor_wraps_ref)
324 _ccv_nnc_graph_deregister_tensor_wraps(graph, info->tensor_wraps_ref - 1);
325 // In case it is already executed, rewind.
326 _ccv_nnc_graph_exec_rewind(info, graph);
327 if (input_size == 0 && output_size == 0)
328 {
329 if (info->input_size > 0 || info->output_size > 0)
330 ccfreefree(info->inputs);
331 info->inputs = 0;
332 info->outputs = 0;
333 info->input_size = 0;
334 info->output_size = 0;
335 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
336 if (info->tensor_wraps_ref)
337 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
338 return;
339 }
340 if (info->inputs)
341 info->inputs = (ccv_nnc_tensor_t**)ccreallocrealloc(info->inputs, sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
342 else
343 info->inputs = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
344 info->outputs = info->inputs + input_size;
345 if (inputs)
346 memcpy(info->inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
347 if (outputs)
348 memcpy(info->outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
349 int i;
350 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
351 for (i = 0; i < input_size + output_size; i++)
352 if (info->inputs[i])
353 {
354 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i])((*(int*)(info->inputs[i])) & CCV_TENSOR_MULTIVIEW) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i];
355 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype)((tensor->info.datatype) & 0xFF000);
356 }
357 info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes);
358 info->input_size = input_size;
359 info->output_size = output_size;
360 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
361 // Register again if the tensor wraps exist.
362 if (info->tensor_wraps_ref)
363 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
364 // Free flags.
365 if (info->input_flags)
366 {
367 ccfreefree(info->input_flags);
368 info->input_flags = info->output_flags = 0;
369 }
370}
371
372void ccv_nnc_graph_exec_add_as_affected(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update)
373{
374 assert(CCV_IS_TENSOR_MULTIVIEW(update))((void) sizeof ((((*(int*)(update)) & CCV_TENSOR_MULTIVIEW
)) ? 1 : 0), __extension__ ({ if (((*(int*)(update)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(update)", "ccv_nnc_graph.c"
, 374, __extension__ __PRETTY_FUNCTION__); }))
;
375 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 375, __extension__ __PRETTY_FUNCTION__);
}))
;
376 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 376, __extension__ __PRETTY_FUNCTION__);
}))
;
377 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
378 const int register_tensor_wraps = !info->tensor_wraps_ref;
379 const int update_index = info->update_size;
380 ++info->update_size;
381 if (info->updates)
382 info->updates = (ccv_nnc_tensor_t**)ccreallocrealloc(info->updates, sizeof(ccv_nnc_tensor_t*) * info->update_size);
383 else
384 info->updates = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * info->update_size);
385 info->updates[update_index] = update;
386 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
387 if (register_tensor_wraps)
388 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
389}
390
391ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
392{
393 int d = graph->exec_info->rnum;
394 ccv_nnc_graph_exec_info_t info = {
395 .cmd = cmd,
396 .hint = hint,
397 .input_size = input_size,
398 .output_size = output_size,
399 };
400 assert(inputs || input_size == 0)((void) sizeof ((inputs || input_size == 0) ? 1 : 0), __extension__
({ if (inputs || input_size == 0) ; else __assert_fail ("inputs || input_size == 0"
, "ccv_nnc_graph.c", 400, __extension__ __PRETTY_FUNCTION__);
}))
;
401 assert(outputs || output_size == 0)((void) sizeof ((outputs || output_size == 0) ? 1 : 0), __extension__
({ if (outputs || output_size == 0) ; else __assert_fail ("outputs || output_size == 0"
, "ccv_nnc_graph.c", 401, __extension__ __PRETTY_FUNCTION__);
}))
;
402 if (input_size > 0 || output_size > 0)
403 {
404 info.inputs = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
405 info.outputs = info.inputs + input_size;
406 if (inputs)
407 memcpy(info.inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
408 if (outputs)
409 memcpy(info.outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
410 info.input_size = input_size;
411 info.output_size = output_size;
412 int i;
413 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
414 for (i = 0; i < input_size + output_size; i++)
415 if (info.inputs[i])
416 {
417 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i])((*(int*)(info.inputs[i])) & CCV_TENSOR_MULTIVIEW) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i];
418 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype)((tensor->info.datatype) & 0xFF000);
419 }
420 info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes);
421 }
422 _ccv_nnc_graph_redo_tensor_wraps(&info, graph);
423 // Add itself to the graph's wraps array, this will help the run time when we run the graph and do unwrapping.
424 if (info.tensor_wraps_ref)
425 ccv_nnc_graph_register_tensor_wraps(graph, info.tensor_wraps_ref - 1);
426 ccv_array_push(graph->exec_info, &info);
427 return (ccv_nnc_graph_exec_t){
428 .d = d,
429 .graph = graph,
430 };
431}
432
433void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to)
434{
435 ccv_nnc_graph_tensor_carry_over_t carry_over = {
436 .from = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)from),
437 .to = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)to)
438 };
439 if (!graph->carry_overs)
440 graph->carry_overs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_carry_over_t), 0, 0);
441 ccv_array_push(graph->carry_overs, &carry_over);
442}
443
444int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
445{
446 assert(graph == source.graph)((void) sizeof ((graph == source.graph) ? 1 : 0), __extension__
({ if (graph == source.graph) ; else __assert_fail ("graph == source.graph"
, "ccv_nnc_graph.c", 446, __extension__ __PRETTY_FUNCTION__);
}))
;
447 assert(graph == destination.graph)((void) sizeof ((graph == destination.graph) ? 1 : 0), __extension__
({ if (graph == destination.graph) ; else __assert_fail ("graph == destination.graph"
, "ccv_nnc_graph.c", 447, __extension__ __PRETTY_FUNCTION__);
}))
;
448 assert(source.d < graph->exec_info->rnum)((void) sizeof ((source.d < graph->exec_info->rnum) ?
1 : 0), __extension__ ({ if (source.d < graph->exec_info
->rnum) ; else __assert_fail ("source.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 448, __extension__ __PRETTY_FUNCTION__);
}))
;
449 assert(destination.d < graph->exec_info->rnum)((void) sizeof ((destination.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (destination.d < graph->
exec_info->rnum) ; else __assert_fail ("destination.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 449, __extension__ __PRETTY_FUNCTION__);
}))
;
450 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(source.d)))
;
451 if (src_info->outgoings == 0)
452 src_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
453 else {
454 int i;
455 // Check if this is already connected, if so, skip.
456 for (i = 0; i < src_info->outgoings->rnum; i++)
457 if (*(int*)ccv_array_get(src_info->outgoings, i)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(i)))
== destination.d)
458 return -1;
459 }
460 ccv_array_push(src_info->outgoings, &destination.d);
461 graph->topsorted = 0;
462 return 0;
463}
464
/* Remove the directed edge source -> destination from the graph, if present.
 * Returns 0 when the edge was found and removed, -1 when the source node has
 * no outgoing list at all or the edge does not exist. Removal is done by
 * swapping with the last outgoing entry, so edge order is NOT preserved. */
465 int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
466{
/* Both exec handles must belong to this graph and index valid exec_info slots. */
467 assert(graph == source.graph)((void) sizeof ((graph == source.graph) ? 1 : 0), __extension__
({ if (graph == source.graph) ; else __assert_fail ("graph == source.graph"
, "ccv_nnc_graph.c", 467, __extension__ __PRETTY_FUNCTION__);
}))
;
468 assert(graph == destination.graph)((void) sizeof ((graph == destination.graph) ? 1 : 0), __extension__
({ if (graph == destination.graph) ; else __assert_fail ("graph == destination.graph"
, "ccv_nnc_graph.c", 468, __extension__ __PRETTY_FUNCTION__);
}))
;
469 assert(source.d < graph->exec_info->rnum)((void) sizeof ((source.d < graph->exec_info->rnum) ?
1 : 0), __extension__ ({ if (source.d < graph->exec_info
->rnum) ; else __assert_fail ("source.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 469, __extension__ __PRETTY_FUNCTION__);
}))
;
470 assert(destination.d < graph->exec_info->rnum)((void) sizeof ((destination.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (destination.d < graph->
exec_info->rnum) ; else __assert_fail ("destination.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 470, __extension__ __PRETTY_FUNCTION__);
}))
;
/* Expanded ccv_array_get: pointer to the exec_info record for the source node. */
471 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(source.d)))
;
/* No outgoing list was ever allocated for this node: nothing to disjoin. */
472 if (!src_info->outgoings)
473 return -1;
474 int i;
475 // Search the outgoing edges for destination; remove it if found.
476 for (i = 0; i < src_info->outgoings->rnum; i++)
477 if (*(int*)ccv_array_get(src_info->outgoings, i)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(i)))
== destination.d)
478 {
/* Found: overwrite this slot with the last element (unless it already is
 * the last) and shrink the array — O(1) unordered removal. */
479 if (i < src_info->outgoings->rnum - 1)
480 *(int*)ccv_array_get(src_info->outgoings, i)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(i)))
= *(int*)ccv_array_get(src_info->outgoings, src_info->outgoings->rnum - 1)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(src_info->outgoings
->rnum - 1)))
;
481 --src_info->outgoings->rnum;
/* The edge set changed, so any cached topological order is stale. */
482 graph->topsorted = 0;
483 return 0;
484 }
/* The edge source -> destination was not present. */
485 return -1;
486}
487
488int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph)
489{
490 return graph->exec_info ? graph->exec_info->rnum : 0;
491}
492
493void* ccv_nnc_graph_buffer(ccv_nnc_graph_t* const graph, int size)
494{
495 if (graph->buffer_size >= size)
496 return graph->buffer;
497 graph->buffer_size = size;
498 graph->buffer = (graph->buffer) ? ccreallocrealloc(graph->buffer, size) : ccmallocmalloc(size);
499 return graph->buffer;
500}
501
502void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size)
503{
504 if (exec_cvt_size == 0 && graph->exec_info->rnum == 0)
505 {
506 graph->topsorted = 1;
507 return;
508 }
509 assert(exec_cvt_size == graph->exec_info->rnum)((void) sizeof ((exec_cvt_size == graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (exec_cvt_size == graph->exec_info
->rnum) ; else __assert_fail ("exec_cvt_size == graph->exec_info->rnum"
, "ccv_nnc_graph.c", 509, __extension__ __PRETTY_FUNCTION__);
}))
;
510 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 510, __extension__ __PRETTY_FUNCTION__);
}))
;
511 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 511, __extension__ __PRETTY_FUNCTION__);
}))
;
512 int i, j;
513 for (i = 0; i < exec_cvt_size; i++)
514 exec_cvt[i] = -1;
515 ccv_array_t* exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), graph->exec_info->rnum, 0);
516 // If there are breakpoints, it is more complicated, we first start to the breakpoints, and then continue from the breakpoints to the destinations.
517 if (graph->breakpoint_size)
518 {
519 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, graph->breakpoints, graph->breakpoint_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = ((graph->exec_info->
rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 :
0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((
char*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->sources)->data)) + (size_t)(graph->sources)->
rsize * (size_t)(0))))[_i_].d].r = 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_
[2] = { (graph->sources->rnum), 0, }; int _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_info->rnum)
) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph->sources->rnum); _i_++) { ((void
) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
sources)->data)) + (size_t)(graph->sources)->rsize *
(size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ (
{ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->sources)->data)) + (size_t)(graph->sources)->
rsize * (size_t)(0))))[_i_].d].r = 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].d; } _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; _p_ =
0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 3) continue; _incomings_[_idx_].r = 4
; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->
exec_info)->data)) + (size_t)(graph->exec_info)->rsize
* (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((
ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int
*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((
char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data
)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(
(graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t
)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges
= _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; }
_edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_
; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue; _incomings_
[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->
exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (graph->exec_info->rnum)) ; else __assert_fail
("_exist_size_[_q_] < (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 519, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_]
[_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->breakpoint_size); _i_++) { ((void) sizeof (((graph->breakpoints
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph->
breakpoints)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(graph->breakpoints)[_i_].d].r = 5; _exists_
[0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_[0] =
(graph->breakpoint_size); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue
; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_
[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((
_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0
), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info
->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph->breakpoint_size); _i_++) { ((void
) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 :
0), __extension__ ({ if ((graph->breakpoints)[_i_].graph ==
graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(graph->breakpoints)[_i_].d].d = 1; } for
(_i_ = 0; _i_ < (graph->sources->rnum); _i_++) { ((
void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph
->sources)->data)) + (size_t)(graph->sources)->rsize
* (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
sources)->data)) + (size_t)(graph->sources)->rsize *
(size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 6 && _d_ < (graph
->breakpoint_size)) { _exists_[_p_][_i_] = d; continue; } }
else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void
*)(((char*)((graph->exec_info)->data)) + (size_t)(graph
->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->
rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 6 && _d_ < (graph
->breakpoint_size)) { ((void) sizeof ((_exist_size_[_q_] <
(graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if
(_exist_size_[_q_] < (graph->exec_info->rnum)) ; else
__assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (graph->breakpoint_size); _i_
++) { ((void) sizeof (((graph->breakpoints)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_]
.graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[(graph->breakpoints)[_i_].d].r == 7)
continue; if (!(0)) { ((void) sizeof ((_incomings_[(graph->
breakpoints)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_
[(graph->breakpoints)[_i_].d].c == 0) ; else __assert_fail
("_incomings_[(graph->breakpoints)[_i_].d].c == 0", "ccv_nnc_graph.c"
, 519, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_
[(graph->breakpoints)[_i_].d].c > 0) continue; _visit_->
node[_visit_->size].index = (((graph->breakpoints)[_i_]
.d)); _visit_->node[_visit_->size].term = ((_incomings_
[(graph->breakpoints)[_i_].d].d)); ++_visit_->size;; } if
(_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof
((_visit_->size <= (graph->exec_info->rnum)) ? 1
: 0), __extension__ ({ if (_visit_->size <= (graph->
exec_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
520 for (i = 0; i < graph->breakpoint_size; i++)
521 exec_cvt[graph->breakpoints[i].d] = -2; // Mark this as breakpoints, so we will skip the first round.
522 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
523 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 523, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
524 if (exec_cvt[idx] == -2) // Skip breakpoint.
525 continue;
526 // Loop over node and push to the array.
527 ccv_array_push(exec_info, node);
528 // Go to its sub-graph to fix exec_idx
529 for (i = 0; i < node->graph_ref_size; i++)
530 {
531 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
532 if (graph_ref >= 0)
533 {
534 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
535 sub_graph->exec_idx = exec_info->rnum;
536 }
537 }
538 exec_cvt[idx] = exec_info->rnum - 1;
539 } ccv_nnc_graph_visit_endfor} }
540 ccv_nnc_graph_visit_free(visit);
541 graph->breakpoint_offset = exec_info->rnum;
542 visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = ((graph->exec_info->
rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(graph->breakpoints)[_i_].d].r = 1; _exists_
[0][_i_] = (graph->breakpoints)[_i_].d; } int _exist_size_
[2] = { (graph->breakpoint_size), 0, }; int _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_info->rnum)
) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph->breakpoint_size); _i_++) { ((void
) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 :
0), __extension__ ({ if ((graph->breakpoints)[_i_].graph ==
graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(graph->breakpoints)[_i_].d].r = 3; _exists_
[0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_[0] =
(graph->breakpoint_size); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r != 3) continue; _incomings_[_idx_].r = 4; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if
(_exist_size_[_q_] < (graph->exec_info->rnum)) ; else
__assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph->destinations->rnum); _i_++) {
((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r = 5; _exists_[0][_i_] =
((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].d; } _exist_size_[0] = (graph->destinations
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_info->rnum)
) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph->destinations->rnum); _i_++) {
((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->breakpoint_size); _i_++) { ((void) sizeof ((
(graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail
("(graph->breakpoints)[_i_].graph == graph", "ccv_nnc_graph.c"
, 542, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (graph->breakpoints)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->breakpoint_size); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 6 && _d_ < (graph
->destinations->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(graph->destinations->rnum)) { ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_info->rnum)
) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (graph->destinations->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 7) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 542, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 542, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
543 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
544 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
545 // Loop over node and push to the array.
546 ccv_array_push(exec_info, node);
547 // Go to its sub-graph to fix exec_idx
548 for (i = 0; i < node->graph_ref_size; i++)
549 {
550 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
551 if (graph_ref >= 0)
552 {
553 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
554 sub_graph->exec_idx = exec_info->rnum;
555 }
556 }
557 exec_cvt[idx] = exec_info->rnum - 1;
558 } ccv_nnc_graph_visit_endfor} }
559 ccv_nnc_graph_visit_free(visit);
560 for (i = 0; i < graph->breakpoint_size; i++)
561 { assert(exec_cvt[graph->breakpoints[i].d] >= 0)((void) sizeof ((exec_cvt[graph->breakpoints[i].d] >= 0
) ? 1 : 0), __extension__ ({ if (exec_cvt[graph->breakpoints
[i].d] >= 0) ; else __assert_fail ("exec_cvt[graph->breakpoints[i].d] >= 0"
, "ccv_nnc_graph.c", 561, __extension__ __PRETTY_FUNCTION__);
}))
; } // All breakpoints should be assigned.
562 } else {
563 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = ((graph->exec_info->
rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 :
0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((
char*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->sources)->data)) + (size_t)(graph->sources)->
rsize * (size_t)(0))))[_i_].d].r = 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_
[2] = { (graph->sources->rnum), 0, }; int _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r !=
0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_info->rnum)
) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph->sources->rnum); _i_++) { ((void
) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
sources)->data)) + (size_t)(graph->sources)->rsize *
(size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ (
{ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->sources)->data)) + (size_t)(graph->sources)->
rsize * (size_t)(0))))[_i_].d].r = 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].d; } _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; _p_ =
0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 3) continue; _incomings_[_idx_].r = 4
; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->
exec_info)->data)) + (size_t)(graph->exec_info)->rsize
* (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((
ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int
*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((
char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data
)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(
(graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t
)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges
= _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; }
_edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_
; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue; _incomings_
[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->
exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (graph->exec_info->rnum)) ; else __assert_fail
("_exist_size_[_q_] < (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 563, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_]
[_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r = 5; _exists_[0][_i_] =
((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].d; } _exist_size_[0] = (graph->destinations
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_info->rnum)
) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph->destinations->rnum); _i_++) {
((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->sources->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->
data)) + (size_t)(graph->sources)->rsize * (size_t)(0))
))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 6 && _d_ < (graph
->destinations->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(graph->destinations->rnum)) { ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_info->rnum)
) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (graph->destinations->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 7) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 563, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
564 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
565 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
566 // Loop over node and push to the array.
567 ccv_array_push(exec_info, node);
568 // Go to its sub-graph to fix exec_idx
569 for (i = 0; i < node->graph_ref_size; i++)
570 {
571 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
572 if (graph_ref >= 0)
573 {
574 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
575 sub_graph->exec_idx = exec_info->rnum;
576 }
577 }
578 exec_cvt[idx] = exec_info->rnum - 1;
579 } ccv_nnc_graph_visit_endfor} }
580 ccv_nnc_graph_visit_free(visit);
581 }
582 assert(graph->exec_info->rnum == exec_info->rnum)((void) sizeof ((graph->exec_info->rnum == exec_info->
rnum) ? 1 : 0), __extension__ ({ if (graph->exec_info->
rnum == exec_info->rnum) ; else __assert_fail ("graph->exec_info->rnum == exec_info->rnum"
, "ccv_nnc_graph.c", 582, __extension__ __PRETTY_FUNCTION__);
}))
;
583 ccv_array_free(graph->exec_info);
584 graph->exec_info = exec_info;
585 for (i = 0; i < graph->sources->rnum; i++)
586 {
587 ccv_nnc_graph_exec_t* const source = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, i)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(i)))
;
588 source->d = exec_cvt[source->d];
589 }
590 for (i = 0; i < graph->destinations->rnum; i++)
591 {
592 ccv_nnc_graph_exec_t* const destination = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(i)))
;
593 destination->d = exec_cvt[destination->d];
594 }
595 // Update all outgoings to reflect the latest.
596 for (i = 0; i < exec_info->rnum; i++)
597 {
598 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(exec_info, i)((void*)(((char*)((exec_info)->data)) + (size_t)(exec_info
)->rsize * (size_t)(i)))
;
599 if (info->outgoings)
600 for (j = 0; j < info->outgoings->rnum; j++)
601 *(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
= exec_cvt[*(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
];
602 }
603 graph->topsorted = 1;
604}
605
// Per-stream bookkeeping used while computing a static schedule for a graph.
606typedef struct {
 // Device this stream is bound to (read back as the default device when a
 // node's tensors do not pin one -- see _ccv_nnc_device_ids_for_stream_data).
 607 int device_id;
 // NOTE(review): appears to record an exec index associated with this stream;
 // confirm exact meaning against the scheduling code that writes it.
 608 int exec_idx;
 // Signals this stream has already waited on (lazily allocated). Used by
 // _ccv_nnc_graph_schedule_assign_signals to avoid waiting twice on the same
 // signal.
 609 ccv_array_t* signal_set;
 610 ccv_array_t* command_set; // The set of command executed in this stream. In case there is a tie (on rank). We will check this.
 611} ccv_nnc_stream_data_t;
612
613static void _ccv_nnc_graph_schedule_assign_signals(ccv_array_t* const incoming, ccv_nnc_graph_exec_schedule_t* const node, ccv_array_t* const stream_data, int* const signal_size, ccv_nnc_graph_exec_schedule_t* const exec_info, const int exec_info_size)
614{
615 assert(incoming->rnum > 0)((void) sizeof ((incoming->rnum > 0) ? 1 : 0), __extension__
({ if (incoming->rnum > 0) ; else __assert_fail ("incoming->rnum > 0"
, "ccv_nnc_graph.c", 615, __extension__ __PRETTY_FUNCTION__);
}))
;
616 int i, j, k;
617 int wait_size = 0, max_wait_size = 0;
618 for (i = 0; i < incoming->rnum; i++)
619 {
620 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
621 ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx;
622 assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ?
1 : 0), __extension__ ({ if (incoming_exec_info->stream_size
> 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0"
, "ccv_nnc_graph.c", 622, __extension__ __PRETTY_FUNCTION__);
}))
;
623 max_wait_size += incoming_exec_info->stream_size;
624 }
625 int waits[ccv_max(1, max_wait_size)({ typeof (1) _a = (1); typeof (max_wait_size) _b = (max_wait_size
); (_a > _b) ? _a : _b; })
];
626 assert(node->stream_size > 0)((void) sizeof ((node->stream_size > 0) ? 1 : 0), __extension__
({ if (node->stream_size > 0) ; else __assert_fail ("node->stream_size > 0"
, "ccv_nnc_graph.c", 626, __extension__ __PRETTY_FUNCTION__);
}))
;
627 for (i = 0; i < incoming->rnum; i++)
628 {
629 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
630 assert(incoming_idx < exec_info_size)((void) sizeof ((incoming_idx < exec_info_size) ? 1 : 0), __extension__
({ if (incoming_idx < exec_info_size) ; else __assert_fail
("incoming_idx < exec_info_size", "ccv_nnc_graph.c", 630,
__extension__ __PRETTY_FUNCTION__); }))
;
631 assert(incoming_idx >= 0)((void) sizeof ((incoming_idx >= 0) ? 1 : 0), __extension__
({ if (incoming_idx >= 0) ; else __assert_fail ("incoming_idx >= 0"
, "ccv_nnc_graph.c", 631, __extension__ __PRETTY_FUNCTION__);
}))
;
632 ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx;
633 assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ?
1 : 0), __extension__ ({ if (incoming_exec_info->stream_size
> 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0"
, "ccv_nnc_graph.c", 633, __extension__ __PRETTY_FUNCTION__);
}))
;
634 int stream_synced = 1;
635 // If the current node's stream is a subset of the incoming node's stream, there
636 // is no need to sync with signal, because we are already synced with the incoming.
637 for (j = 0; stream_synced && j < node->stream_size; j++)
638 {
639 const int s = SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node
)._heap_streams)
[j];
640 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 640
, __extension__ __PRETTY_FUNCTION__); }))
;
641 int flag = 0;
642 for (k = 0; !flag && k < incoming_exec_info->stream_size; k++)
643 flag = (SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_streams : (*incoming_exec_info)._heap_streams)
[k] == s);
644 stream_synced = flag;
645 }
646 if (stream_synced)
647 continue;
648 // Otherwise, find the streams we need to sync with, and create signals for these.
649 for (j = 0; j < incoming_exec_info->stream_size; j++)
650 {
651 const int s = SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_streams : (*incoming_exec_info)._heap_streams)
[j];
652 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 652
, __extension__ __PRETTY_FUNCTION__); }))
;
653 int flag = 0;
654 for (k = 0; !flag && k < node->stream_size; k++)
655 flag = (SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node
)._heap_streams)
[k] == s);
656 if (!flag) // Need to have a signal.
657 {
658 if (SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j] < 0)
659 SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j] = (*signal_size)++;
660 else {
661 int flag = 0;
662 // If any of the stream the current node has already seen this signal, we are good already.
663 for (k = 0; !flag && k < node->stream_size; k++)
664 {
665 assert(SCHEDULE_STREAMS(*node)[k] >= 0)((void) sizeof ((((*node).stream_size <= 1 ? (*node)._inline_streams
: (*node)._heap_streams)[k] >= 0) ? 1 : 0), __extension__
({ if (((*node).stream_size <= 1 ? (*node)._inline_streams
: (*node)._heap_streams)[k] >= 0) ; else __assert_fail ("SCHEDULE_STREAMS(*node)[k] >= 0"
, "ccv_nnc_graph.c", 665, __extension__ __PRETTY_FUNCTION__);
}))
;
666 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((*node).stream_size <= 1 ? (*node)
._inline_streams : (*node)._heap_streams)[k])))
;
667 flag = (data->signal_set && ccv_array_find_int(data->signal_set, SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j]));
668 }
669 if (flag)
670 continue;
671 }
672 // Otherwise, we need to wait for this. Currently, our granularity is about wait on all streams.
673 waits[wait_size++] = SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j];
674 // All streams on this node have seen this signal.
675 for (k = 0; k < node->stream_size; k++)
676 {
677 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((*node).stream_size <= 1 ? (*node)
._inline_streams : (*node)._heap_streams)[k])))
;
678 if (!data->signal_set)
679 data->signal_set = ccv_array_new(sizeof(int), 0, 0);
680 ccv_array_push(data->signal_set, &SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j]);
681 }
682 }
683 }
684 }
685 node->wait_size = wait_size;
686 if (wait_size > 0)
687 {
688 node->waits = node->waits ? ccreallocrealloc(node->waits, sizeof(int) * wait_size) : ccmallocmalloc(sizeof(int) * wait_size);
689 memcpy(node->waits, waits, sizeof(int) * wait_size);
690 }
691}
692
// Per-node bookkeeping used while laying out execs onto streams.
693typedef struct {
 // NOTE(review): rank appears to be used to order/bucket nodes during stream
 // assignment (ties on rank are broken elsewhere) -- confirm against callers.
 694 int rank;
 // Outgoing node indices accumulated for this node (lazily allocated).
 695 ccv_array_t* outgoings;
 696} ccv_nnc_incoming_t;
697
698static int _ccv_nnc_device_ids_for_stream_data(ccv_nnc_graph_exec_info_t* const node, const int device_id, ccv_array_t* const stream_data, int* const device_ids, const int max_device_id_size)
699{
700 // TODO: I need to re-think whether this is GPU only or not.
701 int device_id_size = ccv_nnc_device_ids_for_io(node->inputs, node->input_size, node->outputs, node->output_size, CCV_TENSOR_GPU_MEMORY, device_ids, max_device_id_size);
702 if (device_id_size == 0)
703 {
704 // If there is a default data, use that device id. Otherwise, use the device id passed in (this will be the default data device id).
705 if (stream_data->rnum > 0)
706 {
707 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
708 device_ids[0] = default_data->device_id;
709 } else
710 device_ids[0] = device_id >= 0 ? device_id : 0;
711 device_id_size = 1;
712 }
713 return device_id_size;
714}
715
716void ccv_nnc_graph_static_schedule_free(ccv_nnc_graph_static_schedule_t* const schedule)
717{
718 int i;
719 ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info;
720 for (i = 0; i < schedule->exec_info_size; i++)
721 {
722 if (schd_info[i].stream_size > 1)
723 ccfreefree(schd_info[i]._heap_streams);
724 if (schd_info[i].waits)
725 ccfreefree(schd_info[i].waits);
726 }
727 if (schedule->stream_1s)
728 ccfreefree(schedule->stream_1s);
729 if (schedule->waits)
730 ccfreefree(schedule->waits);
731 if (schedule->psort)
732 ccfreefree(schedule->psort);
733 if (schedule->begin)
734 ccv_nnc_stream_signal_free(schedule->begin);
735 if (schedule->end)
736 ccv_nnc_stream_signal_free(schedule->end);
737 ccfreefree(schedule);
738}
739
740static ccv_nnc_graph_static_schedule_t* _ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int device_id, const int max_stream_count, ccv_nnc_stream_context_t* const stream_context, const ccv_nnc_graph_exec_t* const _sources, const int _source_size, const ccv_nnc_graph_exec_t* const _destinations, const int _destination_size)
741{
742 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 742, __extension__ __PRETTY_FUNCTION__);
}))
;
743 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 743, __extension__ __PRETTY_FUNCTION__);
}))
;
744 assert(graph->topsorted)((void) sizeof ((graph->topsorted) ? 1 : 0), __extension__
({ if (graph->topsorted) ; else __assert_fail ("graph->topsorted"
, "ccv_nnc_graph.c", 744, __extension__ __PRETTY_FUNCTION__);
}))
; // Only support this on a topsorted graph.
745 const int exec_info_size = graph->exec_info->rnum;
746 assert(exec_info_size > 0)((void) sizeof ((exec_info_size > 0) ? 1 : 0), __extension__
({ if (exec_info_size > 0) ; else __assert_fail ("exec_info_size > 0"
, "ccv_nnc_graph.c", 746, __extension__ __PRETTY_FUNCTION__);
}))
;
747 const ccv_nnc_graph_exec_t* const sources = _sources == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: _sources;
748 const int source_size = _sources == 0 ? graph->sources->rnum : _source_size;
749 if (!_sources)
750 { assert(_source_size == 0)((void) sizeof ((_source_size == 0) ? 1 : 0), __extension__ (
{ if (_source_size == 0) ; else __assert_fail ("_source_size == 0"
, "ccv_nnc_graph.c", 750, __extension__ __PRETTY_FUNCTION__);
}))
; }
751 const ccv_nnc_graph_exec_t* const destinations = _destinations == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: _destinations;
752 const int destination_size = _destinations == 0 ? graph->destinations->rnum : _destination_size;
753 if (!_destinations)
754 { assert(_destination_size == 0)((void) sizeof ((_destination_size == 0) ? 1 : 0), __extension__
({ if (_destination_size == 0) ; else __assert_fail ("_destination_size == 0"
, "ccv_nnc_graph.c", 754, __extension__ __PRETTY_FUNCTION__);
}))
; }
755 const int root_schedule = (_sources == 0 && _destinations == 0);
756 ccv_nnc_graph_static_schedule_t* const schedule = cccalloccalloc(1, sizeof(ccv_nnc_graph_static_schedule_t) + sizeof(ccv_nnc_graph_exec_schedule_t) * (exec_info_size - 1));
757 schedule->exec_info_size = exec_info_size;
758 ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info;
759 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0)))
;
760 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (
(sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] =
(sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0
, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) {
_exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_
]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_
[_idx_].r != 1) continue; _incomings_[_idx_].r = 2; if ((exec_info
)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_]
.outgoings->rnum; _j_++) { const int d = *(int*)((void*)((
(char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t)(
(exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); ++
_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_
[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] =
(sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue
; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] <
(exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 760
, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations
)[_i_].d].r = 5; _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue
; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_
[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((
_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail
("_exist_size_[_q_] < (exec_info_size)", "ccv_nnc_graph.c"
, 760, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_]
[_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size
); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph ==
graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0
; _i_ < (source_size); _i_++) { ((void) sizeof (((sources)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1
; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 6 && _d_ < (destination_size
)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void
) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 760
, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[(destinations
)[_i_].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(destinations)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if
(_incomings_[(destinations)[_i_].d].c == 0) ; else __assert_fail
("_incomings_[(destinations)[_i_].d].c == 0", "ccv_nnc_graph.c"
, 760, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_
[(destinations)[_i_].d].c > 0) continue; _visit_->node[
_visit_->size].index = (((destinations)[_i_].d)); _visit_->
node[_visit_->size].term = ((_incomings_[(destinations)[_i_
].d].d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_
); } while (0);; ((void) sizeof ((_visit_->size <= (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size
)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_nnc_graph.c", 760, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
761 if (!root_schedule)
762 {
763 // If this is not a root schedule, we need to do partial topsort.
764 int psort_size = 0;
765 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
766 ++psort_size;
767 } ccv_nnc_graph_visit_endfor} }
768 schedule->psort = (int*)ccmallocmalloc(sizeof(int) * psort_size);
769 schedule->psort_size = psort_size;
770 psort_size = 0;
771 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
772 schedule->psort[psort_size++] = idx;
773 } ccv_nnc_graph_visit_endfor} }
774 }
775 int i, j, k;
776 // Generate exec dependencies (or, in other words, partial ordering of executions).
777 ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(exec_info_size, exec_info_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
778 int* buf = (int*)ccmallocmalloc(sizeof(int) * exec_info_size * 2);
779 int buf_size;
780#define for_block(x, val) \
781 do { \
782 if (((int32_t*)val)[0] > 0) \
783 { \
784 buf[buf_size * 2] = x; \
785 buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \
786 ++buf_size; \
787 } \
788 } while (0)
789 for (i = 0; i < exec_info_size; i++)
790 schd_info[i].stream_size = -1;
791 ccv_nnc_graph_visit_for(visit, exec_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_info)) const node __attribute__((unused
)) = (exec_info) + idx;
{
792 buf_size = 0; /* save all its parent deps to this buffer */
793 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
794 schd_info[idx].stream_size = 0;
795 if (vector)
796 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
797 if (!node->outgoings)
798 continue;
799 for (i = 0; i < node->outgoings->rnum; i++)
800 {
801 int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
802 const int32_t one = 1;
803 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx);
804 /* If not found, set, if the current node is the destination node, no need
805 * set itself as parent of subsequent nodes because its terminal nature. */
806 if (!term && (!cell.i32 || cell.i32[0] == 0))
807 ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one);
808 for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */
809 {
810 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]);
811 /* If not found, set */
812 if (!cell.i32 || cell.i32[0] == 0)
813 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]);
814 else {
815 /* Otherwise, set to the longest one */
816 int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1])({ typeof (cell.i32[0]) _a = (cell.i32[0]); typeof (buf[j * 2
+ 1]) _b = (buf[j * 2 + 1]); (_a > _b) ? _a : _b; })
;
817 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep);
818 }
819 }
820 }
821 } ccv_nnc_graph_visit_endfor} }
822#undef for_block
823 ccfreefree(buf);
824 // Algorithm to allocate signals and streams for this graph.
825 ccv_array_t* const stream_data = ccv_array_new(sizeof(ccv_nnc_stream_data_t), 0, 0);
826 ccv_array_t** const outgoings = cccalloccalloc(exec_info_size, sizeof(ccv_array_t*));
827 ccv_nnc_incoming_t* const incomings = cccalloccalloc(exec_info_size, sizeof(ccv_nnc_incoming_t));
828 int max_device_id_size = 1;
829 // Filter out outgoing nodes that we will be able to access it afterwards anyway.
830 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
831 max_device_id_size = ccv_max(node->input_size + node->output_size, max_device_id_size)({ typeof (node->input_size + node->output_size) _a = (
node->input_size + node->output_size); typeof (max_device_id_size
) _b = (max_device_id_size); (_a > _b) ? _a : _b; })
;
832 if (node->outgoings)
833 {
834 outgoings[idx] = ccv_array_new(sizeof(int), 0, 0);
835 for (i = 0; i < node->outgoings->rnum; i++)
836 {
837 const int di = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
838 // Skip if we haven't accessed this exec.
839 if (schd_info[di].stream_size < 0)
840 continue;
841 int flag = 0;
842 for (j = 0; !flag && j < node->outgoings->rnum; j++)
843 {
844 if (j != i)
845 {
846 const int dj = *(int*)ccv_array_get(node->outgoings, j)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(j)))
;
847 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, di, dj);
848 flag = (cell.i32 && cell.i32[0]);
849 }
850 }
851 if (!flag)
852 {
853 ccv_array_push(outgoings[idx], &di);
854 if (!incomings[di].outgoings)
855 incomings[di].outgoings = ccv_array_new(sizeof(int), 1, 0);
856 ccv_array_push(incomings[di].outgoings, &idx);
857 }
858 }
859 }
860 } ccv_nnc_graph_visit_endfor} }
861#define visitor(node, idx, _) \
862 if (node->outgoings) \
863 for (i = 0; i < node->outgoings->rnum; i++) \
864 { \
865 const int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
; \
866 node->rank = ccv_max(incomings[d].rank + 1, node->rank)({ typeof (incomings[d].rank + 1) _a = (incomings[d].rank + 1
); typeof (node->rank) _b = (node->rank); (_a > _b) ?
_a : _b; })
; \
867 }
868 CCV_NNC_GRAPH_VISIT(graph, incomings, exec_info_size, destinations, destination_size, sources, source_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_
+= ((incomings)[_i_].outgoings) ? (incomings)[_i_].outgoings
->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_
> 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_)
_incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t
) * (exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2
+ _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_info_size
) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size
)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size
)), (int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size
), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size)
; for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void)
sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 868
, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations
)[_i_].d].r = 1; _exists_[0][_i_] = (destinations)[_i_].d; } int
_exist_size_[2] = { (destination_size), 0, }; int _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if ((incomings)[_idx_].outgoings)
for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((incomings
)[_idx_].outgoings)->data)) + (size_t)((incomings)[_idx_].
outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if
(_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void
) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0),
__extension__ ({ if (_exist_size_[_q_] < (exec_info_size)
) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 868
, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations
)[_i_].d].r = 3; _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r != 3) continue; _incomings_[_idx_].r = 4; if ((incomings)
[_idx_].outgoings) for (_j_ = 0; _j_ < (incomings)[_idx_].
outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((
char*)(((incomings)[_idx_].outgoings)->data)) + (size_t)((
incomings)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (
_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_
+= _incomings_[d].c; _incomings_[d].c = 0; } _edges_[_incomings_
[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c
; if (_incomings_[d].r != 2) continue; _incomings_[d].r = 3; (
(void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 :
0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(sources)[_i_].d].r = 5; _exists_[0][_i_] =
(sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) {
_exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_
]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_
[_idx_].r != 5) continue; _incomings_[_idx_].r = 6; if (_incomings_
[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_
].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges -
1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d
].r = 5; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(sources)[_i_].d].d = 1; } for (_i_ = 0; _i_
< (destination_size); _i_++) { ((void) sizeof (((destinations
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (destinations)[_i_].d; } _p_ = 0; _q_
= 1; _exist_size_[0] = (destination_size); _exist_size_[1] =
0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor(((incomings) + _idx_
), (_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d
) { ++_d_; _incomings_[_idx_].r = 7; } if ((incomings)[_idx_]
.outgoings) { if ((incomings)[_idx_].outgoings->rnum == 1)
{ const int d = *(int*)((void*)(((char*)(((incomings)[_idx_]
.outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(source_size)) { _exists_[_p_][_i_] = d; continue; } } else for
(_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum; _j_
++) { const int d = *(int*)((void*)(((char*)(((incomings)[_idx_
].outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(source_size)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size
)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof
(((sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if
((sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[(sources)[_i_].d].r == 7) continue; if (
!(0)) { ((void) sizeof ((_incomings_[(sources)[_i_].d].c == 0
) ? 1 : 0), __extension__ ({ if (_incomings_[(sources)[_i_].d
].c == 0) ; else __assert_fail ("_incomings_[(sources)[_i_].d].c == 0"
, "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[(sources)[_i_].d].c > 0) continue
; visitor(((incomings) + (sources)[_i_].d), ((sources)[_i_].d
), (_incomings_[(sources)[_i_].d].d)); } if (_heap_mem_) free
(_incomings_); } while (0);
;
869#undef visitor
870 int device_ids[max_device_id_size];
871 int outgoing_device_ids[max_device_id_size];
872 int signal_size = 0;
873 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
874 // Go through the incomings.
875 const int device_id_size = _ccv_nnc_device_ids_for_stream_data(node, device_id, stream_data, device_ids, max_device_id_size);
876 if (schd_info[idx].stream_size == 0)
877 {
878 schd_info[idx].stream_size = device_id_size; // At least at the same size as the device_id_size.
879 if (device_id_size > 1)
880 {
881 schd_info[idx]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * device_id_size * 2);
882 schd_info[idx]._heap_signals = (schd_info[idx]._heap_streams + device_id_size);
883 }
884 for (i = 0; i < device_id_size; i++)
885 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = -1, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] = -1;
886 }
887 for (i = 0; i < device_id_size; i++)
888 // Go through until the end to assign streams.
889 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] < 0)
890 {
891 int stream_idx = -1;
892 int stream_has_command = 0;
893 // First, find a good stream in stream data (the stream is good if it can be recycled, and it has the same command).
894 // Otherwise, we prefer a usable stream (it doesn't have the command, but it can be recycled).
895 for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++)
896 {
897 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(j)))
;
898 if (data->device_id == device_ids[i])
899 {
900 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, data->exec_idx);
901 // If there is a path to conclude that exec_idx is before idx, then we can reuse
902 // this stream. Otherwise the work in this "empty stream" could still be ongoing,
903 // and we may delay the following work unnecessarily.
904 if (cell.i32 && cell.i32[0] > 0)
905 {
906 if (ccv_array_find_uint(data->command_set, node->cmd.cmd))
907 stream_idx = j, stream_has_command = 1;
908 else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet.
909 stream_idx = j;
910 }
911 }
912 }
913 if (stream_idx < 0)
914 {
915 // Note that the max stream count is a "soft" limit. Even we have different devices, our compute allocation has to be on different streams.
916 if (stream_data->rnum >= max_stream_count && max_stream_count > 0)
917 {
918 // If we are already at out limit, go through again to see if a stream is available, if the stream has command, and also its exec_idx is not preceding this execution.
919 for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++)
920 {
921 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(j)))
;
922 if (data->device_id == device_ids[i])
923 {
924 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, data->exec_idx, idx);
925 // There must be no path from idx to exec_idx otherwise we already have stream_idx. Now we just to verify
926 // there is no path from exec_idx to idx as well.
927 if (!cell.i32 || cell.i32[0] == 0)
928 {
929 if (ccv_array_find_uint(data->command_set, node->cmd.cmd))
930 stream_idx = j, stream_has_command = 1;
931 else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet.
932 stream_idx = j;
933 }
934 }
935 }
936 if (stream_idx >= 0)
937 {
938 // Now need to mark exec_idx is after idx, so we can avoid A -> B -> A deadlock.
939 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
940 const int32_t one = 1;
941 ccv_set_sparse_matrix_cell(exec_dep, idx, data->exec_idx, &one);
942 }
943 }
944 if (stream_idx < 0)
945 {
946 stream_idx = stream_data->rnum;
947 const ccv_nnc_stream_data_t data = {
948 .device_id = device_ids[i],
949 };
950 ccv_array_push(stream_data, &data);
951 }
952 }
953 assert(stream_idx >= 0)((void) sizeof ((stream_idx >= 0) ? 1 : 0), __extension__ (
{ if (stream_idx >= 0) ; else __assert_fail ("stream_idx >= 0"
, "ccv_nnc_graph.c", 953, __extension__ __PRETTY_FUNCTION__);
}))
;
954 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
955 if (!data->command_set)
956 data->command_set = ccv_array_new(sizeof(uint32_t), 1, 0);
957 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = stream_idx;
958 ccv_array_add_unique_uint(data->command_set, node->cmd.cmd);
959 // Assign all subsequent node to use this stream.
960 int outgoing_idx = idx;
961 // if we want to enforce the stream count is only 1, we certainly don't want to the greedy approach.
962 // With the greedy approach, the current stream will go all the way down and certainly conflict with
963 // other streams. We'd prefer to interleaving the execution instead in this case.
964 if (max_stream_count != 1)
965 while (outgoings[outgoing_idx] && outgoings[outgoing_idx]->rnum)
966 {
967 int highest_rank = -1;
968 int highest_idx = -1;
969 int stream_n = -1;
970 int stream_has_command = 0;
971 for (j = 0; j < outgoings[outgoing_idx]->rnum; j++)
972 {
973 const int d = *(int*)ccv_array_get(outgoings[outgoing_idx], j)((void*)(((char*)((outgoings[outgoing_idx])->data)) + (size_t
)(outgoings[outgoing_idx])->rsize * (size_t)(j)))
;
974 // This is not outside of our scope at this point.
975 assert(schd_info[d].stream_size >= 0)((void) sizeof ((schd_info[d].stream_size >= 0) ? 1 : 0), __extension__
({ if (schd_info[d].stream_size >= 0) ; else __assert_fail
("schd_info[d].stream_size >= 0", "ccv_nnc_graph.c", 975,
__extension__ __PRETTY_FUNCTION__); }))
;
976 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + d;
977 const int outgoing_device_id_size = _ccv_nnc_device_ids_for_stream_data(outgoing_node, device_id, stream_data, outgoing_device_ids, max_device_id_size);
978 if (schd_info[d].stream_size == 0)
979 {
980 schd_info[d].stream_size = outgoing_device_id_size; // At least at the same size as the device_id_size.
981 if (outgoing_device_id_size > 1)
982 {
983 schd_info[d]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * outgoing_device_id_size * 2);
984 schd_info[d]._heap_signals = (schd_info[d]._heap_streams + outgoing_device_id_size);
985 }
986 for (k = 0; k < outgoing_device_id_size; k++)
987 SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams
: (schd_info[d])._heap_streams)
[k] = -1, SCHEDULE_SIGNALS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_signals
: (schd_info[d])._heap_signals)
[k] = -1;
988 }
989 assert(schd_info[d].stream_size == outgoing_device_id_size)((void) sizeof ((schd_info[d].stream_size == outgoing_device_id_size
) ? 1 : 0), __extension__ ({ if (schd_info[d].stream_size == outgoing_device_id_size
) ; else __assert_fail ("schd_info[d].stream_size == outgoing_device_id_size"
, "ccv_nnc_graph.c", 989, __extension__ __PRETTY_FUNCTION__);
}))
;
990 for (k = 0; k < outgoing_device_id_size; k++)
991 // If it should be on the same device and the stream is not assign, potentially.
992 if (outgoing_device_ids[k] == device_ids[i] &&
993 SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams
: (schd_info[d])._heap_streams)
[k] < 0 &&
994 (incomings[d].rank > highest_rank ||
995 (incomings[d].rank == highest_rank &&
996 !stream_has_command && ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd))))
997 {
998 highest_rank = incomings[d].rank;
999 highest_idx = d;
1000 stream_n = k;
1001 // This is 1 if rank is the same (thus, I must break the tie already), if the rank is not the same, we need to compute this.
1002 stream_has_command = (incomings[d].rank == highest_rank || ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd));
1003 }
1004 }
1005 if (highest_idx >= 0)
1006 {
1007 outgoing_idx = highest_idx;
1008 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + outgoing_idx;
1009 assert(stream_n >= 0)((void) sizeof ((stream_n >= 0) ? 1 : 0), __extension__ ({
if (stream_n >= 0) ; else __assert_fail ("stream_n >= 0"
, "ccv_nnc_graph.c", 1009, __extension__ __PRETTY_FUNCTION__)
; }))
;
1010 SCHEDULE_STREAMS(schd_info[outgoing_idx])((schd_info[outgoing_idx]).stream_size <= 1 ? (schd_info[outgoing_idx
])._inline_streams : (schd_info[outgoing_idx])._heap_streams)
[stream_n] = stream_idx;
1011 ccv_array_add_unique_uint(data->command_set, outgoing_node->cmd.cmd);
1012 } else
1013 break;
1014 }
1015 data->exec_idx = outgoing_idx;
1016 }
1017 } ccv_nnc_graph_visit_endfor} }
1018 // Go through to assign signals when necessary.
1019 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1020 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
1021 _ccv_nnc_graph_schedule_assign_signals(incomings[idx].outgoings, schd_info + idx, stream_data, &signal_size, schd_info, exec_info_size);
1022 } ccv_nnc_graph_visit_endfor} }
1023 for (i = 0; i < exec_info_size; i++)
1024 if (outgoings[i])
1025 ccv_array_free(outgoings[i]);
1026 ccfreefree(outgoings);
1027 ccv_matrix_free(exec_dep);
1028 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
1029 if (device_id >= 0)
1030 {
1031 // If the default stream (stream 0) is not the same as desired stream, swap with the one that is.
1032 if (default_data->device_id != device_id)
1033 {
1034 int exchange_stream_idx = -1;
1035 // Find the stream idx to exchange.
1036 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1037 int flag = 0;
1038 for(i = 0; !flag && i < schd_info[idx].stream_size; i++)
1039 {
1040 const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i];
1041 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
1042 if (data->device_id == device_id)
1043 {
1044 exchange_stream_idx = stream_idx;
1045 flag = 1;
1046 }
1047 }
1048 if (flag)
1049 break;
1050 } ccv_nnc_graph_visit_endfor} }
1051 assert(exchange_stream_idx >= 0)((void) sizeof ((exchange_stream_idx >= 0) ? 1 : 0), __extension__
({ if (exchange_stream_idx >= 0) ; else __assert_fail ("exchange_stream_idx >= 0"
, "ccv_nnc_graph.c", 1051, __extension__ __PRETTY_FUNCTION__)
; }))
;
1052 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1053 for (i = 0; i < schd_info[idx].stream_size; i++)
1054 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == 0)
1055 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = -1;
1056 } ccv_nnc_graph_visit_endfor} }
1057 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1058 for (i = 0; i < schd_info[idx].stream_size; i++)
1059 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == exchange_stream_idx)
1060 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = 0;
1061 } ccv_nnc_graph_visit_endfor} }
1062 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1063 for (i = 0; i < schd_info[idx].stream_size; i++)
1064 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == -1)
1065 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = exchange_stream_idx;
1066 } ccv_nnc_graph_visit_endfor} }
1067 ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, exchange_stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(exchange_stream_idx)))
)->device_id = default_data->device_id;
1068 default_data->device_id = device_id;
1069 }
1070 }
1071 int graph_stream_1_size = 0;
1072 for (i = 0; i < source_size; i++)
1073 {
1074 const int idx = sources[i].d;
1075 // If it has incoming nodes, check whether these are on stream 0.
1076 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
1077 {
1078 int flag = 0;
1079 const ccv_array_t* const incoming = incomings[idx].outgoings;
1080 for (j = 0; !flag && j < incoming->rnum; j++)
1081 {
1082 const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(j)))
;
1083 for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++)
1084 flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx
])._inline_streams : (schd_info[incoming_idx])._heap_streams)
[k] == 0); // If this is the default stream, we already have a good start.
1085 }
1086 if (flag)
1087 continue;
1088 }
1089 for (j = 0; j < schd_info[idx].stream_size; j++)
1090 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this is not the default stream, we need explicit begin signal to start.
1091 ++graph_stream_1_size;
1092 }
1093 if (graph_stream_1_size > 0)
1094 {
1095 schedule->stream_1s = ccmallocmalloc(sizeof(int) * graph_stream_1_size);
1096 graph_stream_1_size = 0;
1097 for (i = 0; i < source_size; i++)
1098 {
1099 const int idx = sources[i].d;
1100 // If it has incoming nodes, check whether these are on stream 0.
1101 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
1102 {
1103 int flag = 0;
1104 const ccv_array_t* const incoming = incomings[idx].outgoings;
1105 for (j = 0; !flag && j < incoming->rnum; j++)
1106 {
1107 const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(j)))
;
1108 for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++)
1109 flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx
])._inline_streams : (schd_info[incoming_idx])._heap_streams)
[k] == 0); // If this is the default stream, we already have a good start.
1110 }
1111 if (flag)
1112 continue;
1113 }
1114 for (j = 0; j < schd_info[idx].stream_size; j++)
1115 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this is not the default stream, we need explicit begin signal to start.
1116 {
1117 const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j];
1118 int flag = 0;
1119 for (k = 0; !flag && k < graph_stream_1_size; k++)
1120 flag = (stream_idx == schedule->stream_1s[k]);
1121 if (!flag)
1122 schedule->stream_1s[graph_stream_1_size++] = stream_idx;
1123 }
1124 }
1125 schedule->stream_1_size = graph_stream_1_size;
1126 }
1127 for (i = 0; i < exec_info_size; i++)
1128 if (incomings[i].outgoings)
1129 ccv_array_free(incomings[i].outgoings);
1130 ccfreefree(incomings);
1131 int graph_wait_size = 0;
1132 for (i = 0; i < destination_size; i++)
1133 {
1134 const int idx = destinations[i].d;
1135 for (j = 0; j < schd_info[idx].stream_size; j++)
1136 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
1137 ++graph_wait_size;
1138 }
1139 if (graph_wait_size > 0)
1140 {
1141 schedule->waits = ccmallocmalloc(sizeof(int) * graph_wait_size);
1142 graph_wait_size = 0;
1143 for (i = 0; i < destination_size; i++)
1144 {
1145 const int idx = destinations[i].d;
1146 for (j = 0; j < schd_info[idx].stream_size; j++)
1147 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
1148 {
1149 ccv_nnc_stream_data_t* const default_stream_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
1150 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j] < 0)
1151 SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j] = signal_size++;
1152 else if (default_stream_data->signal_set && ccv_array_find_int(default_stream_data->signal_set, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j]))
1153 continue;
1154 schedule->waits[graph_wait_size++] = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j];
1155 }
1156 }
1157 schedule->wait_size = graph_wait_size;
1158 }
1159 for (i = 0; i < stream_data->rnum; i++)
1160 {
1161 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1162 if (data->signal_set)
1163 ccv_array_free(data->signal_set);
1164 assert(data->command_set)((void) sizeof ((data->command_set) ? 1 : 0), __extension__
({ if (data->command_set) ; else __assert_fail ("data->command_set"
, "ccv_nnc_graph.c", 1164, __extension__ __PRETTY_FUNCTION__)
; }))
;
1165 ccv_array_free(data->command_set);
1166 }
1167 // Allocate streams & signals
1168 int default_stream_type = stream_type;
1169 CCV_STREAM_SET_DEVICE_ID(default_stream_type, default_data->device_id)(default_stream_type) = (((default_stream_type) & ~0xfff00
) | (((default_data->device_id) & 0xfff) << 8))
;
1170 if (root_schedule)
1171 {
1172 assert(!graph->streams)((void) sizeof ((!graph->streams) ? 1 : 0), __extension__ (
{ if (!graph->streams) ; else __assert_fail ("!graph->streams"
, "ccv_nnc_graph.c", 1172, __extension__ __PRETTY_FUNCTION__)
; }))
;
1173 graph->stream_size = stream_data->rnum;
1174 graph->streams = (ccv_nnc_stream_context_t**)ccmallocmalloc(sizeof(ccv_nnc_stream_context_t*) * graph->stream_size);
1175 graph->block_stream_tasks = (co_routine_t**)cccalloccalloc(graph->stream_size, sizeof(co_routine_t*));
1176 if (stream_context)
1177 graph->streams[0] = stream_context;
1178 for (i = (stream_context ? 1 : 0); i < stream_data->rnum; i++)
1179 {
1180 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1181 int type = stream_type;
1182 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1183 graph->streams[i] = ccv_nnc_stream_context_new(type);
1184 }
1185 graph->signal_size = signal_size;
1186 graph->signals = (ccv_nnc_stream_signal_t**)cccalloccalloc(signal_size, sizeof(ccv_nnc_stream_signal_t*));
1187 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1188 for (i = 0; i < schd_info[idx].stream_size; i++)
1189 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1190 {
1191 const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i];
1192 if (!graph->signals[signal])
1193 {
1194 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ?
(schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams
)[i])))
;
1195 int type = stream_type;
1196 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1197 graph->signals[signal] = ccv_nnc_stream_signal_new(type);
1198 }
1199 }
1200 } ccv_nnc_graph_visit_endfor} }
1201 } else {
1202 assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ (
{ if (graph->streams) ; else __assert_fail ("graph->streams"
, "ccv_nnc_graph.c", 1202, __extension__ __PRETTY_FUNCTION__)
; }))
;
1203 assert(graph->stream_size >= stream_data->rnum)((void) sizeof ((graph->stream_size >= stream_data->
rnum) ? 1 : 0), __extension__ ({ if (graph->stream_size >=
stream_data->rnum) ; else __assert_fail ("graph->stream_size >= stream_data->rnum"
, "ccv_nnc_graph.c", 1203, __extension__ __PRETTY_FUNCTION__)
; }))
;
1204 // Find streams to proper allocated stream based on the type we need.
1205 int* const stream_idxs = (int*)ccmallocmalloc(sizeof(int) * (stream_data->rnum + signal_size));
1206 uint64_t* const stream_used = (uint64_t*)cccalloccalloc(((graph->stream_size + 63) >> 6) + ((graph->signal_size + 63) >> 6), sizeof(uint64_t));
1207 for (i = 0; i < stream_data->rnum; i++)
1208 {
1209 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1210 int type = stream_type;
1211 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1212 for (j = 0; j < graph->stream_size; j++)
1213 if (!(stream_used[j >> 6] & ((uint64_t)1 << (j & 63))))
1214 {
1215 const int stream_type = ccv_nnc_stream_context_type(graph->streams[j]);
1216 if (stream_type == type)
1217 {
1218 stream_idxs[i] = j;
1219 stream_used[j >> 6] |= ((uint64_t)1 << (j & 63));
1220 break;
1221 }
1222 }
1223 }
1224 assert(graph->signal_size >= signal_size)((void) sizeof ((graph->signal_size >= signal_size) ? 1
: 0), __extension__ ({ if (graph->signal_size >= signal_size
) ; else __assert_fail ("graph->signal_size >= signal_size"
, "ccv_nnc_graph.c", 1224, __extension__ __PRETTY_FUNCTION__)
; }))
;
1225 // Find signals to proper allocated signal based on the type we need.
1226 int* const signal_idxs = stream_idxs + stream_data->rnum;
1227 uint64_t* const signal_used = stream_used + ((graph->stream_size + 63) >> 6);
1228 for (i = 0; i < signal_size; i++)
1229 signal_idxs[i] = -1;
1230 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1231 for (i = 0; i < schd_info[idx].stream_size; i++)
1232 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1233 {
1234 const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i];
1235 if (signal_idxs[signal] < 0)
1236 {
1237 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ?
(schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams
)[i])))
;
1238 int type = stream_type;
1239 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1240 for (j = 0; j < graph->signal_size; j++)
1241 if (!(signal_used[j >> 6] & ((uint64_t)1 << (j & 63))))
1242 {
1243 const int signal_type = ccv_nnc_stream_signal_type(graph->signals[j]);
1244 if (signal_type == type)
1245 {
1246 signal_idxs[signal] = j;
1247 signal_used[j >> 6] |= ((uint64_t)1 << (j & 63));
1248 break;
1249 }
1250 }
1251 }
1252 }
1253 } ccv_nnc_graph_visit_endfor} }
1254 // Now rebind streams and signals from the schedule.
1255 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1256 for (i = 0; i < schd_info[idx].stream_size; i++)
1257 {
1258 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = stream_idxs[SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i]];
1259 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1260 SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] = signal_idxs[SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i]];
1261 }
1262 for (i = 0; i < schd_info[idx].wait_size; i++)
1263 schd_info[idx].waits[i] = signal_idxs[schd_info[idx].waits[i]];
1264 } ccv_nnc_graph_visit_endfor} }
1265 for (i = 0; i < schedule->stream_1_size; i++)
1266 schedule->stream_1s[i] = stream_idxs[schedule->stream_1s[i]];
1267 for (i = 0; i < schedule->wait_size; i++)
1268 schedule->waits[i] = signal_idxs[schedule->waits[i]];
1269 // Rebind who is the stream 0 (default stream).
1270 schedule->stream_0 = stream_idxs[0];
1271 ccfreefree(stream_used);
1272 ccfreefree(stream_idxs);
1273 }
1274 assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ (
{ if (graph->streams) ; else __assert_fail ("graph->streams"
, "ccv_nnc_graph.c", 1274, __extension__ __PRETTY_FUNCTION__)
; }))
;
1275 ccv_nnc_graph_visit_free(visit);
1276 for (i = 0; i < signal_size; i++)
1277 { assert(graph->signals[i])((void) sizeof ((graph->signals[i]) ? 1 : 0), __extension__
({ if (graph->signals[i]) ; else __assert_fail ("graph->signals[i]"
, "ccv_nnc_graph.c", 1277, __extension__ __PRETTY_FUNCTION__)
; }))
; }
1278 if (schedule->stream_1_size)
1279 schedule->begin = ccv_nnc_stream_signal_new(default_stream_type);
1280 schedule->end = ccv_nnc_stream_signal_new(default_stream_type);
1281 // Do this recursively for its sub graphs.
1282 if (graph->sub_graphs)
1283 for (i = 0; i < graph->sub_graphs->rnum; i++)
1284 {
1285 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
;
1286 if (sub_graph && !sub_graph->default_schedule)
1287 {
1288 const int exec_idx = sub_graph->exec_idx - 1;
1289 assert(schd_info[exec_idx].stream_size == 1)((void) sizeof ((schd_info[exec_idx].stream_size == 1) ? 1 : 0
), __extension__ ({ if (schd_info[exec_idx].stream_size == 1)
; else __assert_fail ("schd_info[exec_idx].stream_size == 1"
, "ccv_nnc_graph.c", 1289, __extension__ __PRETTY_FUNCTION__)
; }))
;
1290 const int stream_idx = SCHEDULE_STREAMS(schd_info[exec_idx])((schd_info[exec_idx]).stream_size <= 1 ? (schd_info[exec_idx
])._inline_streams : (schd_info[exec_idx])._heap_streams)
[0];
1291 const int device_id = ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
)->device_id;
1292 sub_graph->default_schedule = _ccv_nnc_graph_static_schedule_new(sub_graph, stream_type, device_id, max_stream_count, graph->streams[stream_idx], 0, 0, 0, 0);
1293 }
1294 }
1295 ccv_array_free(stream_data);
1296 return schedule;
1297}
1298void ccv_nnc_graph_set_default_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type, const int max_stream_count)
1299{
1300 assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({
if (graph->p == 0) ; else __assert_fail ("graph->p == 0"
, "ccv_nnc_graph.c", 1300, __extension__ __PRETTY_FUNCTION__)
; }))
;
1301 if (graph->default_schedule)
1302 ccv_nnc_graph_static_schedule_free(graph->default_schedule);
1303 graph->default_schedule = _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, max_stream_count, 0, 0, 0, 0, 0);
1304}
1305
1306ccv_nnc_graph_static_schedule_t* ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int max_stream_count, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1307{
1308 assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({
if (graph->p == 0) ; else __assert_fail ("graph->p == 0"
, "ccv_nnc_graph.c", 1308, __extension__ __PRETTY_FUNCTION__)
; }))
;
1309 return _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, max_stream_count, 0, sources, source_size, destinations, destination_size);
1310}
1311
1312ccv_nnc_stream_context_t* ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph)
1313{
1314 if (graph->streams && graph->stream_size > 0)
1315 return graph->streams[0];
1316 return 0;
1317}
1318
1319static void _ccv_nnc_graph_dot_exec(const int index, const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, ccv_nnc_stream_context_t** const streams, const int flags, FILE* out)
1320{
1321 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1322 fputc('{', out);
1323 fprintf(out, "node%d", index);
1324 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1325 {
1326 fputs("|Command: ", out);
1327 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1328 if (schd_info)
1329 {
1330 if (schd_info->stream_size > 0)
1331 {
1332 int i, flag = 0;
1333 fputs("|Stream: ", out);
1334 for (i = 0; i < schd_info->stream_size; i++)
1335 {
1336 const int device_id = streams ? CCV_TENSOR_GET_DEVICE_ID(streams[SCHEDULE_STREAMS(*schd_info)[i]]->type)(((streams[((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)[i]]->type) & 0xfff00) >>
8)
: 0;
1337 if (i == 0)
1338 fprintf(out, "%d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)
[i], device_id);
1339 else
1340 fprintf(out, ", %d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)
[i], device_id);
1341 }
1342 for (i = 0; i < schd_info->stream_size; i++)
1343 if (SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i] >= 0)
1344 {
1345 if (!flag)
1346 {
1347 flag = 1;
1348 fprintf(out, "|Signal: %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i]);
1349 } else
1350 fprintf(out, ", %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i]);
1351 }
1352 }
1353 if (schd_info->wait_size > 0)
1354 {
1355 fputs("|Wait: ", out);
1356 int i;
1357 for (i = 0; i < schd_info->wait_size - 1; i++)
1358 fprintf(out, "%d, ", schd_info->waits[i]);
1359 fprintf(out, "%d", schd_info->waits[schd_info->wait_size - 1]);
1360 }
1361 }
1362 fputc('}', out);
1363 }
1364}
1365
1366static void _ccv_nnc_graph_dot_tensor(const int index, const ccv_nnc_tensor_t* const tensor, const int zone, const int flags, const int depth, FILE* out)
1367{
1368 // if it has an alias pointer, or, it is a long form.
1369 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1370 fputc('{', out);
1371 const int is_tensor_view = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW);
1372 if (is_tensor_view)
1373 fprintf(out, "tensorview%d", index);
1374 else
1375 fprintf(out, "tensor%d", index);
1376 int i;
1377 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1378 fputc('\'', out);
1379 if (CCV_GET_TAPE_ALLOC(tensor->type)((tensor->type) & CCV_TAPE_ALLOC))
1380 fputs(" (t)", out);
1381 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1382 {
1383 const int device_id = CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)(((tensor->info.type) & 0xfff00) >> 8);
1384 fprintf(out, "|d%d|zone%d", device_id, zone);
1385 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1386 fputc('\'', out);
1387 uintptr_t aptr = (uintptr_t)tensor->data.u8;
1388 size_t tensor_size;
1389 if (is_tensor_view)
1390 tensor_size = (size_t)((ccv_nnc_tensor_view_t*)(tensor))->stride[0] * tensor->info.dim[0] * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1391 else
1392 tensor_size = ccv_nnc_dimension_count(tensor->info.dim) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1393 // Print out the range as well.
1394 fprintf(out, "|{%#010x|%#010x}|%d", (uint32_t)aptr, (uint32_t)(aptr + tensor_size - 1), tensor->info.dim[0]);
1395 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor->info.dim[i]; i++)
1396 fprintf(out, "x%d", tensor->info.dim[i]);
1397 fputc('}', out);
1398 }
1399}
1400
// One record per tensor occurrence when rendering a graph in dot format.
typedef struct {
	int index; // Occurrence index of the tensor in the dump.
	int name; // NOTE(review): presumably the de-duplicated display name id — confirm against the dot emitters.
	int zone; // NOTE(review): presumably the memory-zone id shared by aliasing tensors — confirm against the dot emitters.
	uintptr_t tensor_ref; // Address of the tensor object; secondary sort key (see less_than below).
	uintptr_t start_ptr; // Start of the tensor's data range; primary sort key (see less_than below).
	uintptr_t end_ptr; // End of the tensor's data range.
} ccv_nnc_tensor_dot_t;

// Tables derived from sorting the dot records, used to recover consistent
// names/zones across tensor occurrences.
typedef struct {
	ccv_nnc_tensor_dot_t* dots; // The tensor records (sorted by _ccv_nnc_tensor_dot_sort_by_ptr).
	int* remap; // NOTE(review): appears to map occurrence order to sorted position — confirm.
	int* rename_zone; // Renamed zone ids.
	int* rename_index; // Renamed tensor name ids.
} ccv_nnc_tensor_dot_recovery_t;
1416
// First sort by start_ptr, then sort by tensor ptr (so that we will have the same tensor sorted to one cluster).
// NOTE: the aux parameter is part of CCV_IMPLEMENT_QSORT's comparator contract but is intentionally unused here.
#define less_than(i1, i2, aux) ((i1).start_ptr < (i2).start_ptr || ((i1).start_ptr == (i2).start_ptr && (i1).tensor_ref < (i2).tensor_ref))
1419static CCV_IMPLEMENT_QSORT(_ccv_nnc_tensor_dot_sort_by_ptr, ccv_nnc_tensor_dot_t, less_than)void _ccv_nnc_tensor_dot_sort_by_ptr(ccv_nnc_tensor_dot_t *array
, size_t total, int aux) { int isort_thresh = 7; ccv_nnc_tensor_dot_t
t; int sp = 0; struct { ccv_nnc_tensor_dot_t *lb; ccv_nnc_tensor_dot_t
*ub; } stack[48]; if( total <= 1 ) return; stack[0].lb = array
; stack[0].ub = array + (total - 1); while( sp >= 0 ) { ccv_nnc_tensor_dot_t
* left = stack[sp].lb; ccv_nnc_tensor_dot_t* right = stack[sp
--].ub; for(;;) { int i, n = (int)(right - left) + 1, m; ccv_nnc_tensor_dot_t
* ptr; ccv_nnc_tensor_dot_t* ptr2; if( n <= isort_thresh )
{ insert_sort: for( ptr = left + 1; ptr <= right; ptr++ )
{ for( ptr2 = ptr; ptr2 > left && less_than(ptr2[
0],ptr2[-1], aux); ptr2--) (((t)) = ((ptr2[0])), ((ptr2[0])) =
((ptr2[-1])), ((ptr2[-1])) = ((t))); } break; } else { ccv_nnc_tensor_dot_t
* left0; ccv_nnc_tensor_dot_t* left1; ccv_nnc_tensor_dot_t* right0
; ccv_nnc_tensor_dot_t* right1; ccv_nnc_tensor_dot_t* pivot; ccv_nnc_tensor_dot_t
* a; ccv_nnc_tensor_dot_t* b; ccv_nnc_tensor_dot_t* c; int swap_cnt
= 0; left0 = left; right0 = right; pivot = left + (n/2); if(
n > 40 ) { int d = n / 8; a = left, b = left + d, c = left
+ 2*d; left = less_than(*a, *b, aux) ? (less_than(*b, *c, aux
) ? b : (less_than(*a, *c, aux) ? c : a)) : (less_than(*c, *b
, aux) ? b : (less_than(*a, *c, aux) ? a : c)); a = pivot - d
, b = pivot, c = pivot + d; pivot = less_than(*a, *b, aux) ? (
less_than(*b, *c, aux) ? b : (less_than(*a, *c, aux) ? c : a)
) : (less_than(*c, *b, aux) ? b : (less_than(*a, *c, aux) ? a
: c)); a = right - 2*d, b = right - d, c = right; right = less_than
(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than(*a, *
c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than(
*a, *c, aux) ? a : c)); } a = left, b = pivot, c = right; pivot
= less_than(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than
(*a, *c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than
(*a, *c, aux) ? a : c)); if( pivot != left0 ) { (((t)) = ((*pivot
)), ((*pivot)) = ((*left0)), ((*left0)) = ((t))); pivot = left0
; } left = left1 = left0 + 1; right = right1 = right0; for(;;
) { while( left <= right && !less_than(*pivot, *left
, aux) ) { if( !less_than(*left, *pivot, aux) ) { if( left >
left1 ) (((t)) = ((*left1)), ((*left1)) = ((*left)), ((*left
)) = ((t))); swap_cnt = 1; left1++; } left++; } while( left <=
right && !less_than(*right, *pivot, aux) ) { if( !less_than
(*pivot, *right, aux) ) { if( right < right1 ) (((t)) = ((
*right1)), ((*right1)) = ((*right)), ((*right)) = ((t))); swap_cnt
= 1; right1--; } right--; } if( left > right ) break; (((
t)) = ((*left)), ((*left)) = ((*right)), ((*right)) = ((t)));
swap_cnt = 1; left++; right--; } if( swap_cnt == 0 ) { left =
left0, right = right0; goto insert_sort; } n = ({ typeof ((int
)(left1 - left0)) _a = ((int)(left1 - left0)); typeof ((int)(
left - left1)) _b = ((int)(left - left1)); (_a < _b) ? _a :
_b; }); for( i = 0; i < n; i++ ) (((t)) = ((left0[i])), (
(left0[i])) = ((left[i-n])), ((left[i-n])) = ((t))); n = ({ typeof
((int)(right0 - right1)) _a = ((int)(right0 - right1)); typeof
((int)(right1 - right)) _b = ((int)(right1 - right)); (_a <
_b) ? _a : _b; }); for( i = 0; i < n; i++ ) (((t)) = ((left
[i])), ((left[i])) = ((right0[i-n+1])), ((right0[i-n+1])) = (
(t))); n = (int)(left - left1); m = (int)(right1 - right); if
( n > 1 ) { if( m > 1 ) { if( n > m ) { stack[++sp].
lb = left0; stack[sp].ub = left0 + n - 1; left = right0 - m +
1, right = right0; } else { stack[++sp].lb = right0 - m + 1;
stack[sp].ub = right0; left = left0, right = left0 + n - 1; }
} else left = left0, right = left0 + n - 1; } else if( m >
1 ) left = right0 - m + 1, right = right0; else break; } } }
}
1420#undef less_than
1421
1422static int _ccv_nnc_graph_dot_tensor_multiview_count(const ccv_nnc_tensor_multiview_t* const mv)
1423{
1424 if (!CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
1425 return 1;
1426 const int count = mv->kind + mv->repeat;
1427 int i, c = 0;
1428 for (i = 0; i < count; i++)
1429 c += _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
1430 return c;
1431}
1432
1433static void _ccv_nnc_graph_dot_tensor_multiview_tensor_dots(const ccv_nnc_tensor_multiview_t* const mv, ccv_nnc_tensor_dot_t* const tensor_dots, int* tensor_index)
1434{
1435 const int count = mv->kind + mv->repeat;
1436 int i;
1437 for (i = 0; i < count; i++)
1438 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1439 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], tensor_dots, tensor_index);
1440 else {
1441 tensor_dots[*tensor_index].name = *tensor_index;
1442 tensor_dots[*tensor_index].start_ptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1443 // Because tv's pointer will get updated, it is not correct in this case to have one tensor_ref.
1444 tensor_dots[*tensor_index].tensor_ref = tensor_dots[*tensor_index].start_ptr;
1445 const size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1446 tensor_dots[*tensor_index].end_ptr = tensor_dots[*tensor_index].start_ptr + dim_size - 1;
1447 ++(*tensor_index);
1448 }
1449}
1450
1451static ccv_nnc_tensor_dot_recovery_t _ccv_nnc_graph_tensor_dot_recovery(const ccv_nnc_graph_t* const graph)
1452{
1453 int i, j;
1454 // Recover tensor relationships for all tensors referenced in the graph.
1455 // Most notably, we have to give these indexes, and find if they point to
1456 // the same memory region, and whether they overlap. These information
1457 // are lost since we converted from symbolic form to the execution form.
1458 // and here we do our best to recover because that is easier to understand
1459 // if we want to present the graph visually (also, we don't want to put this
1460 // information into the tensor or execution graph to avoid overhead, thus,
1461 // recovering is the best we can do).
1462 int tensor_count = 0;
1463 for (i = 0; i < graph->exec_info->rnum; i++)
50
Assuming 'i' is < field 'rnum'
51
Loop condition is true. Entering loop body
62
Assuming 'i' is >= field 'rnum'
1464 {
1465 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1466 for (j = 0; j < exec_info->input_size; j++)
52
Assuming 'j' is >= field 'input_size'
53
Loop condition is false. Execution continues on line 1469
1467 if (exec_info->inputs[j])
1468 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[j])((*(int*)(exec_info->inputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->inputs[j]) : 1;
1469 for (j = 0; j < exec_info->output_size; j++)
54
Assuming 'j' is < field 'output_size'
55
Loop condition is true. Entering loop body
60
Assuming 'j' is >= field 'output_size'
61
Loop condition is false. Execution continues on line 1463
1470 if (exec_info->outputs[j])
56
Assuming the condition is true
57
Taking true branch
1471 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[j])((*(int*)(exec_info->outputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->outputs[j]) : 1;
58
Assuming the condition is true
59
'?' condition is true
1472 }
1473 ccv_nnc_tensor_dot_t* tensor_dots = tensor_count > 0 ? (ccv_nnc_tensor_dot_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_dot_t) * tensor_count) : 0;
63
Loop condition is false. Execution continues on line 1473
64
Assuming 'tensor_count' is <= 0
65
'?' condition is false
66
'tensor_dots' initialized to a null pointer value
1474 int k = 0;
1475 for (i = 0; i < graph->exec_info->rnum; i++)
67
Loop condition is true. Entering loop body
73
Loop condition is false. Execution continues on line 1519
1476 {
1477 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1478 for (j = 0; j
67.1
'j' is >= field 'input_size'
< exec_info->input_size; j++)
68
Loop condition is false. Execution continues on line 1498
1479 {
1480 ccv_nnc_tensor_t* tensor = exec_info->inputs[j];
1481 if (!tensor)
1482 continue;
1483 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1484 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1485 else {
1486 tensor_dots[k].name = k;
1487 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1488 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1489 size_t tensor_size;
1490 if (CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW))
1491 tensor_size = (size_t)((ccv_nnc_tensor_view_t*)(tensor))->stride[0] * tensor->info.dim[0] * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1492 else
1493 tensor_size = ccv_nnc_dimension_count(tensor->info.dim) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1494 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + tensor_size - 1;
1495 ++k;
1496 }
1497 }
1498 for (j = 0; j < exec_info->output_size; j++)
69
Loop condition is true. Entering loop body
72
Loop condition is false. Execution continues on line 1475
1499 {
1500 ccv_nnc_tensor_t* tensor = exec_info->outputs[j];
1501 if (!tensor
69.1
'tensor' is non-null
)
70
Taking false branch
1502 continue;
1503 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
71
Taking true branch
1504 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1505 else {
1506 tensor_dots[k].name = k;
1507 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1508 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1509 size_t tensor_size;
1510 if (CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW))
1511 tensor_size = (size_t)((ccv_nnc_tensor_view_t*)(tensor))->stride[0] * tensor->info.dim[0] * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1512 else
1513 tensor_size = ccv_nnc_dimension_count(tensor->info.dim) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1514 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + tensor_size - 1;
1515 ++k;
1516 }
1517 }
1518 }
1519 tensor_count = k; // We may over count, now shrink.
1520 // To group overlap memory into one zone, we sort it by start ptr first (secondary by the tensor pointer).
1521 _ccv_nnc_tensor_dot_sort_by_ptr(tensor_dots, tensor_count, 0);
1522 int index = 0, zone = 0;
1523 uintptr_t tensor_ref = tensor_count > 0 ? tensor_dots[0].tensor_ref : 0;
74
Assuming 'tensor_count' is > 0
75
'?' condition is true
76
Dereference of null pointer
1524 uintptr_t end_ptr = tensor_count > 0 ? tensor_dots[0].end_ptr : 0;
1525 // Then, it is trivial, we go by end ptr. If the next start ptr is still within the end ptr (start ptr <= end ptr),
1526 // they are the same zone.
1527 for (i = 0; i < tensor_count; i++)
1528 {
1529 if (tensor_dots[i].tensor_ref != tensor_ref)
1530 {
1531 tensor_ref = tensor_dots[i].tensor_ref;
1532 ++index;
1533 }
1534 if (tensor_dots[i].start_ptr > end_ptr)
1535 {
1536 end_ptr = ccv_max(end_ptr, tensor_dots[i].end_ptr)({ typeof (end_ptr) _a = (end_ptr); typeof (tensor_dots[i].end_ptr
) _b = (tensor_dots[i].end_ptr); (_a > _b) ? _a : _b; })
;
1537 ++zone;
1538 }
1539 tensor_dots[i].index = index;
1540 tensor_dots[i].zone = zone;
1541 }
1542 // We already have index and zone assigned, but the problem is that these are not very human interpretable (because
1543 // it follows the pointer from low to high, not the tensor creation order). The following code renamed both the index
1544 // and the zone so that it is much more understandable.
1545 const int index_count = index + 1;
1546 const int zone_count = zone + 1;
1547 int* remap = (int*)ccmallocmalloc(sizeof(int) * (tensor_count + index_count + zone_count));
1548 int* rename_index = remap + tensor_count;
1549 int* rename_zone = rename_index + index_count;
1550 for (i = 0; i < tensor_count; i++)
1551 remap[tensor_dots[i].name] = i;
1552 for (i = 0; i < index_count; i++)
1553 rename_index[i] = -1;
1554 for (i = 0; i < zone_count; i++)
1555 rename_zone[i] = -1;
1556 index = 0;
1557 zone = 0;
1558 for (i = 0; i < tensor_count; i++)
1559 {
1560 ccv_nnc_tensor_dot_t* tensor_dot = tensor_dots + remap[i];
1561 if (rename_index[tensor_dot->index] == -1)
1562 rename_index[tensor_dot->index] = index++;
1563 if (rename_zone[tensor_dot->zone] == -1)
1564 rename_zone[tensor_dot->zone] = zone++;
1565 }
1566 ccv_nnc_tensor_dot_recovery_t recovery = {
1567 .dots = tensor_dots,
1568 .remap = remap,
1569 .rename_index = rename_index,
1570 .rename_zone = rename_zone,
1571 };
1572 return recovery;
1573}
1574
1575static void _ccv_nnc_graph_tensor_dot_recovery_free(const ccv_nnc_tensor_dot_recovery_t recovery)
1576{
1577 ccfreefree(recovery.dots);
1578 ccfreefree(recovery.remap);
1579}
1580
1581static void _ccv_nnc_graph_dot_tensor_multiview_one(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int depth, int* tensor_index, FILE* out)
1582{
1583 const int count = mv->kind + mv->repeat;
1584 int i, j;
1585 fputs("|{", out);
1586 for (i = 0; i < count; i++)
1587 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1588 {
1589 fprintf(out, "{%d", i);
1590 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1591 fputc('*', out); // Denotes that we loop on this.
1592 _ccv_nnc_graph_dot_tensor_multiview_one((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], recovery, depth, tensor_index, out);
1593 if (i == count - 1)
1594 fputc('}', out);
1595 else
1596 fputs("}|", out);
1597 } else {
1598 fprintf(out, "{%d", i);
1599 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1600 fputc('*', out); // Denotes that we loop on this.
1601 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1602 fprintf(out, "|zone%d", recovery.rename_zone[tensor_dot->zone]);
1603 for (j = 0; j < depth; j++)
1604 fputc('\'', out);
1605 uintptr_t aptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1606 // For the last one, we don't extend to full ainc.
1607 size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1608 // Print out the range as well.
1609 fprintf(out, "|{%#010x|%#010x}", (uint32_t)aptr, (uint32_t)(aptr + dim_size - 1));
1610 ++(*tensor_index);
1611 if (i == count - 1)
1612 fputc('}', out);
1613 else
1614 fputs("}|", out);
1615 }
1616 fputc('}', out);
1617}
1618
1619static void _ccv_nnc_graph_dot_tensor_multiview(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, int* tensor_index, FILE* out)
1620{
1621 // if it has an alias pointer, or, it is a long form.
1622 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1623 fputc('{', out);
1624 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1625 fprintf(out, "multiview%d", recovery.rename_index[tensor_dot->index]);
1626 int i;
1627 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1628 fputc('\'', out);
1629 if (CCV_GET_TAPE_ALLOC(mv->type)((mv->type) & CCV_TAPE_ALLOC))
1630 fputs(" (t)", out);
1631 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1632 {
1633 _ccv_nnc_graph_dot_tensor_multiview_one(mv, recovery, depth, tensor_index, out);
1634 const ccv_nnc_tensor_t* root = (ccv_nnc_tensor_t*)mv;
1635 while (CCV_IS_TENSOR_MULTIVIEW(root)((*(int*)(root)) & CCV_TENSOR_MULTIVIEW))
1636 root = CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)root)(((ccv_nnc_tensor_multiview_t*)root)->_heap_data ? ((ccv_nnc_tensor_multiview_t
*)root)->_heap_data : ((ccv_nnc_tensor_multiview_t*)root)->
_inline_data)
[0];
1637 fprintf(out, "|%d", root->info.dim[0]);
1638 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && root->info.dim[i]; i++)
1639 fprintf(out, "x%d", root->info.dim[i]);
1640 fputc('}', out);
1641 } else
1642 *tensor_index += _ccv_nnc_graph_dot_tensor_multiview_count(mv);
1643}
1644
1645static void _ccv_nnc_graph_dot_node(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int exec_index, ccv_nnc_stream_context_t** const streams, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* const tensor_index)
1646{
1647 fprintf(out, "node%d [shape=record,label=\"", exec_index);
1648 _ccv_nnc_graph_dot_exec(exec_index, exec_info, schd_info, streams, flags, out);
1649 int i;
1650 int k = *tensor_index;
1651 if (exec_info->input_size > 0)
1652 {
1653 fputs("|{Input", out);
1654 for (i = 0; i < exec_info->input_size; i++)
1655 if (exec_info->inputs[i])
1656 {
1657 fputc('|', out);
1658 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1659 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1660 else {
1661 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1662 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1663 ++k;
1664 }
1665 } else
1666 fputs("|-", out);
1667 fputc('}', out);
1668 }
1669 if (exec_info->output_size > 0)
1670 {
1671 fputs("|{Output", out);
1672 for (i = 0; i < exec_info->output_size; i++)
1673 if (exec_info->outputs[i])
1674 {
1675 fputc('|', out);
1676 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1677 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1678 else {
1679 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1680 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1681 ++k;
1682 }
1683 } else
1684 fputs("|-", out);
1685 fputc('}', out);
1686 }
1687 fputs("\"];\n", out);
1688 *tensor_index = k;
1689}
1690
1691static void _ccv_nnc_graph_dot_while_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const ccv_nnc_graph_t* const while_graph, const int flags, const int depth, FILE* out, int* tensor_index)
1692{
1693 int i;
1694 fprintf(out, "label=<<b>while%d </b>Command: ", exec_index);
1695 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1696 fputs(">;\n", out);
1697 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1698 int k = *tensor_index;
1699 if (exec_info->input_size > 0)
1700 {
1701 fputs("{Input|{", out);
1702 for (i = 0; i < exec_info->input_size; i++)
1703 {
1704 if (i > 0)
1705 fputc('|', out);
1706 if (exec_info->inputs[i])
1707 {
1708 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1709 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1710 else {
1711 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1712 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1713 ++k;
1714 }
1715 } else
1716 fputc('-', out);
1717 }
1718 fputs("}}", out);
1719 }
1720 if (exec_info->output_size > 0)
1721 {
1722 if (exec_info->input_size > 0)
1723 fputs("|", out);
1724 fputs("{Output|{", out);
1725 for (i = 0; i < exec_info->output_size; i++)
1726 {
1727 if (i > 0)
1728 fputc('|', out);
1729 if (exec_info->outputs[i])
1730 {
1731 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1732 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1733 else {
1734 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1735 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1736 ++k;
1737 }
1738 } else
1739 fputc('-', out);
1740 }
1741 fputs("}}", out);
1742 }
1743 fputs("}\"];\n", out);
1744 *tensor_index = k;
1745}
1746
1747static void _ccv_nnc_graph_dot_case_of_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* tensor_index)
1748{
1749 int i;
1750 fprintf(out, "label=<<b>caseof%d </b>Command: ", exec_index);
1751 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1752 fputs(">;\n", out);
1753 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1754 int k = *tensor_index;
1755 if (exec_info->input_size > 0)
1756 {
1757 fputs("{Input|{", out);
1758 for (i = 0; i < exec_info->input_size; i++)
1759 {
1760 if (i > 0)
1761 fputc('|', out);
1762 if (exec_info->inputs[i])
1763 {
1764 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1765 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1766 else {
1767 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1768 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1769 ++k;
1770 }
1771 } else
1772 fputc('-', out);
1773 }
1774 fputs("}}", out);
1775 }
1776 if (exec_info->output_size > 0)
1777 {
1778 if (exec_info->input_size > 0)
1779 fputs("|", out);
1780 fputs("{Output|{", out);
1781 for (i = 0; i < exec_info->output_size; i++)
1782 {
1783 if (i > 0)
1784 fputc('|', out);
1785 if (exec_info->outputs[i])
1786 {
1787 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1788 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1789 else {
1790 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1791 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1792 ++k;
1793 }
1794 } else
1795 fputc('-', out);
1796 }
1797 fputs("}}", out);
1798 }
1799 fputs("}\"];\n", out);
1800 *tensor_index = k;
1801}
1802
1803static void _ccv_nnc_graph_dot_sub_graphs(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_tensor_dot_recovery_t p_recovery, const ccv_array_t* const sub_graphs, const int flags, const int depth, FILE* out, int* tensor_index, int* exec_index)
1804{
1805 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
7
Assuming the condition is false
8
Taking false branch
21
Assuming the condition is false
22
Taking false branch
35
Assuming the condition is false
36
Taking false branch
1806 {
1807 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1808 const ccv_nnc_graph_t* const while_graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[0] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[0]
- 1)))
;
1809 // Output this node info within this subgraph.
1810 _ccv_nnc_graph_dot_while_label(exec_info, *exec_index, p_recovery, while_graph, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1811 } else if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
9
Assuming the condition is false
10
Taking false branch
23
Assuming the condition is false
24
Taking false branch
37
Assuming the condition is false
38
Taking false branch
1812 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1813 _ccv_nnc_graph_dot_case_of_label(exec_info, *exec_index, p_recovery, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1814 }
1815 ++(*exec_index);
1816 int p;
1817 for (p = 0; p < exec_info->graph_ref_size; p++)
11
Assuming 'p' is < field 'graph_ref_size'
12
Loop condition is true. Entering loop body
25
Assuming 'p' is < field 'graph_ref_size'
26
Loop condition is true. Entering loop body
39
Assuming 'p' is < field 'graph_ref_size'
40
Loop condition is true. Entering loop body
45
Assuming 'p' is < field 'graph_ref_size'
46
Loop condition is true. Entering loop body
1818 {
1819 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
13
Taking false branch
27
Taking false branch
41
Taking false branch
47
Taking false branch
1820 {
1821 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=\"\"\n", *exec_index, *exec_index);
1822 ++(*exec_index);
1823 }
1824 const ccv_nnc_graph_t* const graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[p] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[p]
- 1)))
;
14
'?' condition is false
28
'?' condition is false
42
'?' condition is false
48
'?' condition is false
1825 const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule;
1826 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
49
Calling '_ccv_nnc_graph_tensor_dot_recovery'
1827 int i, j;
1828 int k = 0;
1829 int* node_id = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
1830 // Output styles.
1831 for (i = 0; i
42.1
'i' is >= field 'rnum'
< graph->exec_info->rnum; i++)
15
Loop condition is true. Entering loop body
29
Loop condition is true. Entering loop body
43
Loop condition is false. Execution continues on line 1845
1832 {
1833 node_id[i] = *exec_index;
1834 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1835 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0]
)
16
Assuming field '_heap_graph_ref' is null
17
'?' condition is false
18
Assuming the condition is true
19
Taking true branch
30
Assuming field '_heap_graph_ref' is null
31
'?' condition is false
32
Assuming the condition is true
33
Taking true branch
1836 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, depth + 1, out, &k, exec_index);
20
Calling '_ccv_nnc_graph_dot_sub_graphs'
34
Calling '_ccv_nnc_graph_dot_sub_graphs'
1837 else {
1838 _ccv_nnc_graph_dot_node(exec_info,
1839 schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0,
1840 *exec_index, graph->streams, recovery, flags, depth, out, &k);
1841 ++(*exec_index);
1842 }
1843 }
1844 // Output connections.
1845 for (i = 0; i
43.1
'i' is >= field 'rnum'
< graph->exec_info->rnum; i++)
44
Loop condition is false. Execution continues on line 1864
1846 {
1847 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1848 if (exec_info->outgoings)
1849 for (j = 0; j < exec_info->outgoings->rnum; j++)
1850 {
1851 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j)((void*)(((char*)((exec_info->outgoings)->data)) + (size_t
)(exec_info->outgoings)->rsize * (size_t)(j)))
;
1852 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(outgoing_idx)))
;
1853 // If both are sub-graphs, have both tail and head specified.
1854 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1855 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1856 else if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && !CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1857 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1858 else if (!CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1859 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1860 else
1861 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1862 }
1863 }
1864 fputs("}\n", out);
1865 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1866 ccfreefree(node_id);
1867 }
1868 // Extra subgraph cluster.
1869 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1870 fputs("}\n", out);
1871}
1872
1873void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out)
1874{
1875 fputs("digraph G {\ncompound=true;\n", out);
1876 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1877 int i, j;
1878 int k = 0, c = 0;
1879 int* node_id = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
1880 const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule;
1881 // Output styles.
1882 for (i = 0; i < graph->exec_info->rnum; i++)
1
Loop condition is true. Entering loop body
1883 {
1884 node_id[i] = c;
1885 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1886 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0]
)
2
Assuming field '_heap_graph_ref' is null
3
'?' condition is false
4
Assuming the condition is true
5
Taking true branch
1887 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, 1, out, &k, &c);
6
Calling '_ccv_nnc_graph_dot_sub_graphs'
1888 else {
1889 _ccv_nnc_graph_dot_node(exec_info,
1890 schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0,
1891 c, graph->streams, recovery, flags, 0, out, &k);
1892 ++c;
1893 }
1894 }
1895 // Output connections.
1896 for (i = 0; i < graph->exec_info->rnum; i++)
1897 {
1898 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1899 if (exec_info->outgoings)
1900 for (j = 0; j < exec_info->outgoings->rnum; j++)
1901 {
1902 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j)((void*)(((char*)((exec_info->outgoings)->data)) + (size_t
)(exec_info->outgoings)->rsize * (size_t)(j)))
;
1903 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(outgoing_idx)))
;
1904 // If both are sub-graphs, have both tail and head specified.
1905 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1906 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1907 else if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && !CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1908 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1909 else if (!CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1910 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1911 else
1912 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1913 }
1914 }
1915 fputs("}\n", out);
1916 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1917 ccfreefree(node_id);
1918}
1919
1920void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1921{
1922 // exec current node, for synchronous CPU execution, no stream unit.
1923 int i;
1924#define visitor(node, idx, ...) \
1925 do { \
1926 if (node->cmd.cmd == CCV_NNC_NOOP) \
1927 continue; \
1928 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) \
1929 for (i = 0; i < node->graph_ref_size; i++) \
1930 { \
1931 ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[i] - 1)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(((node)->_heap_graph_ref
? (node)->_heap_graph_ref : (node)->_inline_graph_ref)
[i] - 1)))
; \
1932 ccv_nnc_graph_autotune(sub_graph, max_workspace_size, flags, 0, 0, 0, 0); \
1933 } \
1934 else { \
1935 /* Need to unwrap these tensors */ \
1936 for (i = 0; i < node->input_size + node->output_size; i++) \
1937 if (node->inputs[i] && CCV_IS_TENSOR_MULTIVIEW(node->inputs[i])((*(int*)(node->inputs[i])) & CCV_TENSOR_MULTIVIEW)) \
1938 node->inputs[i] = _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)node->inputs[i]); \
1939 PRINT(CCV_CLI_VERBOSE, "%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node
->cmd.cmd), idx, node->input_size, node->output_size
); fflush(stdout); } } while (0)
; \
1940 for (i = 0; i < node->input_size; i++) \
1941 { \
1942 PRINT(CCV_CLI_VERBOSE, "|-> %d. %p (%p)", i + 1, node->inputs[i], (node->inputs[i] ? node->inputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-> %d. %p (%p)", i + 1, node->inputs[i], (node
->inputs[i] ? node->inputs[i]->data.u8 : 0)); fflush
(stdout); } } while (0)
; \
1943 if (node->inputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)(CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) \
1944 ccv_nnc_print_tensor_shape(node->inputs[i]); \
1945 PRINT(CCV_CLI_VERBOSE, "\n")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("\n"); fflush(stdout); } } while (0)
; \
1946 } \
1947 for (i = 0; i < node->output_size; i++) \
1948 { \
1949 PRINT(CCV_CLI_VERBOSE, "|<- %d. %p (%p)", i + 1, node->outputs[i], (node->outputs[i] ? node->outputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|<- %d. %p (%p)", i + 1, node->outputs[i], (
node->outputs[i] ? node->outputs[i]->data.u8 : 0)); fflush
(stdout); } } while (0)
; \
1950 if (node->outputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)(CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) \
1951 ccv_nnc_print_tensor_shape(node->outputs[i]); \
1952 PRINT(CCV_CLI_VERBOSE, "\n")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("\n"); fflush(stdout); } } while (0)
; \
1953 } \
1954 node->cmd = ccv_nnc_cmd_autotune(node->cmd, max_workspace_size, node->hint, flags, node->inputs, node->input_size, node->outputs, node->output_size, 0); \
1955 } \
1956 } while (0)
1957 const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: 0);
1958 const int graph_source_size = source_size ? source_size : (graph->sources ? graph->sources->rnum : 0);
1959 const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: 0);
1960 const int graph_destination_size = destination_size ? destination_size : (graph->destinations ? graph->destinations->rnum : 0);
1961 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = ((graph->exec_info->
rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph_source_size); _i_++
) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1
: 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph
) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); _incomings_[(graph_sources)[_i_].d].r = 1; _exists_[0]
[_i_] = (graph_sources)[_i_].d; } int _exist_size_[2] = { (graph_source_size
), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0
) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2
; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->
exec_info)->data)) + (size_t)(graph->exec_info)->rsize
* (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((
ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int
*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((
char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data
)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(
(graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t
)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue
; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] <
(graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if
(_exist_size_[_q_] < (graph->exec_info->rnum)) ; else
__assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph_source_size); _i_++) { ((void) sizeof
(((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail
("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph.c", 1961
, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(graph_sources
)[_i_].d].r = 3; _exists_[0][_i_] = (graph_sources)[_i_].d; }
_exist_size_[0] = (graph_source_size); _exist_size_[1] = 0; _p_
= 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0
) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r != 3) continue; _incomings_[_idx_].r = 4
; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->
exec_info)->data)) + (size_t)(graph->exec_info)->rsize
* (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((
ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int
*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((
char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data
)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(
(graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t
)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges
= _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; }
_edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_
; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue; _incomings_
[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->
exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (graph->exec_info->rnum)) ; else __assert_fail
("_exist_size_[_q_] < (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 1961, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_
][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (
_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph_destination_size); _i_++) { ((void) sizeof (((graph_destinations
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_destinations
)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); _incomings_[(graph_destinations)[_i_].d].r = 5; _exists_
[0][_i_] = (graph_destinations)[_i_].d; } _exist_size_[0] = (
graph_destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue
; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_
[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((
_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0
), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info
->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void) sizeof
(((graph_destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail
("(graph_destinations)[_i_].graph == graph", "ccv_nnc_graph.c"
, 1961, __extension__ __PRETTY_FUNCTION__); })); _incomings_[
(graph_destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (
graph_source_size); _i_++) { ((void) sizeof (((graph_sources)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources
)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_sources)[_i_].d; } _p_ = 0; _q_
= 1; _exist_size_[0] = (graph_source_size); _exist_size_[1] =
0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0)))) + _idx_), (
_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d) { ++
_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size
)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0;
_j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size
)) { ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info
->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] <
(graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++)
{ ((void) sizeof (((graph_destinations)[_i_].graph == graph)
? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph
== graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); if (_incomings_[(graph_destinations)[_i_].d].r == 7) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(graph_destinations
)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[
(graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0"
, "ccv_nnc_graph.c", 1961, __extension__ __PRETTY_FUNCTION__)
; })); } else if (_incomings_[(graph_destinations)[_i_].d].c >
0) continue; visitor((((ccv_nnc_graph_exec_info_t*)((void*)(
((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0)))) + (graph_destinations)[
_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations
)[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while (
0);
;
1962#undef visitor
1963}
1964
1965void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph)
1966{
1967 int i, j;
1968 for (i = 0; i < graph->exec_info->rnum; i++)
1969 {
1970 ccv_nnc_graph_exec_info_t *info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1971 if (info->_heap_graph_ref)
1972 ccfreefree(info->_heap_graph_ref);
1973 ccv_array_t* outgoings = info->outgoings;
1974 if (outgoings)
1975 ccv_array_free(outgoings);
1976 // We allocate inputs & outputs in continuous fashion, therefore, only need to free the input array.
1977 if (info->inputs)
1978 ccfreefree(info->inputs);
1979 if (info->input_flags)
1980 ccfreefree(info->input_flags);
1981 if (info->updates)
1982 ccfreefree(info->updates);
1983 if ((info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) && info->p_while.inputs)
1984 ccfreefree(info->p_while.inputs);
1985 }
1986 if (graph->tensor_wraps)
1987 {
1988 for (i = 0; i < graph->tensor_wraps->rnum; i++)
1989 {
1990 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, i)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(i)))
;
1991 if (tensor_wrap_array)
1992 {
1993 for (j = 0; j < tensor_wrap_array->size; j++)
1994 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[j]);
1995 ccfreefree(tensor_wrap_array);
1996 }
1997 }
1998 ccv_array_free(graph->tensor_wraps);
1999 }
2000 if (graph->tensor_wraps_refs)
2001 ccv_array_free(graph->tensor_wraps_refs);
2002 if (graph->breakpoints)
2003 ccfreefree(graph->breakpoints);
2004 if (graph->sources)
2005 ccv_array_free(graph->sources);
2006 if (graph->destinations)
2007 ccv_array_free(graph->destinations);
2008 if (graph->default_schedule)
2009 ccv_nnc_graph_static_schedule_free(graph->default_schedule);
2010 if (graph->streams)
2011 {
2012 // If the graph has parent graph, the default stream is allocated by the parent graph, we need to skip.
2013 if (!graph->p)
2014 ccv_nnc_stream_context_free(graph->streams[0]);
2015 for (i = 1; i < graph->stream_size; i++)
2016 ccv_nnc_stream_context_free(graph->streams[i]);
2017 ccfreefree(graph->streams);
2018 }
2019 if (graph->block_stream_tasks)
2020 ccfreefree(graph->block_stream_tasks);
2021 if (graph->signals)
2022 {
2023 for (i = 0; i < graph->signal_size; i++)
2024 ccv_nnc_stream_signal_free(graph->signals[i]);
2025 ccfreefree(graph->signals);
2026 }
2027 if (graph->carry_overs)
2028 {
2029 for (i = 0; i < graph->carry_overs->rnum; i++)
2030 {
2031 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i)((void*)(((char*)((graph->carry_overs)->data)) + (size_t
)(graph->carry_overs)->rsize * (size_t)(i)))
;
2032 _ccv_nnc_graph_tensor_wrap_free(carry_over->from);
2033 _ccv_nnc_graph_tensor_wrap_free(carry_over->to);
2034 }
2035 ccv_array_free(graph->carry_overs);
2036 }
2037 if (graph->sub_graphs)
2038 {
2039 for (i = 0; i < graph->sub_graphs->rnum; i++)
2040 ccv_nnc_graph_free(*(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
);
2041 ccv_array_free(graph->sub_graphs);
2042 }
2043 ccv_array_free(graph->exec_info);
2044 if (graph->buffer)
2045 ccfreefree(graph->buffer);
2046 ccfreefree(graph);
2047}