Bug Summary

File: nnc/ccv_nnc_graph.c
Warning: line 1481, column 44
Dereference of null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_graph.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/14.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/home/liu/buildslave/public_html/analyze/2022-06-22-151334-490440-1 -x c ccv_nnc_graph.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_graph.h"
6
7// MARK - Level-2 API
8
9ccv_nnc_graph_t* ccv_nnc_graph_new(void)
10{
11 ccv_nnc_graph_t* graph = (ccv_nnc_graph_t*)cccalloccalloc(1, sizeof(ccv_nnc_graph_t));
12 graph->exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), 5, 0);
13 return graph;
14}
15
16void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size)
17{
18 if (!graph->sources)
19 graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), source_size, 0);
20 else
21 ccv_array_clear(graph->sources);
22 int i;
23 for (i = 0; i < source_size; i++)
24 ccv_array_push(graph->sources, sources + i);
25 graph->topsorted = 0;
26}
27
28ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph)
29{
30 return graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: 0;
31}
32
33int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph)
34{
35 return graph->sources ? graph->sources->rnum : 0;
36}
37
38void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
39{
40 if (!graph->destinations)
41 graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), destination_size, 0);
42 else
43 ccv_array_clear(graph->sources);
44 int i;
45 for (i = 0; i < destination_size; i++)
46 ccv_array_push(graph->destinations, destinations + i);
47 graph->topsorted = 0;
48}
49
50ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph)
51{
52 return graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: 0;
53}
54
55int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph)
56{
57 return graph->destinations ? graph->destinations->rnum : 0;
58}
59
60void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd)
61{
62 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 62, __extension__ __PRETTY_FUNCTION__); }
))
;
63 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 63, __extension__ __PRETTY_FUNCTION__); }
))
;
64 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
65 exec_info->cmd = cmd;
66}
67
68ccv_nnc_cmd_t ccv_nnc_graph_exec_cmd(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
69{
70 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 70, __extension__ __PRETTY_FUNCTION__); }
))
;
71 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 71, __extension__ __PRETTY_FUNCTION__); }
))
;
72 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
73 return exec_info->cmd;
74}
75
76void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint)
77{
78 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 78, __extension__ __PRETTY_FUNCTION__); }
))
;
79 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 79, __extension__ __PRETTY_FUNCTION__); }
))
;
80 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
81 exec_info->hint = hint;
82}
83
84static int _ccv_nnc_tensor_multiview_level_count(const ccv_nnc_tensor_multiview_t* const mv)
85{
86 if (!CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
87 return 1;
88 const int count = mv->kind + mv->repeat;
89 int i, c = 0;
90 for (i = 0; i < count; i++)
91 {
92 ccv_nnc_tensor_t* tv = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i];
93 if (tv == CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10)))
94 c = ccv_max(c, 1)({ typeof (c) _a = (c); typeof (1) _b = (1); (_a > _b) ? _a
: _b; })
;
95 else
96 c = ccv_max(c, _ccv_nnc_tensor_multiview_level_count((ccv_nnc_tensor_multiview_t*)tv))({ typeof (c) _a = (c); typeof (_ccv_nnc_tensor_multiview_level_count
((ccv_nnc_tensor_multiview_t*)tv)) _b = (_ccv_nnc_tensor_multiview_level_count
((ccv_nnc_tensor_multiview_t*)tv)); (_a > _b) ? _a : _b; }
)
;
97 }
98 return c + 1;
99}
100
101static ccv_nnc_graph_tensor_wrap_t* _ccv_nnc_graph_tensor_wrap_new(const ccv_nnc_tensor_multiview_t* const mv)
102{
103 const int level_count = _ccv_nnc_tensor_multiview_level_count(mv);
104 ccv_nnc_graph_tensor_wrap_t* tensor_wrap = (ccv_nnc_graph_tensor_wrap_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_tensor_wrap_t) + sizeof(ccv_nnc_tensor_t*) * (level_count - 1));
105 tensor_wrap->update_required = 0;
106 tensor_wrap->count = level_count;
107 tensor_wrap->index = 0;
108 tensor_wrap->tensors[0] = (ccv_nnc_tensor_t*)mv;
109 return tensor_wrap;
110}
111
112static void _ccv_nnc_graph_exec_rewind(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
113{
114 if (!info->tensor_wraps_ref)
115 return;
116 int i;
117 assert(info->tensor_wraps_ref <= graph->tensor_wraps->rnum)((void) sizeof ((info->tensor_wraps_ref <= graph->tensor_wraps
->rnum) ? 1 : 0), __extension__ ({ if (info->tensor_wraps_ref
<= graph->tensor_wraps->rnum) ; else __assert_fail (
"info->tensor_wraps_ref <= graph->tensor_wraps->rnum"
, "ccv_nnc_graph.c", 117, __extension__ __PRETTY_FUNCTION__);
}))
;
118 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(info->tensor_wraps_ref
- 1)))
;;
119 // Rewind from tensor wraps.
120 for (i = 0; i < info->input_size; i++)
121 if (tensor_wrap_array->tensor_wraps[i])
122 info->inputs[i] = tensor_wrap_array->tensor_wraps[i]->tensors[0];
123 const int d = info->input_size;
124 for (i = 0; i < info->output_size; i++)
125 if (tensor_wrap_array->tensor_wraps[d + i])
126 info->outputs[i] = tensor_wrap_array->tensor_wraps[d + i]->tensors[0];
127 const int dd = info->input_size + info->output_size;
128 for (i = 0; i < info->update_size; i++)
129 if (tensor_wrap_array->tensor_wraps[dd + i])
130 info->updates[i] = tensor_wrap_array->tensor_wraps[dd + i]->tensors[0];
131}
132
133static void _ccv_nnc_graph_tensor_wrap_free(ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
134{
135 ccfreefree(tensor_wrap);
136}
137
138ccv_nnc_graph_tensor_wrap_array_t* ccv_nnc_get_tensor_wrap_array(ccv_nnc_graph_t* const graph, const int tensor_wrap_size, int* const tensor_wraps_ref)
139{
140 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = *tensor_wraps_ref ? (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, *tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(*tensor_wraps_ref
- 1)))
: 0;
141 // Otherwise, find an open slot.
142 if (!tensor_wrap_array_ref)
143 {
144 if (!graph->tensor_wraps)
145 graph->tensor_wraps = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wrap_array_t*), 0, 0);
146 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = 0;
147 ccv_array_push(graph->tensor_wraps, &tensor_wrap_array);
148 tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, graph->tensor_wraps->rnum - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(graph->tensor_wraps
->rnum - 1)))
;
149 *tensor_wraps_ref = graph->tensor_wraps->rnum;
150 }
151 int i;
152 if (*tensor_wrap_array_ref)
153 {
154 if ((*tensor_wrap_array_ref)->size != tensor_wrap_size)
155 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)ccreallocrealloc(*tensor_wrap_array_ref, sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1));
156 for (i = (*tensor_wrap_array_ref)->size; i < tensor_wrap_size; i++)
157 (*tensor_wrap_array_ref)->tensor_wraps[i] = 0;
158 } else
159 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)cccalloccalloc(sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1), 1);
160 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
161 tensor_wrap_array->size = tensor_wrap_size;
162 return tensor_wrap_array;
163}
164
165void ccv_nnc_set_tensor_wraps(ccv_nnc_graph_tensor_wrap_t** const tensor_wraps, ccv_nnc_tensor_t* const* const tensors, const int tensor_size)
166{
167 int i;
168 for (i = 0; i < tensor_size; i++)
169 if (tensors[i])
170 {
171 if (CCV_IS_TENSOR_MULTIVIEW(tensors[i])((*(int*)(tensors[i])) & CCV_TENSOR_MULTIVIEW) &&
172 ((ccv_nnc_tensor_multiview_t*)tensors[i])->anchor != CCV_NNC_MULTIVIEW_PHI(intptr_t)0x1)
173 {
174 if (!tensor_wraps[i] || tensors[i] != tensor_wraps[i]->tensors[0])
175 {
176 if (tensor_wraps[i])
177 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
178 tensor_wraps[i] = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)tensors[i]);
179 }
180 } else {
181 if (tensor_wraps[i])
182 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
183 tensor_wraps[i] = 0;
184 }
185 }
186}
187
188void ccv_nnc_graph_register_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
189{
190 ccv_nnc_graph_t* p = graph;
191 const ccv_nnc_graph_tensor_wraps_ref_t tensor_wraps_ref = {
192 .d = tensor_wraps_ref_d,
193 .graph = graph,
194 };
195 do {
196 if (!p->tensor_wraps_refs)
197 {
198 p->tensor_wraps_refs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wraps_ref_t), 0, 0);
199 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
200 } else {
201 int i;
202 int has_tensor_wraps_ref = 0;
203 for (i = 0; !has_tensor_wraps_ref && i < p->tensor_wraps_refs->rnum; i++)
204 {
205 ccv_nnc_graph_tensor_wraps_ref_t* tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i)((void*)(((char*)((p->tensor_wraps_refs)->data)) + (size_t
)(p->tensor_wraps_refs)->rsize * (size_t)(i)))
;
206 has_tensor_wraps_ref = (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph);
207 }
208 if (!has_tensor_wraps_ref)
209 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
210 }
211 p = p->p;
212 } while (p);
213}
214
215static void _ccv_nnc_graph_redo_tensor_wraps(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
216{
217 int i;
218 const int has_wrap = ccv_nnc_tensors_have_wraps(info->inputs, info->input_size) ||
219 ccv_nnc_tensors_have_wraps(info->outputs, info->output_size) ||
220 ccv_nnc_tensors_have_wraps(info->updates, info->update_size);
221 if (has_wrap)
222 {
223 const int tensor_wrap_size = info->input_size + info->output_size + info->update_size;
224 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = ccv_nnc_get_tensor_wrap_array(graph, tensor_wrap_size, &info->tensor_wraps_ref);
225 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps, info->inputs, info->input_size);
226 const int d = info->input_size;
227 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + d, info->outputs, info->output_size);
228 const int dd = info->input_size + info->output_size;
229 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + dd, info->updates, info->update_size);
230 } else if (info->tensor_wraps_ref) {
231 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(info->tensor_wraps_ref
- 1)))
;
232 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
233 if (tensor_wrap_array)
234 {
235 for (i = 0; i < tensor_wrap_array->size; i++)
236 if (tensor_wrap_array->tensor_wraps[i])
237 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[i]);
238 ccfreefree(tensor_wrap_array);
239 *tensor_wrap_array_ref = 0;
240 info->tensor_wraps_ref = 0;
241 }
242 }
243}
244
245static void _ccv_nnc_graph_deregister_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
246{
247 ccv_nnc_graph_t* p = graph;
248 do {
249 int i;
250 // Remove from the array.
251 if (p->tensor_wraps_refs)
252 for (i = 0; i < p->tensor_wraps_refs->rnum; i++)
253 {
254 ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i)((void*)(((char*)((p->tensor_wraps_refs)->data)) + (size_t
)(p->tensor_wraps_refs)->rsize * (size_t)(i)))
;
255 if (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph)
256 {
257 --p->tensor_wraps_refs->rnum;
258 if (i < p->tensor_wraps_refs->rnum)
259 memcpy(tensor_wraps_ref, tensor_wraps_ref + 1, sizeof(ccv_nnc_graph_exec_t) * (p->tensor_wraps_refs->rnum - i));
260 break;
261 }
262 }
263 p = p->p;
264 } while (p);
265}
266
267void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size)
268{
269 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 269, __extension__ __PRETTY_FUNCTION__);
}))
;
270 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 270, __extension__ __PRETTY_FUNCTION__);
}))
;
271 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
272 assert(input_flag_size <= info->input_size)((void) sizeof ((input_flag_size <= info->input_size) ?
1 : 0), __extension__ ({ if (input_flag_size <= info->
input_size) ; else __assert_fail ("input_flag_size <= info->input_size"
, "ccv_nnc_graph.c", 272, __extension__ __PRETTY_FUNCTION__);
}))
;
273 assert(output_flag_size <= info->output_size)((void) sizeof ((output_flag_size <= info->output_size)
? 1 : 0), __extension__ ({ if (output_flag_size <= info->
output_size) ; else __assert_fail ("output_flag_size <= info->output_size"
, "ccv_nnc_graph.c", 273, __extension__ __PRETTY_FUNCTION__);
}))
;
274 if (info->input_size + info->output_size == 0)
275 return;
276 if (!info->input_flags)
277 {
278 info->input_flags = (int*)cccalloccalloc(info->input_size + info->output_size, sizeof(int));
279 info->output_flags = info->input_flags + info->input_size;
280 }
281 if (input_flag_size > 0)
282 memcpy(info->input_flags, input_flags, sizeof(int) * input_flag_size);
283 if (output_flag_size > 0)
284 memcpy(info->output_flags, output_flags, sizeof(int) * output_flag_size);
285}
286
287void ccv_nnc_graph_exec_pair_with(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t pair_exec)
288{
289 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 289, __extension__ __PRETTY_FUNCTION__);
}))
;
290 assert(exec.d >= 0)((void) sizeof ((exec.d >= 0) ? 1 : 0), __extension__ ({ if
(exec.d >= 0) ; else __assert_fail ("exec.d >= 0", "ccv_nnc_graph.c"
, 290, __extension__ __PRETTY_FUNCTION__); }))
;
291 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 291, __extension__ __PRETTY_FUNCTION__);
}))
;
292 assert(pair_exec.graph == graph || pair_exec.graph == graph->pair)((void) sizeof ((pair_exec.graph == graph || pair_exec.graph ==
graph->pair) ? 1 : 0), __extension__ ({ if (pair_exec.graph
== graph || pair_exec.graph == graph->pair) ; else __assert_fail
("pair_exec.graph == graph || pair_exec.graph == graph->pair"
, "ccv_nnc_graph.c", 292, __extension__ __PRETTY_FUNCTION__);
}))
;
293 assert(pair_exec.d >= 0)((void) sizeof ((pair_exec.d >= 0) ? 1 : 0), __extension__
({ if (pair_exec.d >= 0) ; else __assert_fail ("pair_exec.d >= 0"
, "ccv_nnc_graph.c", 293, __extension__ __PRETTY_FUNCTION__);
}))
;
294 if (pair_exec.graph == graph)
295 { assert(pair_exec.d < graph->exec_info->rnum)((void) sizeof ((pair_exec.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (pair_exec.d < graph->exec_info
->rnum) ; else __assert_fail ("pair_exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 295, __extension__ __PRETTY_FUNCTION__);
}))
; }
296 else
297 { assert(pair_exec.d < graph->pair->exec_info->rnum)((void) sizeof ((pair_exec.d < graph->pair->exec_info
->rnum) ? 1 : 0), __extension__ ({ if (pair_exec.d < graph
->pair->exec_info->rnum) ; else __assert_fail ("pair_exec.d < graph->pair->exec_info->rnum"
, "ccv_nnc_graph.c", 297, __extension__ __PRETTY_FUNCTION__);
}))
; }
298 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
299 exec_info->pair_ref = pair_exec.d + 1;
300}
301
302static ccv_nnc_tensor_t* _ccv_nnc_any_tensor_from_tensor_multiview(ccv_nnc_tensor_multiview_t* const mv)
303{
304 ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
305 while (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
306 {
307 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
308 const int count = 0;
309 const int off = mv->kind;
310 const int mod = mv->repeat;
311 // If reached the root.
312 tensor = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
313 }
314 return tensor;
315}
316
317void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
318{
319 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 319, __extension__ __PRETTY_FUNCTION__);
}))
;
320 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 320, __extension__ __PRETTY_FUNCTION__);
}))
;
321 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
322 // De-register from the graph if it contains multiview tensors.
323 if (info->tensor_wraps_ref)
324 _ccv_nnc_graph_deregister_tensor_wraps(graph, info->tensor_wraps_ref - 1);
325 // In case it is already executed, rewind.
326 _ccv_nnc_graph_exec_rewind(info, graph);
327 if (input_size == 0 && output_size == 0)
328 {
329 if (info->input_size > 0 || info->output_size > 0)
330 ccfreefree(info->inputs);
331 info->inputs = 0;
332 info->outputs = 0;
333 info->input_size = 0;
334 info->output_size = 0;
335 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
336 if (info->tensor_wraps_ref)
337 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
338 return;
339 }
340 if (info->inputs)
341 info->inputs = (ccv_nnc_tensor_t**)ccreallocrealloc(info->inputs, sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
342 else
343 info->inputs = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
344 info->outputs = info->inputs + input_size;
345 if (inputs)
346 memcpy(info->inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
347 if (outputs)
348 memcpy(info->outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
349 int i;
350 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
351 for (i = 0; i < input_size + output_size; i++)
352 if (info->inputs[i])
353 {
354 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i])((*(int*)(info->inputs[i])) & CCV_TENSOR_MULTIVIEW) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i];
355 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
356 }
357 info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes);
358 info->input_size = input_size;
359 info->output_size = output_size;
360 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
361 // Register again if the tensor wraps exist.
362 if (info->tensor_wraps_ref)
363 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
364 // Free flags.
365 if (info->input_flags)
366 {
367 ccfreefree(info->input_flags);
368 info->input_flags = info->output_flags = 0;
369 }
370}
371
372void ccv_nnc_graph_exec_add_as_affected(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update)
373{
374 assert(CCV_IS_TENSOR_MULTIVIEW(update))((void) sizeof ((((*(int*)(update)) & CCV_TENSOR_MULTIVIEW
)) ? 1 : 0), __extension__ ({ if (((*(int*)(update)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(update)", "ccv_nnc_graph.c"
, 374, __extension__ __PRETTY_FUNCTION__); }))
;
375 assert(exec.d < graph->exec_info->rnum)((void) sizeof ((exec.d < graph->exec_info->rnum) ? 1
: 0), __extension__ ({ if (exec.d < graph->exec_info->
rnum) ; else __assert_fail ("exec.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 375, __extension__ __PRETTY_FUNCTION__);
}))
;
376 assert(exec.graph == graph)((void) sizeof ((exec.graph == graph) ? 1 : 0), __extension__
({ if (exec.graph == graph) ; else __assert_fail ("exec.graph == graph"
, "ccv_nnc_graph.c", 376, __extension__ __PRETTY_FUNCTION__);
}))
;
377 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(exec.d)))
;
378 const int register_tensor_wraps = !info->tensor_wraps_ref;
379 const int update_index = info->update_size;
380 ++info->update_size;
381 if (info->updates)
382 info->updates = (ccv_nnc_tensor_t**)ccreallocrealloc(info->updates, sizeof(ccv_nnc_tensor_t*) * info->update_size);
383 else
384 info->updates = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * info->update_size);
385 info->updates[update_index] = update;
386 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
387 if (register_tensor_wraps)
388 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
389}
390
391ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
392{
393 int d = graph->exec_info->rnum;
394 ccv_nnc_graph_exec_info_t info = {
395 .cmd = cmd,
396 .hint = hint,
397 .input_size = input_size,
398 .output_size = output_size,
399 };
400 assert(inputs || input_size == 0)((void) sizeof ((inputs || input_size == 0) ? 1 : 0), __extension__
({ if (inputs || input_size == 0) ; else __assert_fail ("inputs || input_size == 0"
, "ccv_nnc_graph.c", 400, __extension__ __PRETTY_FUNCTION__);
}))
;
401 assert(outputs || output_size == 0)((void) sizeof ((outputs || output_size == 0) ? 1 : 0), __extension__
({ if (outputs || output_size == 0) ; else __assert_fail ("outputs || output_size == 0"
, "ccv_nnc_graph.c", 401, __extension__ __PRETTY_FUNCTION__);
}))
;
402 if (input_size > 0 || output_size > 0)
403 {
404 info.inputs = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
405 info.outputs = info.inputs + input_size;
406 if (inputs)
407 memcpy(info.inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
408 if (outputs)
409 memcpy(info.outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
410 info.input_size = input_size;
411 info.output_size = output_size;
412 int i;
413 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
414 for (i = 0; i < input_size + output_size; i++)
415 if (info.inputs[i])
416 {
417 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i])((*(int*)(info.inputs[i])) & CCV_TENSOR_MULTIVIEW) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i];
418 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
419 }
420 info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes);
421 }
422 _ccv_nnc_graph_redo_tensor_wraps(&info, graph);
423 // Add itself to the graph's wraps array, this will help the run time when we run the graph and do unwrapping.
424 if (info.tensor_wraps_ref)
425 ccv_nnc_graph_register_tensor_wraps(graph, info.tensor_wraps_ref - 1);
426 ccv_array_push(graph->exec_info, &info);
427 return (ccv_nnc_graph_exec_t){
428 .d = d,
429 .graph = graph,
430 };
431}
432
433void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to)
434{
435 ccv_nnc_graph_tensor_carry_over_t carry_over = {
436 .from = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)from),
437 .to = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)to)
438 };
439 if (!graph->carry_overs)
440 graph->carry_overs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_carry_over_t), 0, 0);
441 ccv_array_push(graph->carry_overs, &carry_over);
442}
443
444int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
445{
446 assert(graph == source.graph)((void) sizeof ((graph == source.graph) ? 1 : 0), __extension__
({ if (graph == source.graph) ; else __assert_fail ("graph == source.graph"
, "ccv_nnc_graph.c", 446, __extension__ __PRETTY_FUNCTION__);
}))
;
447 assert(graph == destination.graph)((void) sizeof ((graph == destination.graph) ? 1 : 0), __extension__
({ if (graph == destination.graph) ; else __assert_fail ("graph == destination.graph"
, "ccv_nnc_graph.c", 447, __extension__ __PRETTY_FUNCTION__);
}))
;
448 assert(source.d < graph->exec_info->rnum)((void) sizeof ((source.d < graph->exec_info->rnum) ?
1 : 0), __extension__ ({ if (source.d < graph->exec_info
->rnum) ; else __assert_fail ("source.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 448, __extension__ __PRETTY_FUNCTION__);
}))
;
449 assert(destination.d < graph->exec_info->rnum)((void) sizeof ((destination.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (destination.d < graph->
exec_info->rnum) ; else __assert_fail ("destination.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 449, __extension__ __PRETTY_FUNCTION__);
}))
;
450 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(source.d)))
;
451 if (src_info->outgoings == 0)
452 src_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
453 else {
454 int i;
455 // Check if this is already connected, if so, skip.
456 for (i = 0; i < src_info->outgoings->rnum; i++)
457 if (*(int*)ccv_array_get(src_info->outgoings, i)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(i)))
== destination.d)
458 return -1;
459 }
460 ccv_array_push(src_info->outgoings, &destination.d);
461 graph->topsorted = 0;
462 return 0;
463}
464
465int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
466{
467 assert(graph == source.graph)((void) sizeof ((graph == source.graph) ? 1 : 0), __extension__
({ if (graph == source.graph) ; else __assert_fail ("graph == source.graph"
, "ccv_nnc_graph.c", 467, __extension__ __PRETTY_FUNCTION__);
}))
;
468 assert(graph == destination.graph)((void) sizeof ((graph == destination.graph) ? 1 : 0), __extension__
({ if (graph == destination.graph) ; else __assert_fail ("graph == destination.graph"
, "ccv_nnc_graph.c", 468, __extension__ __PRETTY_FUNCTION__);
}))
;
469 assert(source.d < graph->exec_info->rnum)((void) sizeof ((source.d < graph->exec_info->rnum) ?
1 : 0), __extension__ ({ if (source.d < graph->exec_info
->rnum) ; else __assert_fail ("source.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 469, __extension__ __PRETTY_FUNCTION__);
}))
;
470 assert(destination.d < graph->exec_info->rnum)((void) sizeof ((destination.d < graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (destination.d < graph->
exec_info->rnum) ; else __assert_fail ("destination.d < graph->exec_info->rnum"
, "ccv_nnc_graph.c", 470, __extension__ __PRETTY_FUNCTION__);
}))
;
471 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(source.d)))
;
472 if (!src_info->outgoings)
473 return -1;
474 int i, j = -1;
475 // Check if this is already connected, if so, skip.
476 for (i = 0; i < src_info->outgoings->rnum; i++)
477 if (*(int*)ccv_array_get(src_info->outgoings, i)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(i)))
== destination.d)
478 {
479 j = i;
480 break;
481 }
482 if (j < 0)
483 return -1;
484 if (j < src_info->outgoings->rnum - 1)
485 *(int*)ccv_array_get(src_info->outgoings, j)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(j)))
= *(int*)ccv_array_get(src_info->outgoings, src_info->outgoings->rnum - 1)((void*)(((char*)((src_info->outgoings)->data)) + (size_t
)(src_info->outgoings)->rsize * (size_t)(src_info->outgoings
->rnum - 1)))
;
486 --src_info->outgoings->rnum;
487 ccv_nnc_graph_exec_info_t* dest_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, destination.d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(destination.d)))
;
488 if (dest_info->outgoings)
489 for (i = 0; i < dest_info->outgoings->rnum; i++)
490 ccv_array_add_unique_int(src_info->outgoings, *(int*)ccv_array_get(dest_info->outgoings, i)((void*)(((char*)((dest_info->outgoings)->data)) + (size_t
)(dest_info->outgoings)->rsize * (size_t)(i)))
);
491 graph->topsorted = 0;
492 return 0;
493}
494
495int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph)
496{
497 return graph->exec_info ? graph->exec_info->rnum : 0;
498}
499
500void* ccv_nnc_graph_buffer(ccv_nnc_graph_t* const graph, int size)
501{
502 if (graph->buffer_size >= size)
503 return graph->buffer;
504 graph->buffer_size = size;
505 graph->buffer = (graph->buffer) ? ccreallocrealloc(graph->buffer, size) : ccmallocmalloc(size);
506 return graph->buffer;
507}
508
509void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size)
510{
511 assert(exec_cvt_size == graph->exec_info->rnum)((void) sizeof ((exec_cvt_size == graph->exec_info->rnum
) ? 1 : 0), __extension__ ({ if (exec_cvt_size == graph->exec_info
->rnum) ; else __assert_fail ("exec_cvt_size == graph->exec_info->rnum"
, "ccv_nnc_graph.c", 511, __extension__ __PRETTY_FUNCTION__);
}))
;
512 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 512, __extension__ __PRETTY_FUNCTION__);
}))
;
513 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 513, __extension__ __PRETTY_FUNCTION__);
}))
;
514 int i, j;
515 for (i = 0; i < exec_cvt_size; i++)
516 exec_cvt[i] = -1;
517 ccv_array_t* exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), graph->exec_info->rnum, 0);
518 // If there are breakpoints, it is more complicated, we first start to the breakpoints, and then continue from the breakpoints to the destinations.
519 if (graph->breakpoint_size)
520 {
521 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, graph->breakpoints, graph->breakpoint_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 :
0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((
char*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->sources
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->sources)->data)) + (size_t)(
graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_
[0] = (graph->breakpoint_size); _exist_size_[1] = 0; _p_ =
0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[
_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const
int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r
!= 2) continue; _incomings_[_idx_].r = 3; if (_incomings_[_idx_
].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_
++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_
]; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_];
} } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_
= 0; _i_ < (graph->breakpoint_size); _i_++) { ((void) sizeof
(((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail
("(graph->breakpoints)[_i_].graph == graph", "ccv_nnc_graph.c"
, 521, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(
graph->breakpoints)[_i_].d].d = 1; } for (_i_ = 0; _i_ <
(graph->sources->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->breakpoint_size)) { _exists_[_p_][_i_] = d; continue; } }
else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void
*)(((char*)((graph->exec_info)->data)) + (size_t)(graph
->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->
rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->breakpoint_size)) { _exists_[_q_][_exist_size_[_q_]] = d
; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) =
(_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[(graph->breakpoints)[_i_].d].r == 4)
continue; if (!(0)) { ((void) sizeof ((_incomings_[(graph->
breakpoints)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_
[(graph->breakpoints)[_i_].d].c == 0) ; else __assert_fail
("_incomings_[(graph->breakpoints)[_i_].d].c == 0", "ccv_nnc_graph.c"
, 521, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_
[(graph->breakpoints)[_i_].d].c > 0) continue; _visit_->
node[_visit_->size].index = (((graph->breakpoints)[_i_]
.d)); _visit_->node[_visit_->size].term = ((_incomings_
[(graph->breakpoints)[_i_].d].d)); ++_visit_->size;; } if
(_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof
((_visit_->size <= (graph->exec_info->rnum)) ? 1
: 0), __extension__ ({ if (_visit_->size <= (graph->
exec_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_info->rnum)"
, "ccv_nnc_graph.c", 521, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
522 for (i = 0; i < graph->breakpoint_size; i++)
523 exec_cvt[graph->breakpoints[i].d] = -2; // Mark this as breakpoints, so we will skip the first round.
524 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
525 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 525, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
526 if (exec_cvt[idx] == -2) // Skip breakpoint.
527 continue;
528 // Loop over node and push to the array.
529 ccv_array_push(exec_info, node);
530 // Go to its sub-graph to fix exec_idx
531 for (i = 0; i < node->graph_ref_size; i++)
532 {
533 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
534 if (graph_ref >= 0)
535 {
536 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
537 sub_graph->exec_idx = exec_info->rnum;
538 }
539 }
540 exec_cvt[idx] = exec_info->rnum - 1;
541 } ccv_nnc_graph_visit_endfor} }
542 ccv_nnc_graph_visit_free(visit);
543 graph->breakpoint_offset = exec_info->rnum;
544 visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (graph->breakpoints)[_i_].d; } int
_exist_size_[2] = { (graph->breakpoint_size), 0, }; int _p_
= 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 1) continue; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size
); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints
)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_
[0] = (graph->breakpoint_size); _exist_size_[1] = 0; _p_ =
0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0)
{ _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r == 2) continue; _incomings_[_idx_].r = 2
; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->
exec_info)->data)) + (size_t)(graph->exec_info)->rsize
* (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((
ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int
*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((
char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data
)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(
(graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t
)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges
= _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; }
_edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_
; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_]] = d; ++
_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) =
(_i_)); } for (_i_ = 0; _i_ < (graph->destinations->
rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->destinations->rnum); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->breakpoint_size); _i_++) { ((void) sizeof ((
(graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail
("(graph->breakpoints)[_i_].graph == graph", "ccv_nnc_graph.c"
, 544, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (graph->breakpoints)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->breakpoint_size); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->destinations->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(graph->destinations->rnum)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 4) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 544, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 544, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
545 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
546 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 546, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
547 // Loop over node and push to the array.
548 ccv_array_push(exec_info, node);
549 // Go to its sub-graph to fix exec_idx
550 for (i = 0; i < node->graph_ref_size; i++)
551 {
552 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
553 if (graph_ref >= 0)
554 {
555 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
556 sub_graph->exec_idx = exec_info->rnum;
557 }
558 }
559 exec_cvt[idx] = exec_info->rnum - 1;
560 } ccv_nnc_graph_visit_endfor} }
561 ccv_nnc_graph_visit_free(visit);
562 for (i = 0; i < graph->breakpoint_size; i++)
563 { assert(exec_cvt[graph->breakpoints[i].d] >= 0)((void) sizeof ((exec_cvt[graph->breakpoints[i].d] >= 0
) ? 1 : 0), __extension__ ({ if (exec_cvt[graph->breakpoints
[i].d] >= 0) ; else __assert_fail ("exec_cvt[graph->breakpoints[i].d] >= 0"
, "ccv_nnc_graph.c", 563, __extension__ __PRETTY_FUNCTION__);
}))
; } // All breakpoints should be assigned.
564 } else {
565 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 :
0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((
char*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->sources
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->sources)->data)) + (size_t)(
graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->destinations
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->destinations->rnum); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->sources->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->
data)) + (size_t)(graph->sources)->rsize * (size_t)(0))
))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->destinations->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(graph->destinations->rnum)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 4) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 565, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 565, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
566 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
567 assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ (
{ if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref"
, "ccv_nnc_graph.c", 567, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a pair ref, we cannot fix it up.
568 // Loop over node and push to the array.
569 ccv_array_push(exec_info, node);
570 // Go to its sub-graph to fix exec_idx
571 for (i = 0; i < node->graph_ref_size; i++)
572 {
573 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
574 if (graph_ref >= 0)
575 {
576 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
577 sub_graph->exec_idx = exec_info->rnum;
578 }
579 }
580 exec_cvt[idx] = exec_info->rnum - 1;
581 } ccv_nnc_graph_visit_endfor} }
582 ccv_nnc_graph_visit_free(visit);
583 }
584 assert(graph->exec_info->rnum == exec_info->rnum)((void) sizeof ((graph->exec_info->rnum == exec_info->
rnum) ? 1 : 0), __extension__ ({ if (graph->exec_info->
rnum == exec_info->rnum) ; else __assert_fail ("graph->exec_info->rnum == exec_info->rnum"
, "ccv_nnc_graph.c", 584, __extension__ __PRETTY_FUNCTION__);
}))
;
585 ccv_array_free(graph->exec_info);
586 graph->exec_info = exec_info;
587 for (i = 0; i < graph->sources->rnum; i++)
588 {
589 ccv_nnc_graph_exec_t* const source = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, i)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(i)))
;
590 source->d = exec_cvt[source->d];
591 }
592 for (i = 0; i < graph->destinations->rnum; i++)
593 {
594 ccv_nnc_graph_exec_t* const destination = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(i)))
;
595 destination->d = exec_cvt[destination->d];
596 }
597 // Update all outgoings to reflect the latest.
598 for (i = 0; i < exec_info->rnum; i++)
599 {
600 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(exec_info, i)((void*)(((char*)((exec_info)->data)) + (size_t)(exec_info
)->rsize * (size_t)(i)))
;
601 if (info->outgoings)
602 for (j = 0; j < info->outgoings->rnum; j++)
603 *(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
= exec_cvt[*(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
];
604 }
605 graph->topsorted = 1;
606}
607
/*
 * Per-stream bookkeeping record. The scheduling helpers in this file keep these
 * in a `stream_data` ccv_array_t and look entries up by stream number.
 */
608typedef struct {
// Device this stream is bound to; entry 0's value is used as the fallback device id by _ccv_nnc_device_ids_for_stream_data.
609 int device_id;
// NOTE(review): not referenced by the helpers visible next to this struct; meaning to be confirmed against the scheduler.
610 int exec_idx;
// Lazily created array of int signal ids; _ccv_nnc_graph_schedule_assign_signals pushes every signal it makes this stream wait on and searches it to skip repeats. May be NULL.
611 ccv_array_t* signal_set;
612 ccv_array_t* command_set; // The set of command executed in this stream. In case there is a tie (on rank). We will check this.
613} ccv_nnc_stream_data_t;
614
/*
 * Work out which signals `node` has to wait on, given the exec nodes feeding into it.
 *
 * incoming       - array of int indices into `exec_info`; must be non-empty (asserted).
 * node           - schedule entry being filled in; its wait_size / waits are overwritten.
 * stream_data    - array of ccv_nnc_stream_data_t, indexed by stream number; the
 *                  signal_set of each of node's streams is updated here.
 * signal_size    - running counter of signals handed out so far; incremented whenever an
 *                  incoming stream gets a fresh signal id.
 * exec_info      - base of the schedule entries that `incoming` indexes into.
 * exec_info_size - number of entries in `exec_info`, used for the bounds asserts.
 *
 * NOTE(review): the first loop dereferences exec_info + incoming_idx before the
 * range asserts on incoming_idx (lines 632/633) have run; the asserts only guard the
 * second loop.
 * NOTE(review): when no wait is collected, node->waits is left untouched and only
 * node->wait_size is reset to 0.
 */
615static void _ccv_nnc_graph_schedule_assign_signals(ccv_array_t* const incoming, ccv_nnc_graph_exec_schedule_t* const node, ccv_array_t* const stream_data, int* const signal_size, ccv_nnc_graph_exec_schedule_t* const exec_info, const int exec_info_size)
616{
617 assert(incoming->rnum > 0)((void) sizeof ((incoming->rnum > 0) ? 1 : 0), __extension__
({ if (incoming->rnum > 0) ; else __assert_fail ("incoming->rnum > 0"
, "ccv_nnc_graph.c", 617, __extension__ __PRETTY_FUNCTION__);
}))
;
618 int i, j, k;
619 int wait_size = 0, max_wait_size = 0;
// First pass: the sum of all incoming stream counts is an upper bound on how many signals we may have to wait on.
620 for (i = 0; i < incoming->rnum; i++)
621 {
622 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
623 ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx;
624 assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ?
1 : 0), __extension__ ({ if (incoming_exec_info->stream_size
> 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0"
, "ccv_nnc_graph.c", 624, __extension__ __PRETTY_FUNCTION__);
}))
;
625 max_wait_size += incoming_exec_info->stream_size;
626 }
// Scratch list of signals to wait on, sized by the bound above (at least 1 element). This is a variable-length array on the stack.
627 int waits[ccv_max(1, max_wait_size)({ typeof (1) _a = (1); typeof (max_wait_size) _b = (max_wait_size
); (_a > _b) ? _a : _b; })
];
628 assert(node->stream_size > 0)((void) sizeof ((node->stream_size > 0) ? 1 : 0), __extension__
({ if (node->stream_size > 0) ; else __assert_fail ("node->stream_size > 0"
, "ccv_nnc_graph.c", 628, __extension__ __PRETTY_FUNCTION__);
}))
;
// Second pass: decide, per incoming node, whether a signal-based sync is required.
629 for (i = 0; i < incoming->rnum; i++)
630 {
631 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
632 assert(incoming_idx < exec_info_size)((void) sizeof ((incoming_idx < exec_info_size) ? 1 : 0), __extension__
({ if (incoming_idx < exec_info_size) ; else __assert_fail
("incoming_idx < exec_info_size", "ccv_nnc_graph.c", 632,
__extension__ __PRETTY_FUNCTION__); }))
;
633 assert(incoming_idx >= 0)((void) sizeof ((incoming_idx >= 0) ? 1 : 0), __extension__
({ if (incoming_idx >= 0) ; else __assert_fail ("incoming_idx >= 0"
, "ccv_nnc_graph.c", 633, __extension__ __PRETTY_FUNCTION__);
}))
;
634 ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx;
635 assert(incoming_exec_info->stream_size > 0)((void) sizeof ((incoming_exec_info->stream_size > 0) ?
1 : 0), __extension__ ({ if (incoming_exec_info->stream_size
> 0) ; else __assert_fail ("incoming_exec_info->stream_size > 0"
, "ccv_nnc_graph.c", 635, __extension__ __PRETTY_FUNCTION__);
}))
;
636 int stream_synced = 1;
637 // If the current node's stream is a subset of the incoming node's stream, there
638 // is no need to sync with signal, because we are already synced with the incoming.
639 for (j = 0; stream_synced && j < node->stream_size; j++)
640 {
641 const int s = SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node
)._heap_streams)
[j];
642 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 642
, __extension__ __PRETTY_FUNCTION__); }))
;
643 int flag = 0;
// Linear search: is stream s of the current node also one of the incoming node's streams?
644 for (k = 0; !flag && k < incoming_exec_info->stream_size; k++)
645 flag = (SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_streams : (*incoming_exec_info)._heap_streams)
[k] == s);
646 stream_synced = flag;
647 }
648 if (stream_synced)
649 continue;
650 // Otherwise, find the streams we need to sync with, and create signals for these.
651 for (j = 0; j < incoming_exec_info->stream_size; j++)
652 {
653 const int s = SCHEDULE_STREAMS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_streams : (*incoming_exec_info)._heap_streams)
[j];
654 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 654
, __extension__ __PRETTY_FUNCTION__); }))
;
655 int flag = 0;
// Incoming streams that the current node also runs on need no signal.
656 for (k = 0; !flag && k < node->stream_size; k++)
657 flag = (SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node
)._heap_streams)
[k] == s);
658 if (!flag) // Need to have a signal.
659 {
// A negative entry is treated as "no signal assigned yet": take the next id from *signal_size.
660 if (SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j] < 0)
661 SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j] = (*signal_size)++;
662 else {
663 int flag = 0;
664 // If any of the stream the current node has already seen this signal, we are good already.
665 for (k = 0; !flag && k < node->stream_size; k++)
666 {
667 assert(SCHEDULE_STREAMS(*node)[k] >= 0)((void) sizeof ((((*node).stream_size <= 1 ? (*node)._inline_streams
: (*node)._heap_streams)[k] >= 0) ? 1 : 0), __extension__
({ if (((*node).stream_size <= 1 ? (*node)._inline_streams
: (*node)._heap_streams)[k] >= 0) ; else __assert_fail ("SCHEDULE_STREAMS(*node)[k] >= 0"
, "ccv_nnc_graph.c", 667, __extension__ __PRETTY_FUNCTION__);
}))
;
668 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((*node).stream_size <= 1 ? (*node)
._inline_streams : (*node)._heap_streams)[k])))
;
669 flag = (data->signal_set && ccv_array_find_int(data->signal_set, SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j]));
670 }
671 if (flag)
672 continue;
673 }
674 // Otherwise, we need to wait for this. Currently, our granularity is about wait on all streams.
675 waits[wait_size++] = SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j];
676 // All streams on this node have seen this signal.
677 for (k = 0; k < node->stream_size; k++)
678 {
679 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((*node).stream_size <= 1 ? (*node)
._inline_streams : (*node)._heap_streams)[k])))
;
680 if (!data->signal_set)
681 data->signal_set = ccv_array_new(sizeof(int), 0, 0);
682 ccv_array_push(data->signal_set, &SCHEDULE_SIGNALS(*incoming_exec_info)((*incoming_exec_info).stream_size <= 1 ? (*incoming_exec_info
)._inline_signals : (*incoming_exec_info)._heap_signals)
[j]);
683 }
684 }
685 }
686 }
// Publish the result: copy the scratch list into node->waits, growing or allocating the buffer as needed.
687 node->wait_size = wait_size;
688 if (wait_size > 0)
689 {
690 node->waits = node->waits ? ccreallocrealloc(node->waits, sizeof(int) * wait_size) : ccmallocmalloc(sizeof(int) * wait_size);
691 memcpy(node->waits, waits, sizeof(int) * wait_size);
692 }
693}
694
/*
 * File-scope incoming-edge record (a rank plus an array of outgoing entries).
 * Note that the expansion of ccv_nnc_graph_visit_new declares its own block-scope
 * typedef with this same name but different fields (d, r, c, edges); inside that
 * expansion the local typedef is the one in effect.
 * NOTE(review): the users of `rank` and `outgoings` are outside the visible part of
 * the file; element type of `outgoings` to be confirmed there.
 */
695typedef struct {
696 int rank;
697 ccv_array_t* outgoings;
698} ccv_nnc_incoming_t;
699
/*
 * Collect the device ids a node should be scheduled on.
 *
 * node               - exec node whose inputs / outputs are inspected.
 * device_id          - fallback device id, used only when `stream_data` is empty; a
 *                      negative value falls back to 0.
 * stream_data        - array of ccv_nnc_stream_data_t; entry 0 supplies the default device id.
 * device_ids         - output buffer for the ids.
 * max_device_id_size - capacity of `device_ids`, forwarded to ccv_nnc_device_ids_for_io.
 *
 * Returns the number of ids written. When ccv_nnc_device_ids_for_io reports none for
 * CCV_TENSOR_GPU_MEMORY, exactly one fallback id is written and 1 is returned.
 *
 * NOTE(review): the fallback path stores to device_ids[0] without consulting
 * max_device_id_size, so callers must pass a buffer with room for at least one id.
 */
700static int _ccv_nnc_device_ids_for_stream_data(ccv_nnc_graph_exec_info_t* const node, const int device_id, ccv_array_t* const stream_data, int* const device_ids, const int max_device_id_size)
701{
702 // TODO: I need to re-think whether this is GPU only or not.
703 int device_id_size = ccv_nnc_device_ids_for_io(node->inputs, node->input_size, node->outputs, node->output_size, CCV_TENSOR_GPU_MEMORY, device_ids, max_device_id_size);
704 if (device_id_size == 0)
705 {
706 // If there is a default data, use that device id. Otherwise, use the device id passed in (this will be the default data device id).
707 if (stream_data->rnum > 0)
708 {
709 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
710 device_ids[0] = default_data->device_id;
711 } else
712 device_ids[0] = device_id >= 0 ? device_id : 0;
713 device_id_size = 1;
714 }
715 return device_id_size;
716}
717
/*
 * Release a static schedule together with everything it owns.
 *
 * schedule - the schedule to destroy; it is dereferenced unconditionally, so it must
 *            not be NULL. The pointer is invalid after this call.
 *
 * Per-exec entries free their heap stream list and their waits list; then the
 * schedule-level stream_1s, waits and psort buffers and the begin / end signals are
 * released, and finally the schedule block itself.
 */
718void ccv_nnc_graph_static_schedule_free(ccv_nnc_graph_static_schedule_t* const schedule)
719{
720 int i;
721 ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info;
722 for (i = 0; i < schedule->exec_info_size; i++)
723 {
// Only entries with more than one stream use _heap_streams; with stream_size <= 1 the SCHEDULE_STREAMS expansion selects _inline_streams instead.
// NOTE(review): _heap_signals is not freed separately here; presumably it shares the _heap_streams allocation, confirm at the allocation site.
724 if (schd_info[i].stream_size > 1)
725 ccfreefree(schd_info[i]._heap_streams);
726 if (schd_info[i].waits)
727 ccfreefree(schd_info[i].waits);
728 }
729 if (schedule->stream_1s)
730 ccfreefree(schedule->stream_1s);
731 if (schedule->waits)
732 ccfreefree(schedule->waits);
733 if (schedule->psort)
734 ccfreefree(schedule->psort);
735 if (schedule->begin)
736 ccv_nnc_stream_signal_free(schedule->begin);
737 if (schedule->end)
738 ccv_nnc_stream_signal_free(schedule->end);
// The exec_info entries are part of this same block, so this last free releases them too.
739 ccfreefree(schedule);
740}
741
742static ccv_nnc_graph_static_schedule_t* _ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int device_id, ccv_nnc_stream_context_t* const stream_context, const ccv_nnc_graph_exec_t* const _sources, const int _source_size, const ccv_nnc_graph_exec_t* const _destinations, const int _destination_size)
743{
744 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 744, __extension__ __PRETTY_FUNCTION__);
}))
;
745 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 745, __extension__ __PRETTY_FUNCTION__);
}))
;
746 assert(graph->topsorted)((void) sizeof ((graph->topsorted) ? 1 : 0), __extension__
({ if (graph->topsorted) ; else __assert_fail ("graph->topsorted"
, "ccv_nnc_graph.c", 746, __extension__ __PRETTY_FUNCTION__);
}))
; // Only support this on a topsorted graph.
747 const int exec_info_size = graph->exec_info->rnum;
748 assert(exec_info_size > 0)((void) sizeof ((exec_info_size > 0) ? 1 : 0), __extension__
({ if (exec_info_size > 0) ; else __assert_fail ("exec_info_size > 0"
, "ccv_nnc_graph.c", 748, __extension__ __PRETTY_FUNCTION__);
}))
;
749 const ccv_nnc_graph_exec_t* const sources = _sources == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: _sources;
750 const int source_size = _sources == 0 ? graph->sources->rnum : _source_size;
751 if (!_sources)
752 { assert(_source_size == 0)((void) sizeof ((_source_size == 0) ? 1 : 0), __extension__ (
{ if (_source_size == 0) ; else __assert_fail ("_source_size == 0"
, "ccv_nnc_graph.c", 752, __extension__ __PRETTY_FUNCTION__);
}))
; }
753 const ccv_nnc_graph_exec_t* const destinations = _destinations == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: _destinations;
754 const int destination_size = _destinations == 0 ? graph->destinations->rnum : _destination_size;
755 if (!_destinations)
756 { assert(_destination_size == 0)((void) sizeof ((_destination_size == 0) ? 1 : 0), __extension__
({ if (_destination_size == 0) ; else __assert_fail ("_destination_size == 0"
, "ccv_nnc_graph.c", 756, __extension__ __PRETTY_FUNCTION__);
}))
; }
757 const int root_schedule = (_sources == 0 && _destinations == 0);
758 ccv_nnc_graph_static_schedule_t* const schedule = cccalloccalloc(1, sizeof(ccv_nnc_graph_static_schedule_t) + sizeof(ccv_nnc_graph_exec_schedule_t) * (exec_info_size - 1));
759 schedule->exec_info_size = exec_info_size;
760 ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info;
761 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0)))
;
762 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = (exec_info_size + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (
(sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_
[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r == 1) continue; _incomings_[
_idx_].r = 1; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size);
_i_++) { ((void) sizeof (((sources)[_i_].graph == graph) ? 1
: 0), __extension__ ({ if ((sources)[_i_].graph == graph) ; else
__assert_fail ("(sources)[_i_].graph == graph", "ccv_nnc_graph.c"
, 762, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r == 2) continue; _incomings_[
_idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0;
_j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size
); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph ==
graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0
; _i_ < (source_size); _i_++) { ((void) sizeof (((sources)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1
; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 3 && _d_ < (destination_size)
) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (exec_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _incomings_[d].r == 3 && _d_ < (destination_size
)) { _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void
) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 762
, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[(destinations
)[_i_].d].r == 4) continue; if (!(0)) { ((void) sizeof ((_incomings_
[(destinations)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if
(_incomings_[(destinations)[_i_].d].c == 0) ; else __assert_fail
("_incomings_[(destinations)[_i_].d].c == 0", "ccv_nnc_graph.c"
, 762, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_
[(destinations)[_i_].d].c > 0) continue; _visit_->node[
_visit_->size].index = (((destinations)[_i_].d)); _visit_->
node[_visit_->size].term = ((_incomings_[(destinations)[_i_
].d].d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_
); } while (0);; ((void) sizeof ((_visit_->size <= (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size
)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_nnc_graph.c", 762, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
763 if (!root_schedule)
764 {
765 // If this is not a root schedule, we need to do partial topsort.
766 int psort_size = 0;
767 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
768 ++psort_size;
769 } ccv_nnc_graph_visit_endfor} }
770 schedule->psort = (int*)ccmallocmalloc(sizeof(int) * psort_size);
771 schedule->psort_size = psort_size;
772 psort_size = 0;
773 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
774 schedule->psort[psort_size++] = idx;
775 } ccv_nnc_graph_visit_endfor} }
776 }
777 int i, j, k;
778 // Generate exec dependencies (or, in other words, partial ordering of executions).
779 ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(exec_info_size, exec_info_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
780 int* buf = (int*)ccmallocmalloc(sizeof(int) * exec_info_size * 2);
781 int buf_size;
782#define for_block(x, val) \
783 do { \
784 if (((int32_t*)val)[0] > 0) \
785 { \
786 buf[buf_size * 2] = x; \
787 buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \
788 ++buf_size; \
789 } \
790 } while (0)
791 for (i = 0; i < exec_info_size; i++)
792 schd_info[i].stream_size = -1;
793 ccv_nnc_graph_visit_for(visit, exec_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_info)) const node __attribute__((unused
)) = (exec_info) + idx;
{
794 buf_size = 0; /* save all its parent deps to this buffer */
795 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
796 schd_info[idx].stream_size = 0;
797 if (vector)
798 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
799 if (!node->outgoings)
800 continue;
801 for (i = 0; i < node->outgoings->rnum; i++)
802 {
803 int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
804 const int32_t one = 1;
805 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx);
806 /* If not found, set it. If the current node is a destination node, there is no need
807 * to set itself as the parent of subsequent nodes because of its terminal nature. */
808 if (!term && (!cell.i32 || cell.i32[0] == 0))
809 ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one);
810 for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */
811 {
812 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]);
813 /* If not found, set */
814 if (!cell.i32 || cell.i32[0] == 0)
815 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]);
816 else {
817 /* Otherwise, set to the longest one */
818 int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1])({ typeof (cell.i32[0]) _a = (cell.i32[0]); typeof (buf[j * 2
+ 1]) _b = (buf[j * 2 + 1]); (_a > _b) ? _a : _b; })
;
819 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep);
820 }
821 }
822 }
823 } ccv_nnc_graph_visit_endfor} }
824#undef for_block
825 ccfreefree(buf);
826 // Algorithm to allocate signals and streams for this graph.
827 ccv_array_t* const stream_data = ccv_array_new(sizeof(ccv_nnc_stream_data_t), 0, 0);
828 ccv_array_t** const outgoings = cccalloccalloc(exec_info_size, sizeof(ccv_array_t*));
829 ccv_nnc_incoming_t* const incomings = cccalloccalloc(exec_info_size, sizeof(ccv_nnc_incoming_t));
830 int max_device_id_size = 1;
831 // Filter out outgoing nodes that we will be able to access afterwards anyway.
832 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
833 max_device_id_size = ccv_max(node->input_size + node->output_size, max_device_id_size)({ typeof (node->input_size + node->output_size) _a = (
node->input_size + node->output_size); typeof (max_device_id_size
) _b = (max_device_id_size); (_a > _b) ? _a : _b; })
;
834 if (node->outgoings)
835 {
836 outgoings[idx] = ccv_array_new(sizeof(int), 0, 0);
837 for (i = 0; i < node->outgoings->rnum; i++)
838 {
839 const int di = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
840 // Skip if we haven't accessed this exec.
841 if (schd_info[di].stream_size < 0)
842 continue;
843 int flag = 0;
844 for (j = 0; !flag && j < node->outgoings->rnum; j++)
845 {
846 if (j != i)
847 {
848 const int dj = *(int*)ccv_array_get(node->outgoings, j)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(j)))
;
849 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, di, dj);
850 flag = (cell.i32 && cell.i32[0]);
851 }
852 }
853 if (!flag)
854 {
855 ccv_array_push(outgoings[idx], &di);
856 if (!incomings[di].outgoings)
857 incomings[di].outgoings = ccv_array_new(sizeof(int), 1, 0);
858 ccv_array_push(incomings[di].outgoings, &idx);
859 }
860 }
861 }
862 } ccv_nnc_graph_visit_endfor} }
863#define visitor(node, idx, _) \
864 if (node->outgoings) \
865 for (i = 0; i < node->outgoings->rnum; i++) \
866 { \
867 const int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
; \
868 node->rank = ccv_max(incomings[d].rank + 1, node->rank)({ typeof (incomings[d].rank + 1) _a = (incomings[d].rank + 1
); typeof (node->rank) _b = (node->rank); (_a > _b) ?
_a : _b; })
; \
869 }
870 CCV_NNC_GRAPH_VISIT(graph, incomings, exec_info_size, destinations, destination_size, sources, source_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_
+= ((incomings)[_i_].outgoings) ? (incomings)[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (exec_info_size + _incoming_edges_
> 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_)
_incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t
) * (exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2
+ _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_info_size
) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size
)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size
)), (int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size
), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size)
; for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void)
sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 870
, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_] =
(destinations)[_i_].d; } int _exist_size_[2] = { (destination_size
), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0
) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r == 1) continue; _incomings_[_idx_].r = 1
; if ((incomings)[_idx_].outgoings) for (_j_ = 0; _j_ < (incomings
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((incomings)[_idx_].outgoings)->data)) + (
size_t)((incomings)[_idx_].outgoings)->rsize * (size_t)(_j_
))); ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_]] = d
; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_
) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++
) { ((void) sizeof (((destinations)[_i_].graph == graph) ? 1 :
0), __extension__ ({ if ((destinations)[_i_].graph == graph)
; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c"
, 870, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size
); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 2) continue
; _incomings_[_idx_].r = 2; if ((incomings)[_idx_].outgoings)
for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((incomings
)[_idx_].outgoings)->data)) + (size_t)((incomings)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; _exists_[
_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) =
(_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(source_size); _i_++) { ((void) sizeof (((sources)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph
== graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] =
(source_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (
_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ =
0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ =
_exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue;
_incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges > 0
) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(source_size); _i_++) { ((void) sizeof (((sources)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph
== graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[(sources)[_i_].d].d = 1; } for (_i_ = 0; _i_
< (destination_size); _i_++) { ((void) sizeof (((destinations
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = (destinations)[_i_].d; } _p_ = 0; _q_
= 1; _exist_size_[0] = (destination_size); _exist_size_[1] =
0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor(((incomings) + _idx_
), (_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d
) { ++_d_; _incomings_[_idx_].r = 4; } if ((incomings)[_idx_]
.outgoings) { if ((incomings)[_idx_].outgoings->rnum == 1)
{ const int d = *(int*)((void*)(((char*)(((incomings)[_idx_]
.outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(source_size)) { _exists_[_p_][_i_] = d; continue; } } else for
(_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum; _j_
++) { const int d = *(int*)((void*)(((char*)(((incomings)[_idx_
].outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(source_size)) { _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_
[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void
) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == graph) ; else __assert_fail (
"(sources)[_i_].graph == graph", "ccv_nnc_graph.c", 870, __extension__
__PRETTY_FUNCTION__); })); if (_incomings_[(sources)[_i_].d]
.r == 4) continue; if (!(0)) { ((void) sizeof ((_incomings_[(
sources)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_
[(sources)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(sources)[_i_].d].c == 0"
, "ccv_nnc_graph.c", 870, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[(sources)[_i_].d].c > 0) continue
; visitor(((incomings) + (sources)[_i_].d), ((sources)[_i_].d
), (_incomings_[(sources)[_i_].d].d)); } if (_heap_mem_) free
(_incomings_); } while (0);
;
871#undef visitor
872 int device_ids[max_device_id_size];
873 int outgoing_device_ids[max_device_id_size];
874 int signal_size = 0;
875 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
876 // Go through the incomings.
877 const int device_id_size = _ccv_nnc_device_ids_for_stream_data(node, device_id, stream_data, device_ids, max_device_id_size);
878 if (schd_info[idx].stream_size == 0)
879 {
880 schd_info[idx].stream_size = device_id_size; // At least at the same size as the device_id_size.
881 if (device_id_size > 1)
882 {
883 schd_info[idx]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * device_id_size * 2);
884 schd_info[idx]._heap_signals = (schd_info[idx]._heap_streams + device_id_size);
885 }
886 for (i = 0; i < device_id_size; i++)
887 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = -1, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] = -1;
888 }
889 for (i = 0; i < device_id_size; i++)
890 // Go through until the end to assign streams.
891 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] < 0)
892 {
893 int stream_idx = -1;
894 int stream_has_command = 0;
895 // First, find a good stream in stream data (the stream is good if it can be recycled, and it has the same command).
896 // Otherwise, we prefer a usable stream (it doesn't have the command, but it can be recycled).
897 for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++)
898 {
899 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(j)))
;
900 if (data->device_id == device_ids[i])
901 {
902 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, data->exec_idx);
903 // If there is a path to conclude that exec_idx is before idx, then we can reuse
904 // this stream. Otherwise the work in this "empty stream" could still be ongoing,
905 // and we may delay the following work unnecessarily.
906 if (cell.i32 && cell.i32[0] > 0)
907 {
908 if (ccv_array_find_uint(data->command_set, node->cmd.cmd))
909 stream_idx = j, stream_has_command = 1;
910 else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet.
911 stream_idx = j;
912 }
913 }
914 }
915 if (stream_idx < 0)
916 {
917 stream_idx = stream_data->rnum;
918 const ccv_nnc_stream_data_t data = {
919 .device_id = device_ids[i],
920 };
921 ccv_array_push(stream_data, &data);
922 }
923 assert(stream_idx >= 0)((void) sizeof ((stream_idx >= 0) ? 1 : 0), __extension__ (
{ if (stream_idx >= 0) ; else __assert_fail ("stream_idx >= 0"
, "ccv_nnc_graph.c", 923, __extension__ __PRETTY_FUNCTION__);
}))
;
924 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
925 if (!data->command_set)
926 data->command_set = ccv_array_new(sizeof(uint32_t), 1, 0);
927 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = stream_idx;
928 ccv_array_add_unique_uint(data->command_set, node->cmd.cmd);
929 // Assign all subsequent nodes to use this stream.
930 int outgoing_idx = idx;
931 while (outgoings[outgoing_idx] && outgoings[outgoing_idx]->rnum)
932 {
933 int highest_rank = -1;
934 int highest_idx = -1;
935 int stream_n = -1;
936 int stream_has_command = 0;
937 for (j = 0; j < outgoings[outgoing_idx]->rnum; j++)
938 {
939 const int d = *(int*)ccv_array_get(outgoings[outgoing_idx], j)((void*)(((char*)((outgoings[outgoing_idx])->data)) + (size_t
)(outgoings[outgoing_idx])->rsize * (size_t)(j)))
;
940 // This is not outside of our scope at this point.
941 assert(schd_info[d].stream_size >= 0)((void) sizeof ((schd_info[d].stream_size >= 0) ? 1 : 0), __extension__
({ if (schd_info[d].stream_size >= 0) ; else __assert_fail
("schd_info[d].stream_size >= 0", "ccv_nnc_graph.c", 941,
__extension__ __PRETTY_FUNCTION__); }))
;
942 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + d;
943 const int outgoing_device_id_size = _ccv_nnc_device_ids_for_stream_data(outgoing_node, device_id, stream_data, outgoing_device_ids, max_device_id_size);
944 if (schd_info[d].stream_size == 0)
945 {
946 schd_info[d].stream_size = outgoing_device_id_size; // At least at the same size as the device_id_size.
947 if (outgoing_device_id_size > 1)
948 {
949 schd_info[d]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * outgoing_device_id_size * 2);
950 schd_info[d]._heap_signals = (schd_info[d]._heap_streams + outgoing_device_id_size);
951 }
952 for (k = 0; k < outgoing_device_id_size; k++)
953 SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams
: (schd_info[d])._heap_streams)
[k] = -1, SCHEDULE_SIGNALS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_signals
: (schd_info[d])._heap_signals)
[k] = -1;
954 }
955 assert(schd_info[d].stream_size == outgoing_device_id_size)((void) sizeof ((schd_info[d].stream_size == outgoing_device_id_size
) ? 1 : 0), __extension__ ({ if (schd_info[d].stream_size == outgoing_device_id_size
) ; else __assert_fail ("schd_info[d].stream_size == outgoing_device_id_size"
, "ccv_nnc_graph.c", 955, __extension__ __PRETTY_FUNCTION__);
}))
;
956 for (k = 0; k < outgoing_device_id_size; k++)
957 // If it should be on the same device and the stream is not assigned yet, it is a potential candidate.
958 if (outgoing_device_ids[k] == device_ids[i] &&
959 SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams
: (schd_info[d])._heap_streams)
[k] < 0 &&
960 (incomings[d].rank > highest_rank ||
961 (incomings[d].rank == highest_rank &&
962 !stream_has_command && ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd))))
963 {
964 highest_rank = incomings[d].rank;
965 highest_idx = d;
966 stream_n = k;
967 // This is 1 if rank is the same (thus, I must break the tie already), if the rank is not the same, we need to compute this.
968 stream_has_command = (incomings[d].rank == highest_rank || ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd));
969 }
970 }
971 if (highest_idx >= 0)
972 {
973 outgoing_idx = highest_idx;
974 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + outgoing_idx;
975 assert(stream_n >= 0)((void) sizeof ((stream_n >= 0) ? 1 : 0), __extension__ ({
if (stream_n >= 0) ; else __assert_fail ("stream_n >= 0"
, "ccv_nnc_graph.c", 975, __extension__ __PRETTY_FUNCTION__);
}))
;
976 SCHEDULE_STREAMS(schd_info[outgoing_idx])((schd_info[outgoing_idx]).stream_size <= 1 ? (schd_info[outgoing_idx
])._inline_streams : (schd_info[outgoing_idx])._heap_streams)
[stream_n] = stream_idx;
977 ccv_array_add_unique_uint(data->command_set, outgoing_node->cmd.cmd);
978 } else
979 break;
980 }
981 data->exec_idx = outgoing_idx;
982 }
983 } ccv_nnc_graph_visit_endfor} }
984 // Go through to assign signals when necessary.
985 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
986 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
987 _ccv_nnc_graph_schedule_assign_signals(incomings[idx].outgoings, schd_info + idx, stream_data, &signal_size, schd_info, exec_info_size);
988 } ccv_nnc_graph_visit_endfor} }
989 for (i = 0; i < exec_info_size; i++)
990 if (outgoings[i])
991 ccv_array_free(outgoings[i]);
992 ccfreefree(outgoings);
993 ccv_matrix_free(exec_dep);
994 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
995 if (device_id >= 0)
996 {
997 // If the default stream (stream 0) is not the same as desired stream, swap with the one that is.
998 if (default_data->device_id != device_id)
999 {
1000 int exchange_stream_idx = -1;
1001 // Find the stream idx to exchange.
1002 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1003 int flag = 0;
1004 for(i = 0; !flag && i < schd_info[idx].stream_size; i++)
1005 {
1006 const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i];
1007 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
1008 if (data->device_id == device_id)
1009 {
1010 exchange_stream_idx = stream_idx;
1011 flag = 1;
1012 }
1013 }
1014 if (flag)
1015 break;
1016 } ccv_nnc_graph_visit_endfor} }
1017 assert(exchange_stream_idx >= 0)((void) sizeof ((exchange_stream_idx >= 0) ? 1 : 0), __extension__
({ if (exchange_stream_idx >= 0) ; else __assert_fail ("exchange_stream_idx >= 0"
, "ccv_nnc_graph.c", 1017, __extension__ __PRETTY_FUNCTION__)
; }))
;
1018 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1019 for (i = 0; i < schd_info[idx].stream_size; i++)
1020 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == 0)
1021 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = -1;
1022 } ccv_nnc_graph_visit_endfor} }
1023 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1024 for (i = 0; i < schd_info[idx].stream_size; i++)
1025 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == exchange_stream_idx)
1026 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = 0;
1027 } ccv_nnc_graph_visit_endfor} }
1028 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1029 for (i = 0; i < schd_info[idx].stream_size; i++)
1030 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] == -1)
1031 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = exchange_stream_idx;
1032 } ccv_nnc_graph_visit_endfor} }
1033 ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, exchange_stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(exchange_stream_idx)))
)->device_id = default_data->device_id;
1034 default_data->device_id = device_id;
1035 }
1036 }
1037 int graph_stream_1_size = 0;
1038 for (i = 0; i < source_size; i++)
1039 {
1040 const int idx = sources[i].d;
1041 // If it has incoming nodes, check whether these are on stream 0.
1042 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
1043 {
1044 int flag = 0;
1045 const ccv_array_t* const incoming = incomings[idx].outgoings;
1046 for (j = 0; !flag && j < incoming->rnum; j++)
1047 {
1048 const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(j)))
;
1049 for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++)
1050 flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx
])._inline_streams : (schd_info[incoming_idx])._heap_streams)
[k] == 0); // If this is the default stream, we already have a good start.
1051 }
1052 if (flag)
1053 continue;
1054 }
1055 for (j = 0; j < schd_info[idx].stream_size; j++)
1056 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this is not the default stream, we need explicit begin signal to start.
1057 ++graph_stream_1_size;
1058 }
1059 if (graph_stream_1_size > 0)
1060 {
1061 schedule->stream_1s = ccmallocmalloc(sizeof(int) * graph_stream_1_size);
1062 graph_stream_1_size = 0;
1063 for (i = 0; i < source_size; i++)
1064 {
1065 const int idx = sources[i].d;
1066 // If it has incoming nodes, check whether these are on stream 0.
1067 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
1068 {
1069 int flag = 0;
1070 const ccv_array_t* const incoming = incomings[idx].outgoings;
1071 for (j = 0; !flag && j < incoming->rnum; j++)
1072 {
1073 const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(j)))
;
1074 for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++)
1075 flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx
])._inline_streams : (schd_info[incoming_idx])._heap_streams)
[k] == 0); // If this is the default stream, we already have a good start.
1076 }
1077 if (flag)
1078 continue;
1079 }
1080 for (j = 0; j < schd_info[idx].stream_size; j++)
1081 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this is not the default stream, we need explicit begin signal to start.
1082 {
1083 const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j];
1084 int flag = 0;
1085 for (k = 0; !flag && k < graph_stream_1_size; k++)
1086 flag = (stream_idx == schedule->stream_1s[k]);
1087 if (!flag)
1088 schedule->stream_1s[graph_stream_1_size++] = stream_idx;
1089 }
1090 }
1091 schedule->stream_1_size = graph_stream_1_size;
1092 }
1093 for (i = 0; i < exec_info_size; i++)
1094 if (incomings[i].outgoings)
1095 ccv_array_free(incomings[i].outgoings);
1096 ccfreefree(incomings);
1097 int graph_wait_size = 0;
1098 for (i = 0; i < destination_size; i++)
1099 {
1100 const int idx = destinations[i].d;
1101 for (j = 0; j < schd_info[idx].stream_size; j++)
1102 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
1103 ++graph_wait_size;
1104 }
1105 if (graph_wait_size > 0)
1106 {
1107 schedule->waits = ccmallocmalloc(sizeof(int) * graph_wait_size);
1108 graph_wait_size = 0;
1109 for (i = 0; i < destination_size; i++)
1110 {
1111 const int idx = destinations[i].d;
1112 for (j = 0; j < schd_info[idx].stream_size; j++)
1113 if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
1114 {
1115 ccv_nnc_stream_data_t* const default_stream_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
1116 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j] < 0)
1117 SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j] = signal_size++;
1118 else if (default_stream_data->signal_set && ccv_array_find_int(default_stream_data->signal_set, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j]))
1119 continue;
1120 schedule->waits[graph_wait_size++] = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[j];
1121 }
1122 }
1123 schedule->wait_size = graph_wait_size;
1124 }
1125 for (i = 0; i < stream_data->rnum; i++)
1126 {
1127 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1128 if (data->signal_set)
1129 ccv_array_free(data->signal_set);
1130 assert(data->command_set)((void) sizeof ((data->command_set) ? 1 : 0), __extension__
({ if (data->command_set) ; else __assert_fail ("data->command_set"
, "ccv_nnc_graph.c", 1130, __extension__ __PRETTY_FUNCTION__)
; }))
;
1131 ccv_array_free(data->command_set);
1132 }
1133 // Allocate streams & signals
1134 int default_stream_type = stream_type;
1135 CCV_STREAM_SET_DEVICE_ID(default_stream_type, default_data->device_id)(default_stream_type) = (((default_stream_type) & ~0xfff00
) | (((default_data->device_id) & 0xfff) << 8))
;
1136 if (root_schedule)
1137 {
1138 assert(!graph->streams)((void) sizeof ((!graph->streams) ? 1 : 0), __extension__ (
{ if (!graph->streams) ; else __assert_fail ("!graph->streams"
, "ccv_nnc_graph.c", 1138, __extension__ __PRETTY_FUNCTION__)
; }))
;
1139 graph->stream_size = stream_data->rnum;
1140 graph->streams = (ccv_nnc_stream_context_t**)ccmallocmalloc(sizeof(ccv_nnc_stream_context_t*) * graph->stream_size);
1141 graph->block_stream_tasks = (co_routine_t**)cccalloccalloc(graph->stream_size, sizeof(co_routine_t*));
1142 if (stream_context)
1143 graph->streams[0] = stream_context;
1144 for (i = (stream_context ? 1 : 0); i < stream_data->rnum; i++)
1145 {
1146 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1147 int type = stream_type;
1148 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1149 graph->streams[i] = ccv_nnc_stream_context_new(type);
1150 }
1151 graph->signal_size = signal_size;
1152 graph->signals = (ccv_nnc_stream_signal_t**)cccalloccalloc(signal_size, sizeof(ccv_nnc_stream_signal_t*));
1153 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1154 for (i = 0; i < schd_info[idx].stream_size; i++)
1155 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1156 {
1157 const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i];
1158 if (!graph->signals[signal])
1159 {
1160 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ?
(schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams
)[i])))
;
1161 int type = stream_type;
1162 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1163 graph->signals[signal] = ccv_nnc_stream_signal_new(type);
1164 }
1165 }
1166 } ccv_nnc_graph_visit_endfor} }
1167 } else {
1168 assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ (
{ if (graph->streams) ; else __assert_fail ("graph->streams"
, "ccv_nnc_graph.c", 1168, __extension__ __PRETTY_FUNCTION__)
; }))
;
1169 assert(graph->stream_size >= stream_data->rnum)((void) sizeof ((graph->stream_size >= stream_data->
rnum) ? 1 : 0), __extension__ ({ if (graph->stream_size >=
stream_data->rnum) ; else __assert_fail ("graph->stream_size >= stream_data->rnum"
, "ccv_nnc_graph.c", 1169, __extension__ __PRETTY_FUNCTION__)
; }))
;
1170 // Find streams to proper allocated stream based on the type we need.
1171 int* const stream_idxs = (int*)ccmallocmalloc(sizeof(int) * (stream_data->rnum + signal_size));
1172 uint64_t* const stream_used = (uint64_t*)cccalloccalloc(((graph->stream_size + 63) >> 6) + ((graph->signal_size + 63) >> 6), sizeof(uint64_t));
1173 for (i = 0; i < stream_data->rnum; i++)
1174 {
1175 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1176 int type = stream_type;
1177 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1178 for (j = 0; j < graph->stream_size; j++)
1179 if (!(stream_used[j >> 6] & ((uint64_t)1 << (j & 63))))
1180 {
1181 const int stream_type = ccv_nnc_stream_context_type(graph->streams[j]);
1182 if (stream_type == type)
1183 {
1184 stream_idxs[i] = j;
1185 stream_used[j >> 6] |= ((uint64_t)1 << (j & 63));
1186 break;
1187 }
1188 }
1189 }
1190 assert(graph->signal_size >= signal_size)((void) sizeof ((graph->signal_size >= signal_size) ? 1
: 0), __extension__ ({ if (graph->signal_size >= signal_size
) ; else __assert_fail ("graph->signal_size >= signal_size"
, "ccv_nnc_graph.c", 1190, __extension__ __PRETTY_FUNCTION__)
; }))
;
1191 // Find signals to proper allocated signal based on the type we need.
1192 int* const signal_idxs = stream_idxs + stream_data->rnum;
1193 uint64_t* const signal_used = stream_used + ((graph->stream_size + 63) >> 6);
1194 for (i = 0; i < signal_size; i++)
1195 signal_idxs[i] = -1;
1196 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1197 for (i = 0; i < schd_info[idx].stream_size; i++)
1198 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1199 {
1200 const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i];
1201 if (signal_idxs[signal] < 0)
1202 {
1203 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ?
(schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams
)[i])))
;
1204 int type = stream_type;
1205 CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1206 for (j = 0; j < graph->signal_size; j++)
1207 if (!(signal_used[j >> 6] & ((uint64_t)1 << (j & 63))))
1208 {
1209 const int signal_type = ccv_nnc_stream_signal_type(graph->signals[j]);
1210 if (signal_type == type)
1211 {
1212 signal_idxs[signal] = j;
1213 signal_used[j >> 6] |= ((uint64_t)1 << (j & 63));
1214 break;
1215 }
1216 }
1217 }
1218 }
1219 } ccv_nnc_graph_visit_endfor} }
1220 // Now rebind streams and signals from the schedule.
1221 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1222 for (i = 0; i < schd_info[idx].stream_size; i++)
1223 {
1224 SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i] = stream_idxs[SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams
: (schd_info[idx])._heap_streams)
[i]];
1225 if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] >= 0)
1226 SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i] = signal_idxs[SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals
: (schd_info[idx])._heap_signals)
[i]];
1227 }
1228 for (i = 0; i < schd_info[idx].wait_size; i++)
1229 schd_info[idx].waits[i] = signal_idxs[schd_info[idx].waits[i]];
1230 } ccv_nnc_graph_visit_endfor} }
1231 for (i = 0; i < schedule->stream_1_size; i++)
1232 schedule->stream_1s[i] = stream_idxs[schedule->stream_1s[i]];
1233 for (i = 0; i < schedule->wait_size; i++)
1234 schedule->waits[i] = signal_idxs[schedule->waits[i]];
1235 // Rebind who is the stream 0 (default stream).
1236 schedule->stream_0 = stream_idxs[0];
1237 ccfreefree(stream_used);
1238 ccfreefree(stream_idxs);
1239 }
1240 assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ (
{ if (graph->streams) ; else __assert_fail ("graph->streams"
, "ccv_nnc_graph.c", 1240, __extension__ __PRETTY_FUNCTION__)
; }))
;
1241 ccv_nnc_graph_visit_free(visit);
1242 for (i = 0; i < signal_size; i++)
1243 { assert(graph->signals[i])((void) sizeof ((graph->signals[i]) ? 1 : 0), __extension__
({ if (graph->signals[i]) ; else __assert_fail ("graph->signals[i]"
, "ccv_nnc_graph.c", 1243, __extension__ __PRETTY_FUNCTION__)
; }))
; }
1244 if (schedule->stream_1_size)
1245 schedule->begin = ccv_nnc_stream_signal_new(default_stream_type);
1246 schedule->end = ccv_nnc_stream_signal_new(default_stream_type);
1247 // Do this recursively for its sub graphs.
1248 if (graph->sub_graphs)
1249 for (i = 0; i < graph->sub_graphs->rnum; i++)
1250 {
1251 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
;
1252 if (sub_graph && !sub_graph->default_schedule)
1253 {
1254 const int exec_idx = sub_graph->exec_idx - 1;
1255 assert(schd_info[exec_idx].stream_size == 1)((void) sizeof ((schd_info[exec_idx].stream_size == 1) ? 1 : 0
), __extension__ ({ if (schd_info[exec_idx].stream_size == 1)
; else __assert_fail ("schd_info[exec_idx].stream_size == 1"
, "ccv_nnc_graph.c", 1255, __extension__ __PRETTY_FUNCTION__)
; }))
;
1256 const int stream_idx = SCHEDULE_STREAMS(schd_info[exec_idx])((schd_info[exec_idx]).stream_size <= 1 ? (schd_info[exec_idx
])._inline_streams : (schd_info[exec_idx])._heap_streams)
[0];
1257 const int device_id = ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
)->device_id;
1258 sub_graph->default_schedule = _ccv_nnc_graph_static_schedule_new(sub_graph, stream_type, device_id, graph->streams[stream_idx], 0, 0, 0, 0);
1259 }
1260 }
1261 ccv_array_free(stream_data);
1262 return schedule;
1263}
1264void ccv_nnc_graph_set_default_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type)
1265{
1266 assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({
if (graph->p == 0) ; else __assert_fail ("graph->p == 0"
, "ccv_nnc_graph.c", 1266, __extension__ __PRETTY_FUNCTION__)
; }))
;
1267 if (graph->default_schedule)
1268 ccv_nnc_graph_static_schedule_free(graph->default_schedule);
1269 graph->default_schedule = _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, 0, 0, 0, 0, 0);
1270}
1271
1272ccv_nnc_graph_static_schedule_t* ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1273{
1274 assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({
if (graph->p == 0) ; else __assert_fail ("graph->p == 0"
, "ccv_nnc_graph.c", 1274, __extension__ __PRETTY_FUNCTION__)
; }))
;
1275 return _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, 0, sources, source_size, destinations, destination_size);
1276}
1277
1278ccv_nnc_stream_context_t* ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph)
1279{
1280 if (graph->streams && graph->stream_size > 0)
1281 return graph->streams[0];
1282 return 0;
1283}
1284
1285static void _ccv_nnc_graph_dot_exec(const int index, const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, ccv_nnc_stream_context_t** const streams, const int flags, FILE* out)
1286{
1287 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1288 fputc('{', out);
1289 fprintf(out, "node%d", index);
1290 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1291 {
1292 fputs("|Command: ", out);
1293 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1294 if (schd_info)
1295 {
1296 if (schd_info->stream_size > 0)
1297 {
1298 int i, flag = 0;
1299 fputs("|Stream: ", out);
1300 for (i = 0; i < schd_info->stream_size; i++)
1301 {
1302 const int device_id = streams ? CCV_TENSOR_GET_DEVICE_ID(streams[SCHEDULE_STREAMS(*schd_info)[i]]->type)(((streams[((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)[i]]->type) & 0xfff00) >>
8)
: 0;
1303 if (i == 0)
1304 fprintf(out, "%d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)
[i], device_id);
1305 else
1306 fprintf(out, ", %d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams
: (*schd_info)._heap_streams)
[i], device_id);
1307 }
1308 for (i = 0; i < schd_info->stream_size; i++)
1309 if (SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i] >= 0)
1310 {
1311 if (!flag)
1312 {
1313 flag = 1;
1314 fprintf(out, "|Signal: %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i]);
1315 } else
1316 fprintf(out, ", %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals
: (*schd_info)._heap_signals)
[i]);
1317 }
1318 }
1319 if (schd_info->wait_size > 0)
1320 {
1321 fputs("|Wait: ", out);
1322 int i;
1323 for (i = 0; i < schd_info->wait_size - 1; i++)
1324 fprintf(out, "%d, ", schd_info->waits[i]);
1325 fprintf(out, "%d", schd_info->waits[schd_info->wait_size - 1]);
1326 }
1327 }
1328 fputc('}', out);
1329 }
1330}
1331
1332static void _ccv_nnc_graph_dot_tensor(const int index, const ccv_nnc_tensor_t* const tensor, const int zone, const int flags, const int depth, FILE* out)
1333{
1334 // if it has an alias pointer, or, it is a long form.
1335 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1336 fputc('{', out);
1337 const int is_tensor_view = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW);
1338 if (is_tensor_view)
1339 fprintf(out, "tensorview%d", index);
1340 else
1341 fprintf(out, "tensor%d", index);
1342 int i;
1343 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1344 fputc('\'', out);
1345 if (CCV_GET_TAPE_ALLOC(tensor->type)((tensor->type) & CCV_TAPE_ALLOC))
1346 fputs(" (t)", out);
1347 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1348 {
1349 const int device_id = CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)(((tensor->info.type) & 0xfff00) >> 8);
1350 fprintf(out, "|d%d|zone%d", device_id, zone);
1351 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1352 fputc('\'', out);
1353 uintptr_t aptr = (uintptr_t)tensor->data.u8;
1354 const int* ainc = is_tensor_view ? ((ccv_nnc_tensor_view_t*)(tensor))->inc : tensor->info.dim;
1355 // For the last one, we don't extend to full ainc.
1356 size_t ainc_size = (ccv_nnc_dimension_count(ainc) - ainc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1357 // Print out the range as well.
1358 fprintf(out, "|{%#010x|%#010x}|%d", (uint32_t)aptr, (uint32_t)(aptr + ainc_size - 1), tensor->info.dim[0]);
1359 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor->info.dim[i]; i++)
1360 fprintf(out, "x%d", tensor->info.dim[i]);
1361 fputc('}', out);
1362 }
1363}
1364
// One record per tensor, used while recovering tensor relationships for the dot output.
// Records are ordered by _ccv_nnc_tensor_dot_sort_by_ptr: start_ptr first, tensor_ref as tie-breaker.
1365typedef struct {
 // NOTE(review): not assigned in the multiview helper below; presumably filled in later by the recovery pass - confirm.
1366 int index;
 // Running tensor number at the time the record was created (the multiview helper stores *tensor_index here).
1367 int name;
 // NOTE(review): not assigned in the multiview helper below; presumably groups tensors sharing memory - confirm.
1368 int zone;
 // Secondary sort key. For multiview leaves it is set equal to start_ptr.
1369 uintptr_t tensor_ref;
 // Address of the first byte of the tensor's data.
1370 uintptr_t start_ptr;
 // Address of the last byte (inclusive): start_ptr + size in bytes - 1.
1371 uintptr_t end_ptr;
1372} ccv_nnc_tensor_dot_t;
1373
// Result type of _ccv_nnc_graph_tensor_dot_recovery.
// NOTE(review): the meaning of the three int arrays is not established by the code visible here;
// the hints below are inferred from the field names only - confirm against the recovery function.
1374typedef struct {
 // Array of per-tensor records.
1375 ccv_nnc_tensor_dot_t* dots;
 // Presumably maps an original tensor position to its record - TODO confirm.
1376 int* remap;
 // Presumably a renumbering of zones - TODO confirm.
1377 int* rename_zone;
 // Presumably a renumbering of indexes - TODO confirm.
1378 int* rename_index;
1379} ccv_nnc_tensor_dot_recovery_t;
1380
1381// First sort by start_ptr, then sort by tensor ptr (so that we will have the same tensor sorted to one cluster).
1382#define less_than(i1, i2, aux) ((i1).start_ptr < (i2).start_ptr || ((i1).start_ptr == (i2).start_ptr && (i1).tensor_ref < (i2).tensor_ref))
1383static CCV_IMPLEMENT_QSORT(_ccv_nnc_tensor_dot_sort_by_ptr, ccv_nnc_tensor_dot_t, less_than)void _ccv_nnc_tensor_dot_sort_by_ptr(ccv_nnc_tensor_dot_t *array
, size_t total, int aux) { int isort_thresh = 7; ccv_nnc_tensor_dot_t
t; int sp = 0; struct { ccv_nnc_tensor_dot_t *lb; ccv_nnc_tensor_dot_t
*ub; } stack[48]; if( total <= 1 ) return; stack[0].lb = array
; stack[0].ub = array + (total - 1); while( sp >= 0 ) { ccv_nnc_tensor_dot_t
* left = stack[sp].lb; ccv_nnc_tensor_dot_t* right = stack[sp
--].ub; for(;;) { int i, n = (int)(right - left) + 1, m; ccv_nnc_tensor_dot_t
* ptr; ccv_nnc_tensor_dot_t* ptr2; if( n <= isort_thresh )
{ insert_sort: for( ptr = left + 1; ptr <= right; ptr++ )
{ for( ptr2 = ptr; ptr2 > left && less_than(ptr2[
0],ptr2[-1], aux); ptr2--) (((t)) = ((ptr2[0])), ((ptr2[0])) =
((ptr2[-1])), ((ptr2[-1])) = ((t))); } break; } else { ccv_nnc_tensor_dot_t
* left0; ccv_nnc_tensor_dot_t* left1; ccv_nnc_tensor_dot_t* right0
; ccv_nnc_tensor_dot_t* right1; ccv_nnc_tensor_dot_t* pivot; ccv_nnc_tensor_dot_t
* a; ccv_nnc_tensor_dot_t* b; ccv_nnc_tensor_dot_t* c; int swap_cnt
= 0; left0 = left; right0 = right; pivot = left + (n/2); if(
n > 40 ) { int d = n / 8; a = left, b = left + d, c = left
+ 2*d; left = less_than(*a, *b, aux) ? (less_than(*b, *c, aux
) ? b : (less_than(*a, *c, aux) ? c : a)) : (less_than(*c, *b
, aux) ? b : (less_than(*a, *c, aux) ? a : c)); a = pivot - d
, b = pivot, c = pivot + d; pivot = less_than(*a, *b, aux) ? (
less_than(*b, *c, aux) ? b : (less_than(*a, *c, aux) ? c : a)
) : (less_than(*c, *b, aux) ? b : (less_than(*a, *c, aux) ? a
: c)); a = right - 2*d, b = right - d, c = right; right = less_than
(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than(*a, *
c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than(
*a, *c, aux) ? a : c)); } a = left, b = pivot, c = right; pivot
= less_than(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than
(*a, *c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than
(*a, *c, aux) ? a : c)); if( pivot != left0 ) { (((t)) = ((*pivot
)), ((*pivot)) = ((*left0)), ((*left0)) = ((t))); pivot = left0
; } left = left1 = left0 + 1; right = right1 = right0; for(;;
) { while( left <= right && !less_than(*pivot, *left
, aux) ) { if( !less_than(*left, *pivot, aux) ) { if( left >
left1 ) (((t)) = ((*left1)), ((*left1)) = ((*left)), ((*left
)) = ((t))); swap_cnt = 1; left1++; } left++; } while( left <=
right && !less_than(*right, *pivot, aux) ) { if( !less_than
(*pivot, *right, aux) ) { if( right < right1 ) (((t)) = ((
*right1)), ((*right1)) = ((*right)), ((*right)) = ((t))); swap_cnt
= 1; right1--; } right--; } if( left > right ) break; (((
t)) = ((*left)), ((*left)) = ((*right)), ((*right)) = ((t)));
swap_cnt = 1; left++; right--; } if( swap_cnt == 0 ) { left =
left0, right = right0; goto insert_sort; } n = ({ typeof ((int
)(left1 - left0)) _a = ((int)(left1 - left0)); typeof ((int)(
left - left1)) _b = ((int)(left - left1)); (_a < _b) ? _a :
_b; }); for( i = 0; i < n; i++ ) (((t)) = ((left0[i])), (
(left0[i])) = ((left[i-n])), ((left[i-n])) = ((t))); n = ({ typeof
((int)(right0 - right1)) _a = ((int)(right0 - right1)); typeof
((int)(right1 - right)) _b = ((int)(right1 - right)); (_a <
_b) ? _a : _b; }); for( i = 0; i < n; i++ ) (((t)) = ((left
[i])), ((left[i])) = ((right0[i-n+1])), ((right0[i-n+1])) = (
(t))); n = (int)(left - left1); m = (int)(right1 - right); if
( n > 1 ) { if( m > 1 ) { if( n > m ) { stack[++sp].
lb = left0; stack[sp].ub = left0 + n - 1; left = right0 - m +
1, right = right0; } else { stack[++sp].lb = right0 - m + 1;
stack[sp].ub = right0; left = left0, right = left0 + n - 1; }
} else left = left0, right = left0 + n - 1; } else if( m >
1 ) left = right0 - m + 1, right = right0; else break; } } }
}
1384#undef less_than
1385
1386static int _ccv_nnc_graph_dot_tensor_multiview_count(const ccv_nnc_tensor_multiview_t* const mv)
1387{
1388 if (!CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
1389 return 1;
1390 const int count = mv->kind + mv->repeat;
1391 int i, c = 0;
1392 for (i = 0; i < count; i++)
1393 c += _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
1394 return c;
1395}
1396
1397static void _ccv_nnc_graph_dot_tensor_multiview_tensor_dots(const ccv_nnc_tensor_multiview_t* const mv, ccv_nnc_tensor_dot_t* const tensor_dots, int* tensor_index)
1398{
1399 const int count = mv->kind + mv->repeat;
1400 int i;
1401 for (i = 0; i < count; i++)
1402 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1403 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], tensor_dots, tensor_index);
1404 else {
1405 tensor_dots[*tensor_index].name = *tensor_index;
1406 tensor_dots[*tensor_index].start_ptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1407 // Because tv's pointer will get updated, it is not correct in this case to have one tensor_ref.
1408 tensor_dots[*tensor_index].tensor_ref = tensor_dots[*tensor_index].start_ptr;
1409 const size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1410 tensor_dots[*tensor_index].end_ptr = tensor_dots[*tensor_index].start_ptr + dim_size - 1;
1411 ++(*tensor_index);
1412 }
1413}
1414
1415static ccv_nnc_tensor_dot_recovery_t _ccv_nnc_graph_tensor_dot_recovery(const ccv_nnc_graph_t* const graph)
1416{
1417 int i, j;
1418 // Recover tensor relationships for all tensors referenced in the graph.
1419 // Most notably, we have to give these indexes, and find if they point to
1420 // the same memory region, and whether they overlap. These information
1421 // are lost since we converted from symbolic form to the execution form.
1422 // and here we do our best to recover because that is easier to understand
1423 // if we want to present the graph visually (also, we don't want to put this
1424 // information into the tensor or execution graph to avoid overhead, thus,
1425 // recovering is the best we can do).
1426 int tensor_count = 0;
1427 for (i = 0; i < graph->exec_info->rnum; i++)
50
Assuming 'i' is < field 'rnum'
51
Loop condition is true. Entering loop body
62
Assuming 'i' is >= field 'rnum'
1428 {
1429 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1430 for (j = 0; j < exec_info->input_size; j++)
52
Assuming 'j' is >= field 'input_size'
53
Loop condition is false. Execution continues on line 1433
1431 if (exec_info->inputs[j])
1432 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[j])((*(int*)(exec_info->inputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->inputs[j]) : 1;
1433 for (j = 0; j < exec_info->output_size; j++)
54
Assuming 'j' is < field 'output_size'
55
Loop condition is true. Entering loop body
60
Assuming 'j' is >= field 'output_size'
61
Loop condition is false. Execution continues on line 1427
1434 if (exec_info->outputs[j])
56
Assuming the condition is true
57
Taking true branch
1435 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[j])((*(int*)(exec_info->outputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->outputs[j]) : 1;
58
Assuming the condition is true
59
'?' condition is true
1436 }
1437 ccv_nnc_tensor_dot_t* tensor_dots = tensor_count > 0 ? (ccv_nnc_tensor_dot_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_dot_t) * tensor_count) : 0;
63
Loop condition is false. Execution continues on line 1437
64
Assuming 'tensor_count' is <= 0
65
'?' condition is false
66
'tensor_dots' initialized to a null pointer value
1438 int k = 0;
1439 for (i = 0; i < graph->exec_info->rnum; i++)
67
Loop condition is true. Entering loop body
73
Loop condition is false. Execution continues on line 1477
1440 {
1441 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1442 for (j = 0; j
67.1
'j' is >= field 'input_size'
< exec_info->input_size; j++)
68
Loop condition is false. Execution continues on line 1459
1443 {
1444 ccv_nnc_tensor_t* tensor = exec_info->inputs[j];
1445 if (!tensor)
1446 continue;
1447 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1448 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1449 else {
1450 tensor_dots[k].name = k;
1451 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1452 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1453 const int* inc = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1454 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1455 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1456 ++k;
1457 }
1458 }
1459 for (j = 0; j < exec_info->output_size; j++)
69
Loop condition is true. Entering loop body
72
Loop condition is false. Execution continues on line 1439
1460 {
1461 ccv_nnc_tensor_t* tensor = exec_info->outputs[j];
1462 if (!tensor
69.1
'tensor' is non-null
)
70
Taking false branch
1463 continue;
1464 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
71
Taking true branch
1465 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1466 else {
1467 tensor_dots[k].name = k;
1468 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1469 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1470 const int* inc = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1471 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1472 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1473 ++k;
1474 }
1475 }
1476 }
1477 tensor_count = k; // We may over count, now shrink.
1478 // To group overlap memory into one zone, we sort it by start ptr first (secondary by the tensor pointer).
1479 _ccv_nnc_tensor_dot_sort_by_ptr(tensor_dots, tensor_count, 0);
1480 int index = 0, zone = 0;
1481 uintptr_t tensor_ref = tensor_count > 0 ? tensor_dots[0].tensor_ref : 0;
74
Assuming 'tensor_count' is > 0
75
'?' condition is true
76
Dereference of null pointer
1482 uintptr_t end_ptr = tensor_count > 0 ? tensor_dots[0].end_ptr : 0;
1483 // Then, it is trivial, we go by end ptr. If the next start ptr is still within the end ptr (start ptr <= end ptr),
1484 // they are the same zone.
1485 for (i = 0; i < tensor_count; i++)
1486 {
1487 if (tensor_dots[i].tensor_ref != tensor_ref)
1488 {
1489 tensor_ref = tensor_dots[i].tensor_ref;
1490 ++index;
1491 }
1492 if (tensor_dots[i].start_ptr > end_ptr)
1493 {
1494 end_ptr = ccv_max(end_ptr, tensor_dots[i].end_ptr)({ typeof (end_ptr) _a = (end_ptr); typeof (tensor_dots[i].end_ptr
) _b = (tensor_dots[i].end_ptr); (_a > _b) ? _a : _b; })
;
1495 ++zone;
1496 }
1497 tensor_dots[i].index = index;
1498 tensor_dots[i].zone = zone;
1499 }
1500 // We already have index and zone assigned, but the problem is that these are not very human interpretable (because
1501 // it follows the pointer from low to high, not the tensor creation order). The following code renamed both the index
1502 // and the zone so that it is much more understandable.
1503 const int index_count = index + 1;
1504 const int zone_count = zone + 1;
1505 int* remap = (int*)ccmallocmalloc(sizeof(int) * (tensor_count + index_count + zone_count));
1506 int* rename_index = remap + tensor_count;
1507 int* rename_zone = rename_index + index_count;
1508 for (i = 0; i < tensor_count; i++)
1509 remap[tensor_dots[i].name] = i;
1510 for (i = 0; i < index_count; i++)
1511 rename_index[i] = -1;
1512 for (i = 0; i < zone_count; i++)
1513 rename_zone[i] = -1;
1514 index = 0;
1515 zone = 0;
1516 for (i = 0; i < tensor_count; i++)
1517 {
1518 ccv_nnc_tensor_dot_t* tensor_dot = tensor_dots + remap[i];
1519 if (rename_index[tensor_dot->index] == -1)
1520 rename_index[tensor_dot->index] = index++;
1521 if (rename_zone[tensor_dot->zone] == -1)
1522 rename_zone[tensor_dot->zone] = zone++;
1523 }
1524 ccv_nnc_tensor_dot_recovery_t recovery = {
1525 .dots = tensor_dots,
1526 .remap = remap,
1527 .rename_index = rename_index,
1528 .rename_zone = rename_zone,
1529 };
1530 return recovery;
1531}
1532
1533static void _ccv_nnc_graph_tensor_dot_recovery_free(const ccv_nnc_tensor_dot_recovery_t recovery)
1534{
1535 ccfreefree(recovery.dots);
1536 ccfreefree(recovery.remap);
1537}
1538
1539static void _ccv_nnc_graph_dot_tensor_multiview_one(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int depth, int* tensor_index, FILE* out)
1540{
1541 const int count = mv->kind + mv->repeat;
1542 int i, j;
1543 fputs("|{", out);
1544 for (i = 0; i < count; i++)
1545 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1546 {
1547 fprintf(out, "{%d", i);
1548 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1549 fputc('*', out); // Denotes that we loop on this.
1550 _ccv_nnc_graph_dot_tensor_multiview_one((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], recovery, depth, tensor_index, out);
1551 if (i == count - 1)
1552 fputc('}', out);
1553 else
1554 fputs("}|", out);
1555 } else {
1556 fprintf(out, "{%d", i);
1557 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1558 fputc('*', out); // Denotes that we loop on this.
1559 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1560 fprintf(out, "|zone%d", recovery.rename_zone[tensor_dot->zone]);
1561 for (j = 0; j < depth; j++)
1562 fputc('\'', out);
1563 uintptr_t aptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1564 // For the last one, we don't extend to full ainc.
1565 size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1566 // Print out the range as well.
1567 fprintf(out, "|{%#010x|%#010x}", (uint32_t)aptr, (uint32_t)(aptr + dim_size - 1));
1568 ++(*tensor_index);
1569 if (i == count - 1)
1570 fputc('}', out);
1571 else
1572 fputs("}|", out);
1573 }
1574 fputc('}', out);
1575}
1576
1577static void _ccv_nnc_graph_dot_tensor_multiview(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, int* tensor_index, FILE* out)
1578{
1579 // if it has an alias pointer, or, it is a long form.
1580 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1581 fputc('{', out);
1582 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1583 fprintf(out, "multiview%d", recovery.rename_index[tensor_dot->index]);
1584 int i;
1585 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1586 fputc('\'', out);
1587 if (CCV_GET_TAPE_ALLOC(mv->type)((mv->type) & CCV_TAPE_ALLOC))
1588 fputs(" (t)", out);
1589 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1590 {
1591 _ccv_nnc_graph_dot_tensor_multiview_one(mv, recovery, depth, tensor_index, out);
1592 const ccv_nnc_tensor_t* root = (ccv_nnc_tensor_t*)mv;
1593 while (CCV_IS_TENSOR_MULTIVIEW(root)((*(int*)(root)) & CCV_TENSOR_MULTIVIEW))
1594 root = CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)root)(((ccv_nnc_tensor_multiview_t*)root)->_heap_data ? ((ccv_nnc_tensor_multiview_t
*)root)->_heap_data : ((ccv_nnc_tensor_multiview_t*)root)->
_inline_data)
[0];
1595 fprintf(out, "|%d", root->info.dim[0]);
1596 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && root->info.dim[i]; i++)
1597 fprintf(out, "x%d", root->info.dim[i]);
1598 fputc('}', out);
1599 } else
1600 *tensor_index += _ccv_nnc_graph_dot_tensor_multiview_count(mv);
1601}
1602
1603static void _ccv_nnc_graph_dot_node(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int exec_index, ccv_nnc_stream_context_t** const streams, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* const tensor_index)
1604{
1605 fprintf(out, "node%d [shape=record,label=\"", exec_index);
1606 _ccv_nnc_graph_dot_exec(exec_index, exec_info, schd_info, streams, flags, out);
1607 int i;
1608 int k = *tensor_index;
1609 if (exec_info->input_size > 0)
1610 {
1611 fputs("|{Input", out);
1612 for (i = 0; i < exec_info->input_size; i++)
1613 if (exec_info->inputs[i])
1614 {
1615 fputc('|', out);
1616 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1617 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1618 else {
1619 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1620 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1621 ++k;
1622 }
1623 } else
1624 fputs("|-", out);
1625 fputc('}', out);
1626 }
1627 if (exec_info->output_size > 0)
1628 {
1629 fputs("|{Output", out);
1630 for (i = 0; i < exec_info->output_size; i++)
1631 if (exec_info->outputs[i])
1632 {
1633 fputc('|', out);
1634 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1635 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1636 else {
1637 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1638 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1639 ++k;
1640 }
1641 } else
1642 fputs("|-", out);
1643 fputc('}', out);
1644 }
1645 fputs("\"];\n", out);
1646 *tensor_index = k;
1647}
1648
1649static void _ccv_nnc_graph_dot_while_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const ccv_nnc_graph_t* const while_graph, const int flags, const int depth, FILE* out, int* tensor_index)
1650{
1651 int i;
1652 fprintf(out, "label=<<b>while%d </b>Command: ", exec_index);
1653 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1654 fputs(">;\n", out);
1655 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1656 int k = *tensor_index;
1657 if (exec_info->input_size > 0)
1658 {
1659 fputs("{Input|{", out);
1660 for (i = 0; i < exec_info->input_size; i++)
1661 {
1662 if (i > 0)
1663 fputc('|', out);
1664 if (exec_info->inputs[i])
1665 {
1666 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1667 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1668 else {
1669 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1670 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1671 ++k;
1672 }
1673 } else
1674 fputc('-', out);
1675 }
1676 fputs("}}", out);
1677 }
1678 if (exec_info->output_size > 0)
1679 {
1680 if (exec_info->input_size > 0)
1681 fputs("|", out);
1682 fputs("{Output|{", out);
1683 for (i = 0; i < exec_info->output_size; i++)
1684 {
1685 if (i > 0)
1686 fputc('|', out);
1687 if (exec_info->outputs[i])
1688 {
1689 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1690 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1691 else {
1692 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1693 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1694 ++k;
1695 }
1696 } else
1697 fputc('-', out);
1698 }
1699 fputs("}}", out);
1700 }
1701 fputs("}\"];\n", out);
1702 *tensor_index = k;
1703}
1704
1705static void _ccv_nnc_graph_dot_case_of_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* tensor_index)
1706{
1707 int i;
1708 fprintf(out, "label=<<b>caseof%d </b>Command: ", exec_index);
1709 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1710 fputs(">;\n", out);
1711 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1712 int k = *tensor_index;
1713 if (exec_info->input_size > 0)
1714 {
1715 fputs("{Input|{", out);
1716 for (i = 0; i < exec_info->input_size; i++)
1717 {
1718 if (i > 0)
1719 fputc('|', out);
1720 if (exec_info->inputs[i])
1721 {
1722 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1723 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1724 else {
1725 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1726 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1727 ++k;
1728 }
1729 } else
1730 fputc('-', out);
1731 }
1732 fputs("}}", out);
1733 }
1734 if (exec_info->output_size > 0)
1735 {
1736 if (exec_info->input_size > 0)
1737 fputs("|", out);
1738 fputs("{Output|{", out);
1739 for (i = 0; i < exec_info->output_size; i++)
1740 {
1741 if (i > 0)
1742 fputc('|', out);
1743 if (exec_info->outputs[i])
1744 {
1745 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1746 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1747 else {
1748 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1749 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1750 ++k;
1751 }
1752 } else
1753 fputc('-', out);
1754 }
1755 fputs("}}", out);
1756 }
1757 fputs("}\"];\n", out);
1758 *tensor_index = k;
1759}
1760
1761static void _ccv_nnc_graph_dot_sub_graphs(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_tensor_dot_recovery_t p_recovery, const ccv_array_t* const sub_graphs, const int flags, const int depth, FILE* out, int* tensor_index, int* exec_index)
1762{
1763 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
7
Assuming the condition is false
8
Taking false branch
21
Assuming the condition is false
22
Taking false branch
35
Assuming the condition is false
36
Taking false branch
1764 {
1765 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1766 const ccv_nnc_graph_t* const while_graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[0] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[0]
- 1)))
;
1767 // Output this node info within this subgraph.
1768 _ccv_nnc_graph_dot_while_label(exec_info, *exec_index, p_recovery, while_graph, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1769 } else if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
9
Assuming the condition is false
10
Taking false branch
23
Assuming the condition is false
24
Taking false branch
37
Assuming the condition is false
38
Taking false branch
1770 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1771 _ccv_nnc_graph_dot_case_of_label(exec_info, *exec_index, p_recovery, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1772 }
1773 ++(*exec_index);
1774 int p;
1775 for (p = 0; p < exec_info->graph_ref_size; p++)
11
Assuming 'p' is < field 'graph_ref_size'
12
Loop condition is true. Entering loop body
25
Assuming 'p' is < field 'graph_ref_size'
26
Loop condition is true. Entering loop body
39
Assuming 'p' is < field 'graph_ref_size'
40
Loop condition is true. Entering loop body
45
Assuming 'p' is < field 'graph_ref_size'
46
Loop condition is true. Entering loop body
1776 {
1777 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
13
Taking false branch
27
Taking false branch
41
Taking false branch
47
Taking false branch
1778 {
1779 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=\"\"\n", *exec_index, *exec_index);
1780 ++(*exec_index);
1781 }
1782 const ccv_nnc_graph_t* const graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[p] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[p]
- 1)))
;
14
'?' condition is false
28
'?' condition is false
42
'?' condition is false
48
'?' condition is false
1783 const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule;
1784 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
49
Calling '_ccv_nnc_graph_tensor_dot_recovery'
1785 int i, j;
1786 int k = 0;
1787 int* node_id = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
1788 // Output styles.
1789 for (i = 0; i
42.1
'i' is >= field 'rnum'
< graph->exec_info->rnum; i++)
15
Loop condition is true. Entering loop body
29
Loop condition is true. Entering loop body
43
Loop condition is false. Execution continues on line 1803
1790 {
1791 node_id[i] = *exec_index;
1792 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1793 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0]
)
16
Assuming field '_heap_graph_ref' is null
17
'?' condition is false
18
Assuming the condition is true
19
Taking true branch
30
Assuming field '_heap_graph_ref' is null
31
'?' condition is false
32
Assuming the condition is true
33
Taking true branch
1794 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, depth + 1, out, &k, exec_index);
20
Calling '_ccv_nnc_graph_dot_sub_graphs'
34
Calling '_ccv_nnc_graph_dot_sub_graphs'
1795 else {
1796 _ccv_nnc_graph_dot_node(exec_info,
1797 schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0,
1798 *exec_index, graph->streams, recovery, flags, depth, out, &k);
1799 ++(*exec_index);
1800 }
1801 }
1802 // Output connections.
1803 for (i = 0; i
43.1
'i' is >= field 'rnum'
< graph->exec_info->rnum; i++)
44
Loop condition is false. Execution continues on line 1822
1804 {
1805 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1806 if (exec_info->outgoings)
1807 for (j = 0; j < exec_info->outgoings->rnum; j++)
1808 {
1809 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j)((void*)(((char*)((exec_info->outgoings)->data)) + (size_t
)(exec_info->outgoings)->rsize * (size_t)(j)))
;
1810 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(outgoing_idx)))
;
1811 // If both are sub-graphs, have both tail and head specified.
1812 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1813 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1814 else if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && !CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1815 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1816 else if (!CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1817 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1818 else
1819 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1820 }
1821 }
1822 fputs("}\n", out);
1823 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1824 ccfreefree(node_id);
1825 }
1826 // Extra subgraph cluster.
1827 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1828 fputs("}\n", out);
1829}
1830
1831void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out)
1832{
1833 fputs("digraph G {\ncompound=true;\n", out);
1834 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1835 int i, j;
1836 int k = 0, c = 0;
1837 int* node_id = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
1838 const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule;
1839 // Output styles.
1840 for (i = 0; i < graph->exec_info->rnum; i++)
1
Loop condition is true. Entering loop body
1841 {
1842 node_id[i] = c;
1843 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1844 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0]
)
2
Assuming field '_heap_graph_ref' is null
3
'?' condition is false
4
Assuming the condition is true
5
Taking true branch
1845 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, 1, out, &k, &c);
6
Calling '_ccv_nnc_graph_dot_sub_graphs'
1846 else {
1847 _ccv_nnc_graph_dot_node(exec_info,
1848 schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0,
1849 c, graph->streams, recovery, flags, 0, out, &k);
1850 ++c;
1851 }
1852 }
1853 // Output connections.
1854 for (i = 0; i < graph->exec_info->rnum; i++)
1855 {
1856 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1857 if (exec_info->outgoings)
1858 for (j = 0; j < exec_info->outgoings->rnum; j++)
1859 {
1860 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j)((void*)(((char*)((exec_info->outgoings)->data)) + (size_t
)(exec_info->outgoings)->rsize * (size_t)(j)))
;
1861 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(outgoing_idx)))
;
1862 // If both are sub-graphs, have both tail and head specified.
1863 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1864 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1865 else if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && !CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1866 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1867 else if (!CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0] && CCV_NNC_GRAPH_REF(outgoing_info)((outgoing_info)->_heap_graph_ref ? (outgoing_info)->_heap_graph_ref
: (outgoing_info)->_inline_graph_ref)
[0])
1868 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1869 else
1870 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1871 }
1872 }
1873 fputs("}\n", out);
1874 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1875 ccfreefree(node_id);
1876}
1877
1878void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1879{
1880 // exec current node, for synchronous CPU execution, no stream unit.
1881 int i;
1882#define visitor(node, idx, ...) \
1883 do { \
1884 if (node->cmd.cmd == CCV_NNC_NOOP) \
1885 continue; \
1886 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) \
1887 for (i = 0; i < node->graph_ref_size; i++) \
1888 { \
1889 ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[i] - 1)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(((node)->_heap_graph_ref
? (node)->_heap_graph_ref : (node)->_inline_graph_ref)
[i] - 1)))
; \
1890 ccv_nnc_graph_autotune(sub_graph, max_workspace_size, flags, 0, 0, 0, 0); \
1891 } \
1892 else { \
1893 /* Need to unwrap these tensors */ \
1894 for (i = 0; i < node->input_size + node->output_size; i++) \
1895 if (node->inputs[i] && CCV_IS_TENSOR_MULTIVIEW(node->inputs[i])((*(int*)(node->inputs[i])) & CCV_TENSOR_MULTIVIEW)) \
1896 node->inputs[i] = _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)node->inputs[i]); \
1897 PRINT(CCV_CLI_VERBOSE, "%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node
->cmd.cmd), idx, node->input_size, node->output_size
); fflush(stdout); } } while (0)
; \
1898 for (i = 0; i < node->input_size; i++) \
1899 PRINT(CCV_CLI_VERBOSE, "|-> %d. %p (%p)\n", i + 1, node->inputs[i], (node->inputs[i] ? node->inputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-> %d. %p (%p)\n", i + 1, node->inputs[i], (
node->inputs[i] ? node->inputs[i]->data.u8 : 0)); fflush
(stdout); } } while (0)
; \
1900 for (i = 0; i < node->output_size; i++) \
1901 PRINT(CCV_CLI_VERBOSE, "|<- %d. %p (%p)\n", i + 1, node->outputs[i], (node->outputs[i] ? node->outputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|<- %d. %p (%p)\n", i + 1, node->outputs[i],
(node->outputs[i] ? node->outputs[i]->data.u8 : 0))
; fflush(stdout); } } while (0)
; \
1902 node->cmd = ccv_nnc_cmd_autotune(node->cmd, max_workspace_size, node->hint, flags, node->inputs, node->input_size, node->outputs, node->output_size, 0); \
1903 } \
1904 } while (0)
1905 const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0)))
: 0);
1906 const int graph_source_size = source_size ? source_size : (graph->sources ? graph->sources->rnum : 0);
1907 const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0)))
: 0);
1908 const int graph_destination_size = destination_size ? destination_size : (graph->destinations ? graph->destinations->rnum : 0);
1909 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph_source_size); _i_++
) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1
: 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph
) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_sources)[_i_].d; } int _exist_size_
[2] = { (graph_source_size), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_source_size
); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph ==
graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_sources)[_i_].d; } _exist_size_
[0] = (graph_source_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size
); _i_++) { ((void) sizeof (((graph_destinations)[_i_].graph ==
graph) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_
].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_destinations)[_i_].d; } _exist_size_
[0] = (graph_destination_size); _exist_size_[1] = 0; _p_ = 0,
_q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const
int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r
!= 2) continue; _incomings_[_idx_].r = 3; if (_incomings_[_idx_
].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_
++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_
]; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_];
} } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_
= 0; _i_ < (graph_destination_size); _i_++) { ((void) sizeof
(((graph_destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail
("(graph_destinations)[_i_].graph == graph", "ccv_nnc_graph.c"
, 1909, __extension__ __PRETTY_FUNCTION__); })); _incomings_[
(graph_destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (
graph_source_size); _i_++) { ((void) sizeof (((graph_sources)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources
)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); _exists_[0][_i_] = (graph_sources)[_i_].d; } _p_ = 0; _q_
= 1; _exist_size_[0] = (graph_source_size); _exist_size_[1] =
0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0)))) + _idx_), (
_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d) { ++
_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph_destination_size
)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0;
_j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph_destination_size
)) { _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++)
{ ((void) sizeof (((graph_destinations)[_i_].graph == graph)
? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph
== graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); if (_incomings_[(graph_destinations)[_i_].d].r == 4) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(graph_destinations
)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[
(graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0"
, "ccv_nnc_graph.c", 1909, __extension__ __PRETTY_FUNCTION__)
; })); } else if (_incomings_[(graph_destinations)[_i_].d].c >
0) continue; visitor((((ccv_nnc_graph_exec_info_t*)((void*)(
((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0)))) + (graph_destinations)[
_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations
)[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while (
0);
;
1910#undef visitor
1911}
1912
/*
 * Tear down a graph: release every per-node allocation, every optional
 * graph-level container that is present, all sub-graphs (recursively), and
 * finally the graph structure itself.
 *
 * graph: the graph to destroy. It is dereferenced unconditionally, and
 *        graph->exec_info is iterated and freed without a NULL check, so
 *        both must be non-NULL. Every other member is checked before use.
 *
 * After this returns, graph and everything reachable only through it must
 * not be used again.
 *
 * NOTE(review): in this annotated listing each macro use appears to be
 * followed by its expansion (e.g. "ccfree" directly followed by "free").
 */
1913void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph)
1914{
1915 int i, j;
// Step 1: release the storage owned by each execution node.
1916 for (i = 0; i < graph->exec_info->rnum; i++)
1917 {
1918 ccv_nnc_graph_exec_info_t *info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
// Only the heap-allocated sub-graph reference list is freed here; there is
// no corresponding free for the inline variant.
1919 if (info->_heap_graph_ref)
1920 ccfreefree(info->_heap_graph_ref);
1921 ccv_array_t* outgoings = info->outgoings;
1922 if (outgoings)
1923 ccv_array_free(outgoings);
1924 // Inputs and outputs are allocated as one contiguous block, so freeing the inputs pointer releases both.
1925 if (info->inputs)
1926 ccfreefree(info->inputs);
1927 if (info->input_flags)
1928 ccfreefree(info->input_flags);
1929 if (info->updates)
1930 ccfreefree(info->updates);
// p_while.inputs is only examined when the node carries the P_WHILE flag.
1931 if ((info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) && info->p_while.inputs)
1932 ccfreefree(info->p_while.inputs);
1933 }
// Step 2: tensor wraps. Each array element is a pointer to a wrap array
// (possibly NULL); free every wrap in it, then the wrap array, then the
// container.
1934 if (graph->tensor_wraps)
1935 {
1936 for (i = 0; i < graph->tensor_wraps->rnum; i++)
1937 {
1938 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, i)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(i)))
;
1939 if (tensor_wrap_array)
1940 {
1941 for (j = 0; j < tensor_wrap_array->size; j++)
1942 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[j]);
1943 ccfreefree(tensor_wrap_array);
1944 }
1945 }
1946 ccv_array_free(graph->tensor_wraps);
1947 }
// Step 3: optional graph-level containers; each is released only if present.
1948 if (graph->tensor_wraps_refs)
1949 ccv_array_free(graph->tensor_wraps_refs);
1950 if (graph->breakpoints)
1951 ccfreefree(graph->breakpoints);
1952 if (graph->sources)
1953 ccv_array_free(graph->sources);
1954 if (graph->destinations)
1955 ccv_array_free(graph->destinations);
1956 if (graph->default_schedule)
1957 ccv_nnc_graph_static_schedule_free(graph->default_schedule);
// Step 4: stream contexts, then the array that holds them.
1958 if (graph->streams)
1959 {
1960 // If the graph has a parent graph, the default stream (streams[0]) is allocated by the parent graph, so it must be skipped here.
1961 if (!graph->p)
1962 ccv_nnc_stream_context_free(graph->streams[0]);
// Streams from index 1 onward are always freed by this graph.
1963 for (i = 1; i < graph->stream_size; i++)
1964 ccv_nnc_stream_context_free(graph->streams[i]);
1965 ccfreefree(graph->streams);
1966 }
1967 if (graph->block_stream_tasks)
1968 ccfreefree(graph->block_stream_tasks);
// Step 5: every signal (starting at index 0), then the signal array.
1969 if (graph->signals)
1970 {
1971 for (i = 0; i < graph->signal_size; i++)
1972 ccv_nnc_stream_signal_free(graph->signals[i]);
1973 ccfreefree(graph->signals);
1974 }
// Step 6: carry-overs. Each entry holds a "from" and a "to" tensor wrap, and
// both are released before the container.
1975 if (graph->carry_overs)
1976 {
1977 for (i = 0; i < graph->carry_overs->rnum; i++)
1978 {
1979 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i)((void*)(((char*)((graph->carry_overs)->data)) + (size_t
)(graph->carry_overs)->rsize * (size_t)(i)))
;
1980 _ccv_nnc_graph_tensor_wrap_free(carry_over->from);
1981 _ccv_nnc_graph_tensor_wrap_free(carry_over->to);
1982 }
1983 ccv_array_free(graph->carry_overs);
1984 }
// Step 7: recursively destroy every sub-graph. Each stored pointer is passed
// on without a NULL check.
1985 if (graph->sub_graphs)
1986 {
1987 for (i = 0; i < graph->sub_graphs->rnum; i++)
1988 ccv_nnc_graph_free(*(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
);
1989 ccv_array_free(graph->sub_graphs);
1990 }
// Step 8: exec_info is freed last among the containers because step 1 still
// iterates it; then the optional buffer and the graph struct itself.
1991 ccv_array_free(graph->exec_info);
1992 if (graph->buffer)
1993 ccfreefree(graph->buffer);
1994 ccfreefree(graph);
1995}