Bug Summary

File: nnc/ccv_nnc_graph.c
Warning: line 1695, column 2
Dereference of null pointer
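
For orientation, the defect class this checker reports looks like the following minimal sketch. This is hypothetical illustration code, not the flagged statement itself (line 1695 lies beyond this excerpt): a pointer that can legitimately be null on some path gets dereferenced without a check.

    #include <stdlib.h>

    typedef struct {
        int* data; /* may still be 0 if allocation was skipped */
    } holder_t;

    static int read_first(const holder_t* const h)
    {
        /* The reported pattern: when h->data is 0 on this path,
         * this is a dereference of a null pointer. */
        return h->data[0];
    }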

Annotated Source Code

clang -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_graph.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model static -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -target-feature +sse2 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -resource-dir /usr/local/lib/clang/8.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_UCONTEXT -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -I /usr/local/include -internal-isystem /usr/local/include -internal-isystem /usr/local/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir /home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fmessage-length 0 -fblocks -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -o /home/liu/buildslave/public_html/analyze/2019-07-03-215927-77989-1 -x c ccv_nnc_graph.c -faddrsig
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_graph.h"
6
7#pragma mark - Level-2 API
8
9ccv_nnc_graph_t* ccv_nnc_graph_new(void)
10{
11 ccv_nnc_graph_t* graph = (ccv_nnc_graph_t*)cccalloc(1, sizeof(ccv_nnc_graph_t));
12 graph->exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), 5, 0);
13 return graph;
14}
15
16void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size)
17{
18 if (!graph->sources)
19 graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), source_size, 0);
20 else
21 ccv_array_clear(graph->sources);
22 int i;
23 for (i = 0; i < source_size; i++)
24 ccv_array_push(graph->sources, sources + i);
25 graph->topsorted = 0;
26}
27
28ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph)
29{
30 return graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0) : 0;
31}
32
33int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph)
34{
35 return graph->sources ? graph->sources->rnum : 0;
36}
37
38void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
39{
40 if (!graph->destinations)
41 graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), destination_size, 0);
42 else
43 ccv_array_clear(graph->destinations);
44 int i;
45 for (i = 0; i < destination_size; i++)
46 ccv_array_push(graph->destinations, destinations + i);
47 graph->topsorted = 0;
48}
49
50ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph)
51{
52 return graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0) : 0;
53}
54
55int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph)
56{
57 return graph->destinations ? graph->destinations->rnum : 0;
58}
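
As a usage note, the setters and getters above pair up as follows; a minimal sketch, assuming `cmd` and `hint` are a previously constructed ccv_nnc_cmd_t and ccv_nnc_hint_t (error handling omitted):

    ccv_nnc_graph_t* const graph = ccv_nnc_graph_new();
    const ccv_nnc_graph_exec_t node = ccv_nnc_graph_exec_new(graph, cmd, hint, 0, 0, 0, 0);
    ccv_nnc_graph_set_sources(graph, &node, 1);
    ccv_nnc_graph_set_destinations(graph, &node, 1);
    assert(ccv_nnc_graph_source_size(graph) == 1);
    assert(ccv_nnc_graph_destination_size(graph) == 1);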
59
60void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd)
61{
62 assert(exec.d < graph->exec_info->rnum);
63 assert(exec.graph == graph);
64 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
65 exec_info->cmd = cmd;
66}
67
68void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint)
69{
70 assert(exec.d < graph->exec_info->rnum);
71 assert(exec.graph == graph);
72 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
73 exec_info->hint = hint;
74}
75
76static int _ccv_nnc_tensor_multiview_level_count(const ccv_nnc_tensor_multiview_t* const mv)
77{
78 if (!CCV_IS_TENSOR_MULTIVIEW(mv))
79 return 1;
80 const int count = mv->kind + mv->repeat;
81 int i, c = 0;
82 for (i = 0; i < count; i++)
83 {
84 ccv_nnc_tensor_t* tv = CCV_NNC_MULTIVIEW_DATA(mv)[i];
85 if (tv == CCV_NNC_TENSOR_PLACEHOLDER)
86 c = ccv_max(c, 1);
87 else
88 c = ccv_max(c, _ccv_nnc_tensor_multiview_level_count((ccv_nnc_tensor_multiview_t*)tv));
89 }
90 return c + 1;
91}
92
93static ccv_nnc_graph_tensor_wrap_t* _ccv_nnc_graph_tensor_wrap_new(const ccv_nnc_tensor_multiview_t* const mv)
94{
95 const int level_count = _ccv_nnc_tensor_multiview_level_count(mv);
96 ccv_nnc_graph_tensor_wrap_t* tensor_wrap = (ccv_nnc_graph_tensor_wrap_t*)ccmalloc(sizeof(ccv_nnc_graph_tensor_wrap_t) + sizeof(ccv_nnc_tensor_t*) * (level_count - 1));
97 tensor_wrap->update_required = 0;
98 tensor_wrap->count = level_count;
99 tensor_wrap->index = 0;
100 tensor_wrap->tensors[0] = (ccv_nnc_tensor_t*)mv;
101 return tensor_wrap;
102}
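
The allocation above uses the classic trailing-array ("struct hack") idiom: the struct declares a one-element array and the malloc size adds level_count - 1 extra slots. A self-contained sketch of the same idiom with hypothetical names:

    #include <stdlib.h>

    typedef struct {
        int count;
        int slots[1]; /* sized at allocation time, not fixed at 1 */
    } trailing_t;

    static trailing_t* trailing_new(const int n)
    {
        trailing_t* const t = (trailing_t*)malloc(sizeof(trailing_t) + sizeof(int) * (n - 1));
        t->count = n;
        return t;
    }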
103
104static void _ccv_nnc_graph_exec_rewind(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
105{
106 if (!info->tensor_wraps_ref)
107 return;
108 int i;
109 assert(info->tensor_wraps_ref <= graph->tensor_wraps->rnum);
110 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
111 // Rewind from tensor wraps.
112 for (i = 0; i < info->input_size; i++)
113 if (tensor_wrap_array->tensor_wraps[i])
114 info->inputs[i] = tensor_wrap_array->tensor_wraps[i]->tensors[0];
115 const int d = info->input_size;
116 for (i = 0; i < info->output_size; i++)
117 if (tensor_wrap_array->tensor_wraps[d + i])
118 info->outputs[i] = tensor_wrap_array->tensor_wraps[d + i]->tensors[0];
119 const int dd = info->input_size + info->output_size;
120 for (i = 0; i < info->update_size; i++)
121 if (tensor_wrap_array->tensor_wraps[dd + i])
122 info->updates[i] = tensor_wrap_array->tensor_wraps[dd + i]->tensors[0];
123}
124
125static void _ccv_nnc_graph_tensor_wrap_free(ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
126{
127 ccfree(tensor_wrap);
128}
129
130ccv_nnc_graph_tensor_wrap_array_t* ccv_nnc_get_tensor_wrap_array(ccv_nnc_graph_t* const graph, const int tensor_wrap_size, int* const tensor_wraps_ref)
131{
132 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = *tensor_wraps_ref ? (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, *tensor_wraps_ref - 1) : 0;
133 // Otherwise, find an open slot.
134 if (!tensor_wrap_array_ref)
135 {
136 if (!graph->tensor_wraps)
137 graph->tensor_wraps = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wrap_array_t*), 0, 0);
138 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = 0;
139 ccv_array_push(graph->tensor_wraps, &tensor_wrap_array);
140 tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, graph->tensor_wraps->rnum - 1);
141 *tensor_wraps_ref = graph->tensor_wraps->rnum;
142 }
143 int i;
144 if (*tensor_wrap_array_ref)
145 {
146 if ((*tensor_wrap_array_ref)->size != tensor_wrap_size)
147 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)ccrealloc(*tensor_wrap_array_ref, sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1));
148 for (i = (*tensor_wrap_array_ref)->size; i < tensor_wrap_size; i++)
149 (*tensor_wrap_array_ref)->tensor_wraps[i] = 0;
150 } else
151 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)cccalloc(sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1), 1);
152 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
153 tensor_wrap_array->size = tensor_wrap_size;
154 return tensor_wrap_array;
155}
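
Note the *tensor_wraps_ref convention used above: 0 means "no slot assigned yet", and any positive value is the array index plus one. A sketch of the same one-based handle idiom (hypothetical helper names):

    /* 0 == empty handle; otherwise handle == index + 1, so a zero-initialized
     * field can never be confused with a valid slot 0. */
    static int handle_to_index(const int handle) { return handle - 1; }
    static int index_to_handle(const int index) { return index + 1; }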
156
157void ccv_nnc_set_tensor_wraps(ccv_nnc_graph_tensor_wrap_t** const tensor_wraps, ccv_nnc_tensor_t* const* const tensors, const int tensor_size)
158{
159 int i;
160 for (i = 0; i < tensor_size; i++)
161 if (tensors[i])
162 {
163 if (CCV_IS_TENSOR_MULTIVIEW(tensors[i]) &&
164 ((ccv_nnc_tensor_multiview_t*)tensors[i])->anchor != CCV_NNC_MULTIVIEW_PHI)
165 {
166 if (!tensor_wraps[i] || tensors[i] != tensor_wraps[i]->tensors[0])
167 {
168 if (tensor_wraps[i])
169 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
170 tensor_wraps[i] = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)tensors[i]);
171 }
172 } else {
173 if (tensor_wraps[i])
174 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
175 tensor_wraps[i] = 0;
176 }
177 }
178}
179
180void ccv_nnc_graph_register_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
181{
182 ccv_nnc_graph_t* p = graph;
183 const ccv_nnc_graph_tensor_wraps_ref_t tensor_wraps_ref = {
184 .d = tensor_wraps_ref_d,
185 .graph = graph,
186 };
187 do {
188 if (!p->tensor_wraps_refs)
189 {
190 p->tensor_wraps_refs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wraps_ref_t), 0, 0);
191 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
192 } else {
193 int i;
194 int has_tensor_wraps_ref = 0;
195 for (i = 0; !has_tensor_wraps_ref && i < p->tensor_wraps_refs->rnum; i++)
196 {
197 ccv_nnc_graph_tensor_wraps_ref_t* tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
198 has_tensor_wraps_ref = (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph);
199 }
200 if (!has_tensor_wraps_ref)
201 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
202 }
203 p = p->p;
204 } while (p);
205}
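
The do/while above registers the reference with the graph and every enclosing parent graph by following p = p->p until the outermost graph, de-duplicating along the way. A self-contained sketch of the ancestor walk (generic types, not the ccv structs):

    typedef struct node node_t;
    struct node {
        node_t* parent; /* 0 at the outermost node */
    };

    /* Visit a node and each of its ancestors exactly once. */
    static void walk_up(node_t* const n, void (*visit)(node_t*))
    {
        node_t* p;
        for (p = n; p; p = p->parent)
            visit(p);
    }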
206
207static void _ccv_nnc_graph_redo_tensor_wraps(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
208{
209 int i;
210 const int has_wrap = ccv_nnc_tensors_have_wraps(info->inputs, info->input_size) ||
211 ccv_nnc_tensors_have_wraps(info->outputs, info->output_size) ||
212 ccv_nnc_tensors_have_wraps(info->updates, info->update_size);
213 if (has_wrap)
214 {
215 const int tensor_wrap_size = info->input_size + info->output_size + info->update_size;
216 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = ccv_nnc_get_tensor_wrap_array(graph, tensor_wrap_size, &info->tensor_wraps_ref);
217 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps, info->inputs, info->input_size);
218 const int d = info->input_size;
219 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + d, info->outputs, info->output_size);
220 const int dd = info->input_size + info->output_size;
221 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + dd, info->updates, info->update_size);
222 } else if (info->tensor_wraps_ref) {
223 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
224 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
225 if (tensor_wrap_array)
226 {
227 for (i = 0; i < tensor_wrap_array->size; i++)
228 if (tensor_wrap_array->tensor_wraps[i])
229 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[i]);
230 ccfree(tensor_wrap_array);
231 *tensor_wrap_array_ref = 0;
232 info->tensor_wraps_ref = 0;
233 }
234 }
235}
236
237static void _ccv_nnc_graph_deregister_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
238{
239 ccv_nnc_graph_t* p = graph;
240 do {
241 int i;
242 // Remove from the array.
243 if (p->tensor_wraps_refs)
244 for (i = 0; i < p->tensor_wraps_refs->rnum; i++)
245 {
246 ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
247 if (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph)
248 {
249 --p->tensor_wraps_refs->rnum;
250 if (i < p->tensor_wraps_refs->rnum)
251 memmove(tensor_wraps_ref, tensor_wraps_ref + 1, sizeof(ccv_nnc_graph_tensor_wraps_ref_t) * (p->tensor_wraps_refs->rnum - i));
252 break;
253 }
254 }
255 p = p->p;
256 } while (p);
257}
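
Removing an element from a packed array by shifting the tail left, as done above, copies between overlapping ranges, which is why memmove (not memcpy) is the safe call. A self-contained sketch:

    #include <string.h>

    /* Remove arr[i] from a packed array of *len ints, preserving order. */
    static void remove_at(int* const arr, int* const len, const int i)
    {
        --*len;
        if (i < *len)
            memmove(arr + i, arr + i + 1, sizeof(int) * (*len - i));
    }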
258
259void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size)
260{
261 assert(exec.d < graph->exec_info->rnum);
262 assert(exec.graph == graph);
263 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
264 assert(input_flag_size <= info->input_size);
265 assert(output_flag_size <= info->output_size);
266 if (info->input_size + info->output_size == 0)
267 return;
268 if (!info->input_flags)
269 {
270 info->input_flags = (int*)cccalloc(info->input_size + info->output_size, sizeof(int));
271 info->output_flags = info->input_flags + info->input_size;
272 }
273 if (input_flag_size > 0)
274 memcpy(info->input_flags, input_flags, sizeof(int) * input_flag_size);
275 if (output_flag_size > 0)
276 memcpy(info->output_flags, output_flags, sizeof(int) * output_flag_size);
277}
278
279void ccv_nnc_graph_exec_set_peer(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t peer_exec)
280{
281 assert(exec.graph == graph);
282 assert(exec.d >= 0);
283 assert(exec.d < graph->exec_info->rnum);
284 assert(peer_exec.graph == graph || peer_exec.graph == graph->peer);
285 assert(peer_exec.d >= 0);
286 if (peer_exec.graph == graph)
287 { assert(peer_exec.d < graph->exec_info->rnum); }
288 else
289 { assert(peer_exec.d < graph->peer->exec_info->rnum); }
290 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
291 exec_info->peer_ref = peer_exec.d + 1;
292}
293
294static ccv_nnc_tensor_t* _ccv_nnc_any_tensor_from_tensor_multiview(ccv_nnc_tensor_multiview_t* const mv)
295{
296 ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
297 while (CCV_IS_TENSOR_MULTIVIEW(tensor))
298 {
299 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
300 const int count = 0;
301 const int off = mv->kind;
302 const int mod = mv->repeat;
303 // If reached the root.
304 tensor = CCV_NNC_MULTIVIEW_DATA(mv)[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
305 }
306 return tensor;
307}
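
With count fixed at 0, the loop above always selects the first view at each level. The general form of the index expression treats the first `kind` slots as one-shot and cycles over the next `repeat` slots; a small sketch:

    /* off = kind, mod = repeat: the first `off` slots are used once,
     * then the counter wraps cyclically over the next `mod` slots. */
    static int unwrap_index(const int count, const int off, const int mod)
    {
        return count >= off ? ((count - off) % mod) + off : count;
    }
    /* With off = 1, mod = 2: counts 0,1,2,3,4 map to slots 0,1,2,1,2. */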
308
309void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
310{
311 assert(exec.d < graph->exec_info->rnum);
312 assert(exec.graph == graph);
313 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
314 // De-register from the graph if it contains multiview tensors.
315 if (info->tensor_wraps_ref)
316 _ccv_nnc_graph_deregister_tensor_wraps(graph, info->tensor_wraps_ref - 1);
317 // In case it is already executed, rewind.
318 _ccv_nnc_graph_exec_rewind(info, graph);
319 if (input_size == 0 && output_size == 0)
320 {
321 if (info->input_size > 0 || info->output_size > 0)
322 ccfree(info->inputs);
323 info->inputs = 0;
324 info->outputs = 0;
325 info->input_size = 0;
326 info->output_size = 0;
327 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
328 if (info->tensor_wraps_ref)
329 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
330 return;
331 }
332 if (info->inputs)
333 info->inputs = (ccv_nnc_tensor_t**)ccrealloc(info->inputs, sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
334 else
335 info->inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
336 info->outputs = info->inputs + input_size;
337 if (inputs)
338 memcpy(info->inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
339 if (outputs)
340 memcpy(info->outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
341 int i;
342 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
343 for (i = 0; i < input_size + output_size; i++)
344 if (info->inputs[i])
345 {
346 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i];
347 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
348 }
349 info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes);
350 info->input_size = input_size;
351 info->output_size = output_size;
352 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
353 // Register again if the tensor wraps exist.
354 if (info->tensor_wraps_ref)
355 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
356 // Free flags.
357 if (info->input_flags)
358 {
359 ccfree(info->input_flags);
360 info->input_flags = info->output_flags = 0;
361 }
362}
363
364void ccv_nnc_graph_exec_add_update(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update)
365{
366 assert(CCV_IS_TENSOR_MULTIVIEW(update));
367 assert(exec.d < graph->exec_info->rnum);
368 assert(exec.graph == graph);
369 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
370 const int register_tensor_wraps = !info->tensor_wraps_ref;
371 const int update_index = info->update_size;
372 ++info->update_size;
373 if (info->updates)
374 info->updates = (ccv_nnc_tensor_t**)ccrealloc(info->updates, sizeof(ccv_nnc_tensor_t*) * info->update_size);
375 else
376 info->updates = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * info->update_size);
377 info->updates[update_index] = update;
378 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
379 if (register_tensor_wraps)
380 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
381}
382
383ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
384{
385 int d = graph->exec_info->rnum;
386 ccv_nnc_graph_exec_info_t info = {
387 .cmd = cmd,
388 .hint = hint,
389 .input_size = input_size,
390 .output_size = output_size,
391 };
392 assert(inputs || input_size == 0);
393 assert(outputs || output_size == 0);
394 if (input_size > 0 || output_size > 0)
395 {
396 info.inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
397 info.outputs = info.inputs + input_size;
398 if (inputs)
399 memcpy(info.inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
400 if (outputs)
401 memcpy(info.outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
402 info.input_size = input_size;
403 info.output_size = output_size;
404 int i;
405 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
406 for (i = 0; i < input_size + output_size; i++)
407 if (info.inputs[i])
408 {
409 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i];
410 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
411 }
412 info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes);
413 }
414 _ccv_nnc_graph_redo_tensor_wraps(&info, graph);
415 // Add itself to the graph's wraps array, this will help the run time when we run the graph and do unwrapping.
416 if (info.tensor_wraps_ref)
417 ccv_nnc_graph_register_tensor_wraps(graph, info.tensor_wraps_ref - 1);
418 ccv_array_push(graph->exec_info, &info);
419 return (ccv_nnc_graph_exec_t){
420 .d = d,
421 .graph = graph,
422 };
423}
424
425void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to)
426{
427 ccv_nnc_graph_tensor_carry_over_t carry_over = {
428 .from = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)from),
429 .to = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)to)
430 };
431 if (!graph->carry_overs)
432 graph->carry_overs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_carry_over_t), 0, 0);
433 ccv_array_push(graph->carry_overs, &carry_over);
434}
435
436int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
437{
438 assert(graph == source.graph);
439 assert(graph == destination.graph);
440 assert(source.d < graph->exec_info->rnum);
441 assert(destination.d < graph->exec_info->rnum);
442 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
443 if (src_info->outgoings == 0)
444 src_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
445 else {
446 int i;
447 // Check if this is already connected, if so, skip.
448 for (i = 0; i < src_info->outgoings->rnum; i++)
449 if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
450 return -1;
451 }
452 ccv_array_push(src_info->outgoings, &destination.d);
453 graph->topsorted = 0;
454 return 0;
455}
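
A minimal sketch of wiring two nodes, assuming execs `a` and `b` were created with ccv_nnc_graph_exec_new on the same graph:

    if (ccv_nnc_graph_exec_concat(graph, a, b) != 0)
        /* -1: the a -> b edge already existed, nothing was added */;

On success, graph->topsorted is cleared, so the next traversal re-derives the topological order.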
456
457int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
458{
459 assert(graph == source.graph);
460 assert(graph == destination.graph);
461 assert(source.d < graph->exec_info->rnum);
462 assert(destination.d < graph->exec_info->rnum);
463 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
464 if (!src_info->outgoings)
465 return -1;
466 int i, j = -1;
467 // Check if this is already connected, if so, skip.
468 for (i = 0; i < src_info->outgoings->rnum; i++)
469 if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
470 {
471 j = i;
472 break;
473 }
474 if (j < 0)
475 return -1;
476 if (j < src_info->outgoings->rnum - 1)
477 *(int*)ccv_array_get(src_info->outgoings, j) = *(int*)ccv_array_get(src_info->outgoings, src_info->outgoings->rnum - 1);
478 --src_info->outgoings->rnum;
479 ccv_nnc_graph_exec_info_t* dest_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, destination.d);
480 if (dest_info->outgoings)
481 for (i = 0; i < dest_info->outgoings->rnum; i++)
482 ccv_array_add_unique_int(src_info->outgoings, *(int*)ccv_array_get(dest_info->outgoings, i));
483 graph->topsorted = 0;
484 return 0;
485}
486
487int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph)
488{
489 return graph->exec_info ? graph->exec_info->rnum : 0;
490}
491
492void* ccv_nnc_graph_buffer(ccv_nnc_graph_t* const graph, int size)
493{
494 if (graph->buffer_size >= size)
495 return graph->buffer;
496 graph->buffer_size = size;
497 graph->buffer = (graph->buffer) ? ccrealloc(graph->buffer, size) : ccmalloc(size);
498 return graph->buffer;
499}
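
ccv_nnc_graph_buffer implements a grow-only scratch buffer: it reallocates only when the request exceeds the cached size and never shrinks, so repeated calls amortize to few allocations. A self-contained sketch of the idiom:

    #include <stdlib.h>

    typedef struct {
        void* buf;
        int size;
    } scratch_t;

    static void* scratch_get(scratch_t* const s, const int size)
    {
        if (s->size >= size)
            return s->buf;
        s->size = size;
        s->buf = s->buf ? realloc(s->buf, size) : malloc(size);
        return s->buf;
    }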
500
501void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size)
502{
503 assert(exec_cvt_size == graph->exec_info->rnum);
504 assert(graph->sources && graph->sources->rnum);
505 assert(graph->destinations && graph->destinations->rnum);
506 int i, j;
507 for (i = 0; i < exec_cvt_size; i++)
508 exec_cvt[i] = -1;
509 ccv_array_t* exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), graph->exec_info->rnum, 0);
510 // If there are breakpoints, it is more complicated, we first start to the breakpoints, and then continue from the breakpoints to the destinations.
511 if (graph->breakpoint_size)
512 {
513 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, graph->breakpoints, graph->breakpoint_size, 0);
514 for (i = 0; i < graph->breakpoint_size; i++)
515 exec_cvt[graph->breakpoints[i].d] = -2; // Mark this as breakpoints, so we will skip the first round.
516 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx) {
517 assert(!node->peer_ref); // If node has a peer ref, we cannot fix it up.
518 if (exec_cvt[idx] == -2) // Skip breakpoint.
519 continue;
520 // Loop over node and push to the array.
521 ccv_array_push(exec_info, node);
522 // Go to its sub-graph to fix exec_idx
523 for (i = 0; i < node->graph_ref_size; i++)
524 {
525 const int graph_ref = CCV_NNC_GRAPH_REF(node)[i] - 1;
526 if (graph_ref >= 0)
527 {
528 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref);
529 sub_graph->exec_idx = exec_info->rnum;
530 }
531 }
532 exec_cvt[idx] = exec_info->rnum - 1;
533 } ccv_nnc_graph_visit_endfor
534 ccv_nnc_graph_visit_free(visit);
535 graph->breakpoint_offset = exec_info->rnum;
536 visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0);
;
537 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
538 assert(!node->peer_ref)((void) sizeof ((!node->peer_ref) ? 1 : 0), __extension__ (
{ if (!node->peer_ref) ; else __assert_fail ("!node->peer_ref"
, "ccv_nnc_graph.c", 538, __extension__ __PRETTY_FUNCTION__);
}))
; // If the node has a peer ref, we cannot fix it up.
539 // Loop over node and push to the array.
540 ccv_array_push(exec_info, node);
541 // Go to its sub-graph to fix exec_idx
542 for (i = 0; i < node->graph_ref_size; i++)
543 {
544 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
545 if (graph_ref >= 0)
546 {
547 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
548 sub_graph->exec_idx = exec_info->rnum;
549 }
550 }
551 exec_cvt[idx] = exec_info->rnum - 1;
552 } ccv_nnc_graph_visit_endfor} }
553 ccv_nnc_graph_visit_free(visit);
554 for (i = 0; i < graph->breakpoint_size; i++)
555 { assert(exec_cvt[graph->breakpoints[i].d] >= 0)((void) sizeof ((exec_cvt[graph->breakpoints[i].d] >= 0
) ? 1 : 0), __extension__ ({ if (exec_cvt[graph->breakpoints
[i].d] >= 0) ; else __assert_fail ("exec_cvt[graph->breakpoints[i].d] >= 0"
, "ccv_nnc_graph.c", 555, __extension__ __PRETTY_FUNCTION__);
}))
; } // All breakpoints should be assigned.
556 } else {
557 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_info->rnum) - 1)); _visit_->size = 0;
do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_
++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (graph->exec_info->
rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof
(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) +
sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph
->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t*
)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_
+ (graph->exec_info->rnum)) + (graph->exec_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info
->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 :
0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((
char*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if (((ccv_nnc_graph_exec_info_t*)
((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->sources
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->sources)->data)) + (size_t)(
graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*
)(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_
[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d
].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].
c] = _idx_; ++_incomings_[d].c; _exists_[_q_][_exist_size_[_q_
]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_
), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->destinations
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->destinations->rnum); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->sources->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->
data)) + (size_t)(graph->sources)->rsize * (size_t)(0))
))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _incomings_[d].r == 3 && _d_ < (graph
->destinations->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(graph->destinations->rnum)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 4) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 557, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
558 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
559 assert(!node->peer_ref)((void) sizeof ((!node->peer_ref) ? 1 : 0), __extension__ (
{ if (!node->peer_ref) ; else __assert_fail ("!node->peer_ref"
, "ccv_nnc_graph.c", 559, __extension__ __PRETTY_FUNCTION__);
}))
; // If the node has a peer ref, we cannot fix it up.
560 // Loop over node and push to the array.
561 ccv_array_push(exec_info, node);
562 // Go to its sub-graph to fix exec_idx
563 for (i = 0; i < node->graph_ref_size; i++)
564 {
565 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
566 if (graph_ref >= 0)
567 {
568 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
569 sub_graph->exec_idx = exec_info->rnum;
570 }
571 }
572 exec_cvt[idx] = exec_info->rnum - 1;
573 } ccv_nnc_graph_visit_endfor} }
574 ccv_nnc_graph_visit_free(visit);
575 }
576 assert(graph->exec_info->rnum == exec_info->rnum)((void) sizeof ((graph->exec_info->rnum == exec_info->
rnum) ? 1 : 0), __extension__ ({ if (graph->exec_info->
rnum == exec_info->rnum) ; else __assert_fail ("graph->exec_info->rnum == exec_info->rnum"
, "ccv_nnc_graph.c", 576, __extension__ __PRETTY_FUNCTION__);
}))
;
577 ccv_array_free(graph->exec_info);
578 graph->exec_info = exec_info;
579 for (i = 0; i < graph->sources->rnum; i++)
580 {
581 ccv_nnc_graph_exec_t* const source = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, i)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(i)))
;
582 source->d = exec_cvt[source->d];
583 }
584 for (i = 0; i < graph->destinations->rnum; i++)
585 {
586 ccv_nnc_graph_exec_t* const destination = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(i)))
;
587 destination->d = exec_cvt[destination->d];
588 }
589 // Update all outgoings to reflect the latest.
590 for (i = 0; i < exec_info->rnum; i++)
591 {
592 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(exec_info, i)((void*)(((char*)((exec_info)->data)) + (size_t)(exec_info
)->rsize * (size_t)(i)))
;
593 if (info->outgoings)
594 for (j = 0; j < info->outgoings->rnum; j++)
595 *(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
= exec_cvt[*(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
];
596 }
597 graph->topsorted = 1;
598}
599
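/* An illustrative sketch (a hypothetical helper, not part of ccv_nnc_graph.c) of the
 * remapping idea used above: after the nodes are re-pushed in topological order,
 * exec_cvt maps each old node index to its new position, and every stored index
 * (sources, destinations, outgoings) is rewritten through that map. */
static void sketch_remap_indices(int* const indices, const int size, const int* const exec_cvt)
{
	int i;
	for (i = 0; i < size; i++)
		indices[i] = exec_cvt[indices[i]]; /* Old index -> new topologically-sorted index. */
}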
600typedef struct {
601 int device_id;
602 int exec_idx;
603 ccv_array_t* signal_set;
604 ccv_array_t* command_set; // The set of commands executed in this stream. In case there is a tie (on rank), we will check this.
605} ccv_nnc_stream_data_t;
606
607static void _ccv_nnc_graph_schedule_assign_signals(ccv_array_t* const incoming, ccv_nnc_graph_exec_info_t* const node, ccv_array_t* const stream_data, int* const signal_size, ccv_nnc_graph_exec_info_t* const exec_info, const int exec_info_size)
608{
609 assert(incoming->rnum > 0)((void) sizeof ((incoming->rnum > 0) ? 1 : 0), __extension__
({ if (incoming->rnum > 0) ; else __assert_fail ("incoming->rnum > 0"
, "ccv_nnc_graph.c", 609, __extension__ __PRETTY_FUNCTION__);
}))
;
610 int i, j, k;
611 int wait_size = 0, max_wait_size = 0;
612 for (i = 0; i < incoming->rnum; i++)
613 {
614 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
615 ccv_nnc_graph_exec_info_t* const incoming_exec_info = exec_info + incoming_idx;
616 assert(incoming_exec_info->schedule.stream_size > 0)((void) sizeof ((incoming_exec_info->schedule.stream_size >
0) ? 1 : 0), __extension__ ({ if (incoming_exec_info->schedule
.stream_size > 0) ; else __assert_fail ("incoming_exec_info->schedule.stream_size > 0"
, "ccv_nnc_graph.c", 616, __extension__ __PRETTY_FUNCTION__);
}))
;
617 max_wait_size += incoming_exec_info->schedule.stream_size;
618 }
619 int waits[ccv_max(1, max_wait_size)({ typeof (1) _a = (1); typeof (max_wait_size) _b = (max_wait_size
); (_a > _b) ? _a : _b; })
];
620 assert(node->schedule.stream_size > 0)((void) sizeof ((node->schedule.stream_size > 0) ? 1 : 0
), __extension__ ({ if (node->schedule.stream_size > 0)
; else __assert_fail ("node->schedule.stream_size > 0"
, "ccv_nnc_graph.c", 620, __extension__ __PRETTY_FUNCTION__);
}))
;
621 for (i = 0; i < incoming->rnum; i++)
622 {
623 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
624 assert(incoming_idx < exec_info_size)((void) sizeof ((incoming_idx < exec_info_size) ? 1 : 0), __extension__
({ if (incoming_idx < exec_info_size) ; else __assert_fail
("incoming_idx < exec_info_size", "ccv_nnc_graph.c", 624,
__extension__ __PRETTY_FUNCTION__); }))
;
625 assert(incoming_idx >= 0)((void) sizeof ((incoming_idx >= 0) ? 1 : 0), __extension__
({ if (incoming_idx >= 0) ; else __assert_fail ("incoming_idx >= 0"
, "ccv_nnc_graph.c", 625, __extension__ __PRETTY_FUNCTION__);
}))
;
626 ccv_nnc_graph_exec_info_t* const incoming_exec_info = exec_info + incoming_idx;
627 assert(incoming_exec_info->schedule.stream_size > 0)((void) sizeof ((incoming_exec_info->schedule.stream_size >
0) ? 1 : 0), __extension__ ({ if (incoming_exec_info->schedule
.stream_size > 0) ; else __assert_fail ("incoming_exec_info->schedule.stream_size > 0"
, "ccv_nnc_graph.c", 627, __extension__ __PRETTY_FUNCTION__);
}))
;
628 int stream_synced = 1;
629 // If the current node's streams are a subset of the incoming node's streams, there
630 // is no need to sync via a signal, because we are already synced with the incoming node.
631 for (j = 0; stream_synced && j < node->schedule.stream_size; j++)
632 {
633 const int s = SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[j];
634 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 634
, __extension__ __PRETTY_FUNCTION__); }))
;
635 int flag = 0;
636 for (k = 0; !flag && k < incoming_exec_info->schedule.stream_size; k++)
637 flag = (SCHEDULE_STREAMS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_streams : (incoming_exec_info->schedule
)._heap_streams)
[k] == s);
638 stream_synced = flag;
639 }
640 if (stream_synced)
641 continue;
642 // Otherwise, find the streams we need to sync with, and create signals for these.
643 for (j = 0; j < incoming_exec_info->schedule.stream_size; j++)
644 {
645 const int s = SCHEDULE_STREAMS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_streams : (incoming_exec_info->schedule
)._heap_streams)
[j];
646 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 646
, __extension__ __PRETTY_FUNCTION__); }))
;
647 int flag = 0;
648 for (k = 0; !flag && k < node->schedule.stream_size; k++)
649 flag = (SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[k] == s);
650 if (!flag) // Need to have a signal.
651 {
652 if (SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j] < 0)
653 SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j] = (*signal_size)++;
654 else {
655 int flag = 0;
656 // If any stream of the current node has already seen this signal, we are good already.
657 for (k = 0; !flag && k < node->schedule.stream_size; k++)
658 {
659 assert(SCHEDULE_STREAMS(node->schedule)[k] >= 0)((void) sizeof ((((node->schedule).stream_size <= 1 ? (
node->schedule)._inline_streams : (node->schedule)._heap_streams
)[k] >= 0) ? 1 : 0), __extension__ ({ if (((node->schedule
).stream_size <= 1 ? (node->schedule)._inline_streams :
(node->schedule)._heap_streams)[k] >= 0) ; else __assert_fail
("SCHEDULE_STREAMS(node->schedule)[k] >= 0", "ccv_nnc_graph.c"
, 659, __extension__ __PRETTY_FUNCTION__); }))
;
660 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(node->schedule)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((node->schedule).stream_size <=
1 ? (node->schedule)._inline_streams : (node->schedule
)._heap_streams)[k])))
;
661 flag = (data->signal_set && ccv_array_find_int(data->signal_set, SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j]));
662 }
663 if (flag)
664 continue;
665 }
666 // Otherwise, we need to wait for this signal. Currently, the granularity is to wait on all of this node's streams.
667 waits[wait_size++] = SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j];
668 // Mark that all streams on this node have now seen this signal.
669 for (k = 0; k < node->schedule.stream_size; k++)
670 {
671 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(node->schedule)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((node->schedule).stream_size <=
1 ? (node->schedule)._inline_streams : (node->schedule
)._heap_streams)[k])))
;
672 if (!data->signal_set)
673 data->signal_set = ccv_array_new(sizeof(int), 0, 0);
674 ccv_array_push(data->signal_set, &SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j]);
675 }
676 }
677 }
678 }
679 node->schedule.wait_size = wait_size;
680 if (wait_size > 0)
681 {
682 node->schedule.waits = node->schedule.waits ? ccreallocrealloc(node->schedule.waits, sizeof(int) * wait_size) : ccmallocmalloc(sizeof(int) * wait_size);
683 memcpy(node->schedule.waits, waits, sizeof(int) * wait_size);
684 }
685}
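/* A self-contained sketch (hypothetical types and names, not part of ccv_nnc_graph.c)
 * of the dedup rule implemented above: a node only waits on a signal if none of its
 * streams has seen that signal yet; once it does wait, every one of its streams is
 * marked as having seen the signal, so later nodes on those streams can skip it. */
#define SKETCH_MAX_SIGNALS 8

typedef struct {
	int seen[SKETCH_MAX_SIGNALS]; /* Which signals this stream has already synced on. */
} sketch_stream_t;

static int sketch_need_wait(sketch_stream_t* const streams, const int* const node_streams, const int stream_size, const int signal)
{
	int k;
	for (k = 0; k < stream_size; k++)
		if (streams[node_streams[k]].seen[signal])
			return 0; /* Some stream already synced on this signal; no new wait needed. */
	for (k = 0; k < stream_size; k++)
		streams[node_streams[k]].seen[signal] = 1; /* All streams now count as synced. */
	return 1;
}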
686
687typedef struct {
688 int rank;
689 ccv_array_t* outgoings;
690} ccv_nnc_incoming_t;
691
692static int _ccv_nnc_device_ids_for_stream_data(ccv_nnc_graph_exec_info_t* const node, const int device_id, ccv_array_t* const stream_data, int* const device_ids, const int max_device_id_size)
693{
694 int device_id_size = ccv_nnc_device_ids_for_io(node->inputs, node->input_size, node->outputs, node->output_size, device_ids, max_device_id_size);
695 if (device_id_size == 0)
696 {
697 // If there is default stream data, use its device id. Otherwise, use the device id passed in (this will become the default data's device id).
698 if (stream_data->rnum > 0)
699 {
700 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
701 device_ids[0] = default_data->device_id;
702 } else
703 device_ids[0] = device_id >= 0 ? device_id : 0;
704 device_id_size = 1;
705 }
706 return device_id_size;
707}
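/* A compact sketch (hypothetical helper, not part of ccv_nnc_graph.c) of the fallback
 * above: when the node's tensors name no device, reuse the default stream data's
 * device id if one exists, else the caller-provided id, else device 0. */
static int sketch_fallback_device(const int have_default_data, const int default_device_id, const int passed_device_id)
{
	if (have_default_data)
		return default_device_id; /* Reuse the first stream's device. */
	return passed_device_id >= 0 ? passed_device_id : 0; /* Caller's id, else device 0. */
}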
708
709static void _ccv_nnc_graph_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type, const int device_id, ccv_nnc_stream_context_t* const stream_context)
710{
711 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 711, __extension__ __PRETTY_FUNCTION__);
}))
;
712 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 712, __extension__ __PRETTY_FUNCTION__);
}))
;
713 assert(graph->topsorted)((void) sizeof ((graph->topsorted) ? 1 : 0), __extension__
({ if (graph->topsorted) ; else __assert_fail ("graph->topsorted"
, "ccv_nnc_graph.c", 713, __extension__ __PRETTY_FUNCTION__);
}))
; // Only support this on a topsorted graph.
714 const int exec_info_size = graph->exec_info->rnum;
715 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0)))
;
716 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, exec_info, exec_info_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_
].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const
int _heap_mem_ = (exec_info_size + _incoming_edges_ > 1024
); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (
sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t
) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t*
_exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), (
int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size),
}; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for
(_i_ = 0; _i_ < (graph->sources->rnum); _i_++) { ((
void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph
->sources)->data)) + (size_t)(graph->sources)->rsize
* (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
sources)->data)) + (size_t)(graph->sources)->rsize *
(size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->sources->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if ((exec_info)
[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].
outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((
char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t)((
exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (
_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_
+= _incomings_[d].c; _incomings_[d].c = 0; } _edges_[_incomings_
[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c
; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; }
} ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ =
0; _i_ < (graph->destinations->rnum); _i_++) { ((void
) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->destinations->rnum); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->sources->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->
data)) + (size_t)(graph->sources)->rsize * (size_t)(0))
))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 4; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 3 && _d_ < (graph->destinations
->rnum)) { _exists_[_p_][_i_] = d; continue; } } else for (
_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++
) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_
].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(graph->destinations->rnum)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph
->destinations->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r == 4) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size
)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
717 int i, j, k;
718 // Generate exec dependencies (or, in other words, partial ordering of executions).
719 ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(exec_info_size, exec_info_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
720 int* buf = (int*)ccmallocmalloc(sizeof(int) * exec_info_size * 2);
721 int buf_size;
722#define for_block(x, val) \
723 do { \
724 if (((int32_t*)val)[0] > 0) \
725 { \
726 buf[buf_size * 2] = x; \
727 buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \
728 ++buf_size; \
729 } \
730 } while (0)
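/* A small sketch (hypothetical helper, not part of ccv_nnc_graph.c) of the buffer
 * layout for_block fills above: buf packs (ancestor index, hop count + 1) pairs,
 * so pair k lives at buf[2 * k] and buf[2 * k + 1]. */
static void sketch_push_dep(int* const buf, int* const buf_size, const int ancestor, const int hops)
{
	buf[*buf_size * 2] = ancestor; /* Ancestor node index. */
	buf[*buf_size * 2 + 1] = hops + 1; /* Hop count, extended by one for this edge. */
	++*buf_size;
}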
731 ccv_nnc_graph_visit_for(visit, exec_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_info)) const node __attribute__((unused
)) = (exec_info) + idx;
{
732 buf_size = 0; /* save all its parent deps to this buffer */
733 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
734 if (node->schedule.stream_size > 1)
735 ccfreefree(node->schedule._heap_streams);
736 node->schedule.stream_size = 0;
737 node->schedule.wait_size = 0;
738 if (vector)
739 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
740 if (!node->outgoings)
741 continue;
742 for (i = 0; i < node->outgoings->rnum; i++)
743 {
744 int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
745 const int32_t one = 1;
746 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx);
747 /* If not found, set it. If the current node is a destination node, there is no
748 * need to set it as a parent of subsequent nodes, due to its terminal nature. */
749 if (!term && (!cell.i32 || cell.i32[0] == 0))
750 ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one);
751 for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */
752 {
753 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]);
754 /* If not found, set */
755 if (!cell.i32 || cell.i32[0] == 0)
756 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]);
757 else {
758 /* Otherwise, set to the longest one */
759 int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1])({ typeof (cell.i32[0]) _a = (cell.i32[0]); typeof (buf[j * 2
+ 1]) _b = (buf[j * 2 + 1]); (_a > _b) ? _a : _b; })
;
760 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep);
761 }
762 }
763 }
764 } ccv_nnc_graph_visit_endfor} }
765#undef for_block
766 ccfreefree(buf);
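/* A dense, self-contained sketch (plain arrays instead of the library's sparse
 * matrix, not part of ccv_nnc_graph.c) of the accumulation above: dep[child * n +
 * ancestor] holds the longest hop count from ancestor to child; each edge extends
 * the parent's ancestor row by one hop and keeps the maximum. */
static void sketch_add_edge_deps(int* const dep, const int n, const int parent, const int child)
{
	int j;
	if (dep[child * n + parent] < 1)
		dep[child * n + parent] = 1; /* A direct edge counts as one hop. */
	for (j = 0; j < n; j++)
		if (dep[parent * n + j] > 0 && dep[child * n + j] < dep[parent * n + j] + 1)
			dep[child * n + j] = dep[parent * n + j] + 1; /* The longest path wins. */
}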
767 // Algorithm to allocate signals and streams for this graph.
768 ccv_array_t* const stream_data = ccv_array_new(sizeof(ccv_nnc_stream_data_t), 0, 0);
769 ccv_array_t** const outgoings = cccalloccalloc(exec_info_size, sizeof(ccv_array_t*));
770 ccv_nnc_incoming_t* const incomings = cccalloccalloc(exec_info_size, sizeof(ccv_nnc_incoming_t));
771 int max_device_id_size = 1;
772 // Filter out outgoing nodes that we will be able to reach afterwards anyway.
773 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
774 max_device_id_size = ccv_max(node->input_size + node->output_size, max_device_id_size)({ typeof (node->input_size + node->output_size) _a = (
node->input_size + node->output_size); typeof (max_device_id_size
) _b = (max_device_id_size); (_a > _b) ? _a : _b; })
;
775 if (node->outgoings)
776 {
777 outgoings[idx] = ccv_array_new(sizeof(int), 0, 0);
778 for (i = 0; i < node->outgoings->rnum; i++)
779 {
780 const int di = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
781 int flag = 0;
782 for (j = 0; !flag && j < node->outgoings->rnum; j++)
783 {
784 if (j != i)
785 {
786 const int dj = *(int*)ccv_array_get(node->outgoings, j)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(j)))
;
787 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, di, dj);
788 flag = (cell.i32 && cell.i32[0]);
789 }
790 }
791 if (!flag)
792 {
793 ccv_array_push(outgoings[idx], &di);
794 if (!incomings[di].outgoings)
795 incomings[di].outgoings = ccv_array_new(sizeof(int), 1, 0);
796 ccv_array_push(incomings[di].outgoings, &idx);
797 }
798 }
799 // If we have outgoing nodes, we cannot filter out all of them.
800 assert(node->outgoings->rnum == 0 || outgoings[idx]->rnum > 0)((void) sizeof ((node->outgoings->rnum == 0 || outgoings
[idx]->rnum > 0) ? 1 : 0), __extension__ ({ if (node->
outgoings->rnum == 0 || outgoings[idx]->rnum > 0) ; else
__assert_fail ("node->outgoings->rnum == 0 || outgoings[idx]->rnum > 0"
, "ccv_nnc_graph.c", 800, __extension__ __PRETTY_FUNCTION__);
}))
;
801 }
802 } ccv_nnc_graph_visit_endfor} }
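/* A sketch of the filtering test above (using the dense dep array from the previous
 * sketch; hypothetical names, not part of ccv_nnc_graph.c): a direct edge to di is
 * redundant if some sibling outgoing dj already reaches di, because the dependency
 * is then enforced transitively through dj. */
static int sketch_edge_redundant(const int* const dep, const int n, const int* const outgoings, const int outgoing_size, const int i)
{
	int j;
	const int di = outgoings[i];
	for (j = 0; j < outgoing_size; j++)
		if (j != i && dep[di * n + outgoings[j]] > 0)
			return 1; /* di is reachable from dj, so the direct edge can be dropped. */
	return 0;
}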
803#define visitor(node, idx, _) \
804 if (node->outgoings) \
805 for (i = 0; i < node->outgoings->rnum; i++) \
806 { \
807 const int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
; \
808 node->rank = ccv_max(incomings[d].rank + 1, node->rank)({ typeof (incomings[d].rank + 1) _a = (incomings[d].rank + 1
); typeof (node->rank) _b = (node->rank); (_a > _b) ?
_a : _b; })
; \
809 }
810 CCV_NNC_GRAPH_VISIT(graph, incomings, exec_info_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_
+= ((incomings)[_i_].outgoings) ? (incomings)[_i_].outgoings
->rnum : 0; const int _heap_mem_ = (exec_info_size + _incoming_edges_
> 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_)
_incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t
) * (exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2
+ _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_info_size
) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size
)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size
)), (int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size
), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size)
; for (_i_ = 0; _i_ < (graph->destinations->rnum); _i_
++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->destinations->rnum), 0, }; int _p_ = 0, _q_ = 1
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r == 1) continue
; _incomings_[_idx_].r = 1; if ((incomings)[_idx_].outgoings)
for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((incomings
)[_idx_].outgoings)->data)) + (size_t)((incomings)[_idx_].
outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->destinations->rnum); _exist_size_[1] = 0; _p_ = 0, _q_
= 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if ((incomings)
[_idx_].outgoings) for (_j_ = 0; _j_ < (incomings)[_idx_].
outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((
char*)(((incomings)[_idx_].outgoings)->data)) + (size_t)((
incomings)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (
_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_
+= _incomings_[d].c; _incomings_[d].c = 0; } _edges_[_incomings_
[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c
; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; }
} ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ =
0; _i_ < (graph->sources->rnum); _i_++) { ((void) sizeof
((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->
data)) + (size_t)(graph->sources)->rsize * (size_t)(0))
))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (graph
->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->sources->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->sources)->data)) + (size_t)(graph->sources)->
rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->destinations->rnum); _exist_size_[1] = 0;
int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor(((incomings) + _idx_
), (_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d
) { ++_d_; _incomings_[_idx_].r = 4; } if ((incomings)[_idx_]
.outgoings) { if ((incomings)[_idx_].outgoings->rnum == 1)
{ const int d = *(int*)((void*)(((char*)(((incomings)[_idx_]
.outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 3 && _d_ <
(graph->sources->rnum)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
incomings)[_idx_].outgoings)->data)) + (size_t)((incomings
)[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (graph->sources->rnum)) { _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_
; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_
= 0; _i_ < (graph->sources->rnum); _i_++) { ((void)
sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
sources)->data)) + (size_t)(graph->sources)->rsize *
(size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ (
{ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d].r == 4) continue; if (!(
0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((
void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(0))))[_i_].d].c == 0) ? 1 :
0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->sources)->data)) + (size_t)(
graph->sources)->rsize * (size_t)(0))))[_i_].d].c == 0)
; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].d].c > 0) continue
; visitor(((incomings) + ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d), (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].d), (_incomings_
[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while
(0);
;
811#undef visitor
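// The expansion above is the generic CCV_NNC_GRAPH_VISIT pattern run in
// reverse (seeded from graph->destinations): pass one (r = 1) marks every
// reachable node and accumulates its incoming count in c; pass two (r = 2)
// buckets the reverse edges into the flat _edges_ array using the prefix
// offsets stored in .edges; pass three (r = 3) walks back along those edges
// from graph->sources to keep only nodes on a source-destination path; the
// final loop then pops nodes whose count drops to zero and hands each to
// visitor in dependency order. A schematic sketch of that last step only,
// with hypothetical names:
//
//   while (frontier is non-empty)
//     for each idx in frontier:
//       visitor(nodes + idx, idx, ...);
//       for each edge idx -> d:
//         if (--count[d] == 0) push d onto the next frontier;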
812 int device_ids[max_device_id_size];
813 int outgoing_device_ids[max_device_id_size];
814 int signal_size = 0;
815 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
816 // Go through the incomings.
817 const int device_id_size = _ccv_nnc_device_ids_for_stream_data(node, device_id, stream_data, device_ids, max_device_id_size);
818 if (node->schedule.stream_size == 0)
819 {
820   node->schedule.stream_size = device_id_size; // At least the same size as device_id_size.
821 if (device_id_size > 1)
822 {
823 node->schedule._heap_streams = (int*)ccmallocmalloc(sizeof(int) * device_id_size * 2);
824 node->schedule._heap_signals = (node->schedule._heap_streams + device_id_size);
825 }
826 for (i = 0; i < device_id_size; i++)
827 SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] = -1, SCHEDULE_SIGNALS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_signals : (node->schedule)._heap_signals)
[i] = -1;
828 }
829 for (i = 0; i < device_id_size; i++)
830 // Go through until the end to assign streams.
831 if (SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] < 0)
832 {
833 int stream_idx = -1;
834 int stream_has_command = 0;
835 // First, find a good stream in stream data (the stream is good if it can be recycled, and it has the same command).
836 // Otherwise, we prefer a usable stream (it doesn't have the command, but it can be recycled).
837 for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++)
838 {
839 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(j)))
;
840 if (data->device_id == device_ids[i])
841 {
842 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, data->exec_idx);
843 // If there is a path to conclude that exec_idx is before idx, then we can reuse
844 // this stream. Otherwise the work in this "empty stream" could still be ongoing,
845 // and we may delay the following work unnecessarily.
846 if (cell.i32 && cell.i32[0] > 0)
847 {
848 if (ccv_array_find_uint(data->command_set, node->cmd.cmd))
849 stream_idx = j, stream_has_command = 1;
850 else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet.
851 stream_idx = j;
852 }
853 }
854 }
855 if (stream_idx < 0)
856 {
857 stream_idx = stream_data->rnum;
858 const ccv_nnc_stream_data_t data = {
859 .device_id = device_ids[i],
860 };
861 ccv_array_push(stream_data, &data);
862 }
863 assert(stream_idx >= 0)((void) sizeof ((stream_idx >= 0) ? 1 : 0), __extension__ (
{ if (stream_idx >= 0) ; else __assert_fail ("stream_idx >= 0"
, "ccv_nnc_graph.c", 863, __extension__ __PRETTY_FUNCTION__);
}))
;
864 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
865 if (!data->command_set)
866 data->command_set = ccv_array_new(sizeof(uint32_t), 1, 0);
867 SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] = stream_idx;
868 ccv_array_add_unique_uint(data->command_set, node->cmd.cmd);
869    // Assign all subsequent nodes to use this stream.
870 int outgoing_idx = idx;
871 while (outgoings[outgoing_idx] && outgoings[outgoing_idx]->rnum)
872 {
873 int highest_rank = -1;
874 int highest_idx = -1;
875 int stream_n = -1;
876 int stream_has_command = 0;
877 for (j = 0; j < outgoings[outgoing_idx]->rnum; j++)
878 {
879 const int d = *(int*)ccv_array_get(outgoings[outgoing_idx], j)((void*)(((char*)((outgoings[outgoing_idx])->data)) + (size_t
)(outgoings[outgoing_idx])->rsize * (size_t)(j)))
;
880 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + d;
881 const int outgoing_device_id_size = _ccv_nnc_device_ids_for_stream_data(outgoing_node, device_id, stream_data, outgoing_device_ids, max_device_id_size);
882 if (outgoing_node->schedule.stream_size == 0)
883 {
884      outgoing_node->schedule.stream_size = outgoing_device_id_size; // At least the same size as outgoing_device_id_size.
885 if (outgoing_device_id_size > 1)
886 {
887 outgoing_node->schedule._heap_streams = (int*)ccmallocmalloc(sizeof(int) * outgoing_device_id_size * 2);
888 outgoing_node->schedule._heap_signals = (outgoing_node->schedule._heap_streams + outgoing_device_id_size);
889 }
890 for (k = 0; k < outgoing_device_id_size; k++)
891 SCHEDULE_STREAMS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_streams : (outgoing_node->schedule)
._heap_streams)
[k] = -1, SCHEDULE_SIGNALS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_signals : (outgoing_node->schedule)
._heap_signals)
[k] = -1;
892 }
893 assert(outgoing_node->schedule.stream_size == outgoing_device_id_size)((void) sizeof ((outgoing_node->schedule.stream_size == outgoing_device_id_size
) ? 1 : 0), __extension__ ({ if (outgoing_node->schedule.stream_size
== outgoing_device_id_size) ; else __assert_fail ("outgoing_node->schedule.stream_size == outgoing_device_id_size"
, "ccv_nnc_graph.c", 893, __extension__ __PRETTY_FUNCTION__);
}))
;
894 for (k = 0; k < outgoing_device_id_size; k++)
895      // If it is on the same device and its stream is not yet assigned, it is a candidate.
896 if (outgoing_device_ids[k] == device_ids[i] &&
897 SCHEDULE_STREAMS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_streams : (outgoing_node->schedule)
._heap_streams)
[k] < 0 &&
898 (incomings[d].rank > highest_rank ||
899 (incomings[d].rank == highest_rank &&
900 !stream_has_command && ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd))))
901 {
902 highest_rank = incomings[d].rank;
903 highest_idx = d;
904 stream_n = k;
905       // This is 1 if the rank is the same (the tie must already have been broken above); if the rank is not the same, we need to compute it.
906 stream_has_command = (incomings[d].rank == highest_rank || ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd));
907 }
908 }
909 if (highest_idx >= 0)
910 {
911 outgoing_idx = highest_idx;
912 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + outgoing_idx;
913 assert(stream_n >= 0)((void) sizeof ((stream_n >= 0) ? 1 : 0), __extension__ ({
if (stream_n >= 0) ; else __assert_fail ("stream_n >= 0"
, "ccv_nnc_graph.c", 913, __extension__ __PRETTY_FUNCTION__);
}))
;
914 SCHEDULE_STREAMS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_streams : (outgoing_node->schedule)
._heap_streams)
[stream_n] = stream_idx;
915 ccv_array_add_unique_uint(data->command_set, outgoing_node->cmd.cmd);
916 } else
917 break;
918 }
919 data->exec_idx = outgoing_idx;
920 }
921 } ccv_nnc_graph_visit_endfor} }
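// To summarize the recycling rule above: a stream in stream_data may be
// reused for node idx only when exec_dep proves the stream's last recorded
// work (data->exec_idx) already happens-before idx (cell.i32[0] > 0); among
// reusable streams on the right device, one whose command_set already
// contains node->cmd.cmd is preferred (presumably so per-command state on
// that stream can be reused). A minimal sketch of the predicate, using only
// the locals visible above:
//
//   int reusable = cell.i32 && cell.i32[0] > 0; // idx depends on exec_idx
//   int preferred = reusable && ccv_array_find_uint(data->command_set, node->cmd.cmd);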
922 // Go through to assign signals when necessary.
923 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
924 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
925 _ccv_nnc_graph_schedule_assign_signals(incomings[idx].outgoings, node, stream_data, &signal_size, exec_info, exec_info_size);
926 } ccv_nnc_graph_visit_endfor} }
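// Signals are the cross-stream ordering primitive: when a node's consumer was
// scheduled onto a different stream, _ccv_nnc_graph_schedule_assign_signals
// gives the producer a signal (counted in signal_size) for the consumer to
// wait on -- the same mechanism the destination loop below uses to make the
// default stream wait on every non-default stream. Intended runtime shape,
// with hypothetical emit/wait names:
//
//   producer on stream A:  ... run cmd ...; emit(sig);
//   consumer on stream B:  wait(sig); ... run cmd ...;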
927 for (i = 0; i < exec_info_size; i++)
928 if (outgoings[i])
929 ccv_array_free(outgoings[i]);
930 ccfreefree(outgoings);
931 for (i = 0; i < exec_info_size; i++)
932 if (incomings[i].outgoings)
933 ccv_array_free(incomings[i].outgoings);
934 ccfreefree(incomings);
935 ccv_matrix_free(exec_dep);
936 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
937 if (device_id >= 0)
938 {
939   // If the default stream (stream 0) is not on the desired device, swap with the one that is.
940 if (default_data->device_id != device_id)
941 {
942 int exchange_stream_idx = -1;
943 // Find the stream idx to exchange.
944 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
945 int flag = 0;
946 for(i = 0; !flag && i < node->schedule.stream_size; i++)
947 {
948 const int stream_idx = SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i];
949 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
950 if (data->device_id == device_id)
951 {
952 exchange_stream_idx = stream_idx;
953 flag = 1;
954 }
955 }
956 if (flag)
957 break;
958 } ccv_nnc_graph_visit_endfor} }
959 assert(exchange_stream_idx >= 0)((void) sizeof ((exchange_stream_idx >= 0) ? 1 : 0), __extension__
({ if (exchange_stream_idx >= 0) ; else __assert_fail ("exchange_stream_idx >= 0"
, "ccv_nnc_graph.c", 959, __extension__ __PRETTY_FUNCTION__);
}))
;
960 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
961 for (i = 0; i < node->schedule.stream_size; i++)
962 if (SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] == 0)
963 SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] = -1;
964 } ccv_nnc_graph_visit_endfor} }
965 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
966 for (i = 0; i < node->schedule.stream_size; i++)
967 if (SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] == exchange_stream_idx)
968 SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] = 0;
969 } ccv_nnc_graph_visit_endfor} }
970 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
971 for (i = 0; i < node->schedule.stream_size; i++)
972 if (SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] == -1)
973 SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] = exchange_stream_idx;
974 } ccv_nnc_graph_visit_endfor} }
975 ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, exchange_stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(exchange_stream_idx)))
)->device_id = default_data->device_id;
976 default_data->device_id = device_id;
977 }
978 }
979 int graph_wait_size = 0;
980 for (i = 0; i < graph->destinations->rnum; i++)
981 {
982 const int idx = *(int*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(i)))
;
983 for (j = 0; j < exec_info[idx].schedule.stream_size; j++)
984 if (SCHEDULE_STREAMS(exec_info[idx].schedule)((exec_info[idx].schedule).stream_size <= 1 ? (exec_info[idx
].schedule)._inline_streams : (exec_info[idx].schedule)._heap_streams
)
[j] != 0) // If this exec_info doesn't end on the default stream, we need to wait.
985 ++graph_wait_size;
986 }
987 if (graph_wait_size > 0)
988 graph->waits = (graph->waits) ? ccreallocrealloc(graph->waits, sizeof(int) * graph_wait_size) : ccmallocmalloc(sizeof(int) * graph_wait_size);
989 graph_wait_size = 0;
990 for (i = 0; i < graph->destinations->rnum; i++)
991 {
992 const int idx = *(int*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(i)))
;
993 ccv_nnc_graph_exec_info_t* const destination_exec_info = exec_info + idx;
994 for (j = 0; j < exec_info[idx].schedule.stream_size; j++)
995 if (SCHEDULE_STREAMS(destination_exec_info->schedule)((destination_exec_info->schedule).stream_size <= 1 ? (
destination_exec_info->schedule)._inline_streams : (destination_exec_info
->schedule)._heap_streams)
[j] != 0) // If this exec_info doesn't end on the default stream, we need to wait.
996 {
997 ccv_nnc_stream_data_t* const default_stream_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
998 if (SCHEDULE_SIGNALS(destination_exec_info->schedule)((destination_exec_info->schedule).stream_size <= 1 ? (
destination_exec_info->schedule)._inline_signals : (destination_exec_info
->schedule)._heap_signals)
[j] < 0)
999 SCHEDULE_SIGNALS(destination_exec_info->schedule)((destination_exec_info->schedule).stream_size <= 1 ? (
destination_exec_info->schedule)._inline_signals : (destination_exec_info
->schedule)._heap_signals)
[j] = signal_size++;
1000 else if (default_stream_data->signal_set && ccv_array_find_int(default_stream_data->signal_set, SCHEDULE_SIGNALS(destination_exec_info->schedule)((destination_exec_info->schedule).stream_size <= 1 ? (
destination_exec_info->schedule)._inline_signals : (destination_exec_info
->schedule)._heap_signals)
[j]))
1001 continue;
1002 graph->waits[graph_wait_size++] = SCHEDULE_SIGNALS(destination_exec_info->schedule)((destination_exec_info->schedule).stream_size <= 1 ? (
destination_exec_info->schedule)._inline_signals : (destination_exec_info
->schedule)._heap_signals)
[j];
1003 }
1004 }
1005 graph->wait_size = graph_wait_size;
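// The two destination loops above follow the usual count-then-fill pattern:
// the first pass only sizes graph->waits, the second assigns (or reuses) a
// signal per destination stream other than stream 0, skipping signals the
// default stream is already known to wait on (default_stream_data->signal_set).
// Equivalent shape, with hypothetical names (items, p, signal_for):
//
//   n = 0; for (item : items) if (p(item)) ++n;   // pass 1: size the buffer
//   buf = realloc(buf, sizeof(int) * n);
//   k = 0; for (item : items) if (p(item)) buf[k++] = signal_for(item);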
1006 for (i = 0; i < stream_data->rnum; i++)
1007 {
1008 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1009 if (data->signal_set)
1010 ccv_array_free(data->signal_set);
1011 assert(data->command_set)((void) sizeof ((data->command_set) ? 1 : 0), __extension__
({ if (data->command_set) ; else __assert_fail ("data->command_set"
, "ccv_nnc_graph.c", 1011, __extension__ __PRETTY_FUNCTION__)
; }))
;
1012 ccv_array_free(data->command_set);
1013 }
1014 // Allocate streams & signals
1015 graph->stream_size = stream_data->rnum;
1016 graph->streams = (ccv_nnc_stream_context_t**)ccmallocmalloc(sizeof(ccv_nnc_stream_context_t*) * graph->stream_size);
1017 graph->block_stream_tasks = (ccv_nnc_stream_task_t**)cccalloccalloc(graph->stream_size, sizeof(ccv_nnc_stream_task_t*));
1018 if (stream_context)
1019 graph->streams[0] = stream_context;
1020 for (i = (stream_context ? 1 : 0); i < stream_data->rnum; i++)
1021 {
1022 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(i)))
;
1023 int type = stream_type;
1024 CCV_TENSOR_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1025 graph->streams[i] = ccv_nnc_stream_context_new(type);
1026 }
1027 int default_stream_type = stream_type;
1028 CCV_TENSOR_SET_DEVICE_ID(default_stream_type, default_data->device_id)(default_stream_type) = (((default_stream_type) & ~0xfff00
) | (((default_data->device_id) & 0xfff) << 8))
;
1029 graph->signal_size = signal_size;
1030 graph->signals = (ccv_nnc_stream_signal_t**)cccalloccalloc(signal_size, sizeof(ccv_nnc_stream_signal_t*));
1031 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
1032 for (i = 0; i < node->schedule.stream_size; i++)
1033 if (SCHEDULE_SIGNALS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_signals : (node->schedule)._heap_signals)
[i] >= 0)
1034 {
1035 const int signal = SCHEDULE_SIGNALS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_signals : (node->schedule)._heap_signals)
[i];
1036 if (!graph->signals[signal])
1037 {
1038 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(node->schedule)[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((node->schedule).stream_size <=
1 ? (node->schedule)._inline_streams : (node->schedule
)._heap_streams)[i])))
;
1039 int type = stream_type;
1040 CCV_TENSOR_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) &
0xfff) << 8))
;
1041 graph->signals[signal] = ccv_nnc_stream_signal_new(type);
1042 }
1043 }
1044 } ccv_nnc_graph_visit_endfor} }
1045 ccv_nnc_graph_visit_free(visit);
1046 for (i = 0; i < signal_size; i++)
1047 { assert(graph->signals[i])((void) sizeof ((graph->signals[i]) ? 1 : 0), __extension__
({ if (graph->signals[i]) ; else __assert_fail ("graph->signals[i]"
, "ccv_nnc_graph.c", 1047, __extension__ __PRETTY_FUNCTION__)
; }))
; }
1048 if (!graph->extern_signal)
1049 graph->extern_signal = ccv_nnc_stream_signal_new(default_stream_type);
1050 // Do this recursively for its sub graphs.
1051 if (graph->sub_graphs)
1052 for (i = 0; i < graph->sub_graphs->rnum; i++)
1053 {
1054 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
;
1055 if (sub_graph)
1056 {
1057 const int exec_idx = sub_graph->exec_idx - 1;
1058 assert(exec_info[exec_idx].schedule.stream_size == 1)((void) sizeof ((exec_info[exec_idx].schedule.stream_size == 1
) ? 1 : 0), __extension__ ({ if (exec_info[exec_idx].schedule
.stream_size == 1) ; else __assert_fail ("exec_info[exec_idx].schedule.stream_size == 1"
, "ccv_nnc_graph.c", 1058, __extension__ __PRETTY_FUNCTION__)
; }))
;
1059 const int stream_idx = SCHEDULE_STREAMS(exec_info[exec_idx].schedule)((exec_info[exec_idx].schedule).stream_size <= 1 ? (exec_info
[exec_idx].schedule)._inline_streams : (exec_info[exec_idx].schedule
)._heap_streams)
[0];
1060 const int device_id = ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
)->device_id;
1061 _ccv_nnc_graph_static_schedule(sub_graph, stream_type, device_id, graph->streams[stream_idx]);
1062 }
1063 }
1064 ccv_array_free(stream_data);
1065}
1066
1067void ccv_nnc_graph_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type)
1068{
1069 assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({
if (graph->p == 0) ; else __assert_fail ("graph->p == 0"
, "ccv_nnc_graph.c", 1069, __extension__ __PRETTY_FUNCTION__)
; }))
;
1070 _ccv_nnc_graph_static_schedule(graph, stream_type, -1, 0);
1071}
1072
1073ccv_nnc_stream_context_t* ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph)
1074{
1075 if (graph->streams && graph->stream_size > 0)
1076 return graph->streams[0];
1077 return 0;
1078}
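// A minimal usage sketch for the two entry points above, assuming a GPU
// build (CCV_STREAM_CONTEXT_GPU is the stream type used elsewhere in ccv_nnc):
//
//   ccv_nnc_graph_static_schedule(graph, CCV_STREAM_CONTEXT_GPU);
//   ccv_nnc_stream_context_t* const stream = ccv_nnc_graph_default_stream(graph);
//   // ... run the graph against `stream`, then synchronize on it ...
//
// Scheduling is only valid on a top-level graph (hence the graph->p == 0
// assert); _ccv_nnc_graph_static_schedule recurses into sub-graphs itself.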
1079
1080static void _ccv_nnc_graph_dot_exec(const int index, const ccv_nnc_graph_exec_info_t* const exec_info, ccv_nnc_stream_context_t** const streams, const int flags, FILE* out)
1081{
1082 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1083 fputc('{', out);
1084 fprintf(out, "node%d", index);
1085 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1086 {
1087 fputs("|Command: ", out);
1088 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1089 if (exec_info->schedule.stream_size > 0)
1090 {
1091 int i, flag = 0;
1092 fputs("|Stream: ", out);
1093 for (i = 0; i < exec_info->schedule.stream_size; i++)
1094 {
1095 const int device_id = streams ? CCV_TENSOR_GET_DEVICE_ID(streams[SCHEDULE_STREAMS(exec_info->schedule)[i]]->type)(((streams[((exec_info->schedule).stream_size <= 1 ? (exec_info
->schedule)._inline_streams : (exec_info->schedule)._heap_streams
)[i]]->type) & 0xfff00) >> 8)
: 0;
1096 if (i == 0)
1097 fprintf(out, "%d (d%d)", SCHEDULE_STREAMS(exec_info->schedule)((exec_info->schedule).stream_size <= 1 ? (exec_info->
schedule)._inline_streams : (exec_info->schedule)._heap_streams
)
[i], device_id);
1098 else
1099 fprintf(out, ", %d (d%d)", SCHEDULE_STREAMS(exec_info->schedule)((exec_info->schedule).stream_size <= 1 ? (exec_info->
schedule)._inline_streams : (exec_info->schedule)._heap_streams
)
[i], device_id);
1100 }
1101 for (i = 0; i < exec_info->schedule.stream_size; i++)
1102 if (SCHEDULE_SIGNALS(exec_info->schedule)((exec_info->schedule).stream_size <= 1 ? (exec_info->
schedule)._inline_signals : (exec_info->schedule)._heap_signals
)
[i] >= 0)
1103 {
1104 if (!flag)
1105 {
1106 flag = 1;
1107 fprintf(out, "|Signal: %d", SCHEDULE_SIGNALS(exec_info->schedule)((exec_info->schedule).stream_size <= 1 ? (exec_info->
schedule)._inline_signals : (exec_info->schedule)._heap_signals
)
[i]);
1108 } else
1109 fprintf(out, ", %d", SCHEDULE_SIGNALS(exec_info->schedule)((exec_info->schedule).stream_size <= 1 ? (exec_info->
schedule)._inline_signals : (exec_info->schedule)._heap_signals
)
[i]);
1110 }
1111 }
1112 if (exec_info->schedule.wait_size > 0)
1113 {
1114 fputs("|Wait: ", out);
1115 int i;
1116 for (i = 0; i < exec_info->schedule.wait_size - 1; i++)
1117 fprintf(out, "%d, ", exec_info->schedule.waits[i]);
1118 fprintf(out, "%d", exec_info->schedule.waits[exec_info->schedule.wait_size - 1]);
1119 }
1120 fputc('}', out);
1121 }
1122}
1123
1124static void _ccv_nnc_graph_dot_tensor(const int index, const ccv_nnc_tensor_t* const tensor, const int zone, const int flags, const int depth, FILE* out)
1125{
1126 // if it has an alias pointer, or, it is a long form.
1127 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1128 fputc('{', out);
1129 const int is_tensor_view = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW);
1130 if (is_tensor_view)
1131 fprintf(out, "tensorview%d", index);
1132 else
1133 fprintf(out, "tensor%d", index);
1134 int i;
1135  for (i = 0; i < depth; i++) // Print a prime per level to denote depth.
1136 fputc('\'', out);
1137 if (CCV_GET_TAPE_ALLOC(tensor->type)((tensor->type) & CCV_TAPE_ALLOC))
1138 fputs(" (t)", out);
1139 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1140 {
1141 const int device_id = CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)(((tensor->info.type) & 0xfff00) >> 8);
1142 fprintf(out, "|d%d|zone%d", device_id, zone);
1143   for (i = 0; i < depth; i++) // Print a prime per level to denote depth.
1144 fputc('\'', out);
1145 uintptr_t aptr = (uintptr_t)tensor->data.u8;
1146 const int* ainc = is_tensor_view ? ((ccv_nnc_tensor_view_t*)(tensor))->inc : tensor->info.dim;
1147 // For the last one, we don't extend to full ainc.
1148 size_t ainc_size = (ccv_nnc_dimension_count(ainc) - ainc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1149 // Print out the range as well.
1150 fprintf(out, "|{%#010x|%#010x}|%d", (uint32_t)aptr, (uint32_t)(aptr + ainc_size - 1), tensor->info.dim[0]);
1151 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(8) && tensor->info.dim[i]; i++)
1152 fprintf(out, "x%d", tensor->info.dim[i]);
1153 fputc('}', out);
1154 }
1155}
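// Worked example for the ainc_size expression above (element counts only):
// with ainc = {8, 10} (so ccv_nnc_dimension_count(ainc) = 80) and
// tensor->info.dim[0] = 3, the printed range spans 80 - 8 + 3 = 75 elements
// times the data type size -- every dimension is extended to its full
// increment except the final stretch along dim 0, which only occupies dim[0]
// elements, matching the "we don't extend to full ainc" comment.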
1156
1157typedef struct {
1158 int index;
1159 int name;
1160 int zone;
1161 uintptr_t tensor_ref;
1162 uintptr_t start_ptr;
1163 uintptr_t end_ptr;
1164} ccv_nnc_tensor_dot_t;
1165
1166typedef struct {
1167 ccv_nnc_tensor_dot_t* dots;
1168 int* remap;
1169 int* rename_zone;
1170 int* rename_index;
1171} ccv_nnc_tensor_dot_recovery_t;
1172
1173// First sort by start_ptr, then sort by tensor ptr (so that the same tensor is sorted into one cluster).
1174#define less_than(i1, i2, aux) ((i1).start_ptr < (i2).start_ptr || ((i1).start_ptr == (i2).start_ptr && (i1).tensor_ref < (i2).tensor_ref))
1175static CCV_IMPLEMENT_QSORT(_ccv_nnc_tensor_dot_sort_by_ptr, ccv_nnc_tensor_dot_t, less_than)void _ccv_nnc_tensor_dot_sort_by_ptr(ccv_nnc_tensor_dot_t *array
, size_t total, int aux) { int isort_thresh = 7; ccv_nnc_tensor_dot_t
t; int sp = 0; struct { ccv_nnc_tensor_dot_t *lb; ccv_nnc_tensor_dot_t
*ub; } stack[48]; if( total <= 1 ) return; stack[0].lb = array
; stack[0].ub = array + (total - 1); while( sp >= 0 ) { ccv_nnc_tensor_dot_t
* left = stack[sp].lb; ccv_nnc_tensor_dot_t* right = stack[sp
--].ub; for(;;) { int i, n = (int)(right - left) + 1, m; ccv_nnc_tensor_dot_t
* ptr; ccv_nnc_tensor_dot_t* ptr2; if( n <= isort_thresh )
{ insert_sort: for( ptr = left + 1; ptr <= right; ptr++ )
{ for( ptr2 = ptr; ptr2 > left && less_than(ptr2[
0],ptr2[-1], aux); ptr2--) (((t)) = ((ptr2[0])), ((ptr2[0])) =
((ptr2[-1])), ((ptr2[-1])) = ((t))); } break; } else { ccv_nnc_tensor_dot_t
* left0; ccv_nnc_tensor_dot_t* left1; ccv_nnc_tensor_dot_t* right0
; ccv_nnc_tensor_dot_t* right1; ccv_nnc_tensor_dot_t* pivot; ccv_nnc_tensor_dot_t
* a; ccv_nnc_tensor_dot_t* b; ccv_nnc_tensor_dot_t* c; int swap_cnt
= 0; left0 = left; right0 = right; pivot = left + (n/2); if(
n > 40 ) { int d = n / 8; a = left, b = left + d, c = left
+ 2*d; left = less_than(*a, *b, aux) ? (less_than(*b, *c, aux
) ? b : (less_than(*a, *c, aux) ? c : a)) : (less_than(*c, *b
, aux) ? b : (less_than(*a, *c, aux) ? a : c)); a = pivot - d
, b = pivot, c = pivot + d; pivot = less_than(*a, *b, aux) ? (
less_than(*b, *c, aux) ? b : (less_than(*a, *c, aux) ? c : a)
) : (less_than(*c, *b, aux) ? b : (less_than(*a, *c, aux) ? a
: c)); a = right - 2*d, b = right - d, c = right; right = less_than
(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than(*a, *
c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than(
*a, *c, aux) ? a : c)); } a = left, b = pivot, c = right; pivot
= less_than(*a, *b, aux) ? (less_than(*b, *c, aux) ? b : (less_than
(*a, *c, aux) ? c : a)) : (less_than(*c, *b, aux) ? b : (less_than
(*a, *c, aux) ? a : c)); if( pivot != left0 ) { (((t)) = ((*pivot
)), ((*pivot)) = ((*left0)), ((*left0)) = ((t))); pivot = left0
; } left = left1 = left0 + 1; right = right1 = right0; for(;;
) { while( left <= right && !less_than(*pivot, *left
, aux) ) { if( !less_than(*left, *pivot, aux) ) { if( left >
left1 ) (((t)) = ((*left1)), ((*left1)) = ((*left)), ((*left
)) = ((t))); swap_cnt = 1; left1++; } left++; } while( left <=
right && !less_than(*right, *pivot, aux) ) { if( !less_than
(*pivot, *right, aux) ) { if( right < right1 ) (((t)) = ((
*right1)), ((*right1)) = ((*right)), ((*right)) = ((t))); swap_cnt
= 1; right1--; } right--; } if( left > right ) break; (((
t)) = ((*left)), ((*left)) = ((*right)), ((*right)) = ((t)));
swap_cnt = 1; left++; right--; } if( swap_cnt == 0 ) { left =
left0, right = right0; goto insert_sort; } n = ({ typeof ((int
)(left1 - left0)) _a = ((int)(left1 - left0)); typeof ((int)(
left - left1)) _b = ((int)(left - left1)); (_a < _b) ? _a :
_b; }); for( i = 0; i < n; i++ ) (((t)) = ((left0[i])), (
(left0[i])) = ((left[i-n])), ((left[i-n])) = ((t))); n = ({ typeof
((int)(right0 - right1)) _a = ((int)(right0 - right1)); typeof
((int)(right1 - right)) _b = ((int)(right1 - right)); (_a <
_b) ? _a : _b; }); for( i = 0; i < n; i++ ) (((t)) = ((left
[i])), ((left[i])) = ((right0[i-n+1])), ((right0[i-n+1])) = (
(t))); n = (int)(left - left1); m = (int)(right1 - right); if
( n > 1 ) { if( m > 1 ) { if( n > m ) { stack[++sp].
lb = left0; stack[sp].ub = left0 + n - 1; left = right0 - m +
1, right = right0; } else { stack[++sp].lb = right0 - m + 1;
stack[sp].ub = right0; left = left0, right = left0 + n - 1; }
} else left = left0, right = left0 + n - 1; } else if( m >
1 ) left = right0 - m + 1, right = right0; else break; } } }
}
1176#undef less_than
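// The comparator sorts primarily by start_ptr and secondarily by tensor_ref,
// so all dots of one tensor form a contiguous run at each address. E.g. the
// (start_ptr, tensor_ref) pairs (0x100, B), (0x100, A), (0x200, A) sort to
// (0x100, A), (0x100, B), (0x200, A): equal start pointers fall back to the
// tensor pointer, which is what the zone/index sweep below relies on.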
1177
1178static int _ccv_nnc_graph_dot_tensor_multiview_count(const ccv_nnc_tensor_multiview_t* const mv)
1179{
1180 if (!CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
1181 return 1;
1182 const int count = mv->kind + mv->repeat;
1183 int i, c = 0;
1184 for (i = 0; i < count; i++)
1185 c += _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
1186 return c;
1187}
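// Worked example: a multiview with kind + repeat = 3 over three plain tensors
// counts 3; if one of those children is itself a multiview over two plain
// tensors, the recursion yields 1 + 1 + 2 = 4 -- i.e. the function counts the
// leaf (non-multiview) tensors underneath.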
1188
1189static void _ccv_nnc_graph_dot_tensor_multiview_tensor_dots(const ccv_nnc_tensor_multiview_t* const mv, ccv_nnc_tensor_dot_t* const tensor_dots, int* tensor_index)
1190{
1191 const int count = mv->kind + mv->repeat;
1192 int i;
1193 for (i = 0; i < count; i++)
1194 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1195 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], tensor_dots, tensor_index);
1196 else {
1197 tensor_dots[*tensor_index].name = *tensor_index;
1198 tensor_dots[*tensor_index].start_ptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1199   // Because the tensor view's pointer will get updated, it is not correct in this case to share one tensor_ref.
1200 tensor_dots[*tensor_index].tensor_ref = tensor_dots[*tensor_index].start_ptr;
1201 const size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1202 tensor_dots[*tensor_index].end_ptr = tensor_dots[*tensor_index].start_ptr + dim_size - 1;
1203 ++(*tensor_index);
1204 }
1205}
1206
1207static ccv_nnc_tensor_dot_recovery_t _ccv_nnc_graph_tensor_dot_recovery(const ccv_nnc_graph_t* const graph)
1208{
1209 int i, j;
1210 // Recover tensor relationships for all tensors referenced in the graph.
1211 // Most notably, we have to assign these indexes, and find whether they point
1212 // to the same memory region, and whether they overlap. This information
1213 // was lost when we converted from the symbolic form to the execution form,
1214 // and here we do our best to recover it because that is easier to understand
1215 // if we want to present the graph visually (also, we don't want to put this
1216 // information into the tensor or execution graph, to avoid overhead; thus,
1217 // recovering is the best we can do).
1218 int tensor_count = 0;
1219 for (i = 0; i < graph->exec_info->rnum; i++)
1220 {
1221 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1222 for (j = 0; j < exec_info->input_size; j++)
1223 if (exec_info->inputs[j])
1224 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[j])((*(int*)(exec_info->inputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->inputs[j]) : 1;
1225 for (j = 0; j < exec_info->output_size; j++)
1226 if (exec_info->outputs[j])
1227 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[j])((*(int*)(exec_info->outputs[j])) & CCV_TENSOR_MULTIVIEW
)
? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->outputs[j]) : 1;
1228 }
1229 ccv_nnc_tensor_dot_t* tensor_dots = tensor_count > 0 ? (ccv_nnc_tensor_dot_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_dot_t) * tensor_count) : 0;
1230 int k = 0;
1231 for (i = 0; i < graph->exec_info->rnum; i++)
1232 {
1233 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1234 for (j = 0; j < exec_info->input_size; j++)
1235 {
1236 ccv_nnc_tensor_t* tensor = exec_info->inputs[j];
1237 if (!tensor)
1238 continue;
1239 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1240 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1241 else {
1242 tensor_dots[k].name = k;
1243 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1244 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1245 const int* inc = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1246 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1247 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1248 ++k;
1249 }
1250 }
1251 for (j = 0; j < exec_info->output_size; j++)
1252 {
1253 ccv_nnc_tensor_t* tensor = exec_info->outputs[j];
1254 if (!tensor)
1255 continue;
1256 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1257 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1258 else {
1259 tensor_dots[k].name = k;
1260 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1261 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1262 const int* inc = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1263 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >>
12]
;
1264 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1265 ++k;
1266 }
1267 }
1268 }
1269 tensor_count = k; // We may have over-counted; shrink now.
1270 // To group overlapping memory into one zone, we sort by start ptr first (secondarily by the tensor pointer).
1271 _ccv_nnc_tensor_dot_sort_by_ptr(tensor_dots, tensor_count, 0);
1272 int index = 0, zone = 0;
1273 uintptr_t tensor_ref = tensor_count > 0 ? tensor_dots[0].tensor_ref : 0;
1274 uintptr_t end_ptr = tensor_count > 0 ? tensor_dots[0].end_ptr : 0;
1275 // Then it is trivial: we sweep by end ptr. If the next start ptr is still within the current end ptr (start ptr <= end ptr),
1276 // it belongs to the same zone.
1277 for (i = 0; i < tensor_count; i++)
1278 {
1279 if (tensor_dots[i].tensor_ref != tensor_ref)
1280 {
1281 tensor_ref = tensor_dots[i].tensor_ref;
1282 ++index;
1283 }
1284 if (tensor_dots[i].start_ptr > end_ptr)
1285 {
1286 end_ptr = ccv_max(end_ptr, tensor_dots[i].end_ptr)({ typeof (end_ptr) _a = (end_ptr); typeof (tensor_dots[i].end_ptr
) _b = (tensor_dots[i].end_ptr); (_a > _b) ? _a : _b; })
;
1287 ++zone;
1288 }
1289 tensor_dots[i].index = index;
1290 tensor_dots[i].zone = zone;
1291 }
1292 // We already have index and zone assigned, but the problem is that these are not very human-interpretable (because
1293 // they follow the pointers from low to high, not the tensor creation order). The following code renames both the index
1294 // and the zone so that they are much more understandable.
1295 const int index_count = index + 1;
1296 const int zone_count = zone + 1;
1297 int* remap = (int*)ccmallocmalloc(sizeof(int) * (tensor_count + index_count + zone_count));
1298 int* rename_index = remap + tensor_count;
1299 int* rename_zone = rename_index + index_count;
1300 for (i = 0; i < tensor_count; i++)
1301 remap[tensor_dots[i].name] = i;
1302 for (i = 0; i < index_count; i++)
1303 rename_index[i] = -1;
1304 for (i = 0; i < zone_count; i++)
1305 rename_zone[i] = -1;
1306 index = 0;
1307 zone = 0;
1308 for (i = 0; i < tensor_count; i++)
1309 {
1310 ccv_nnc_tensor_dot_t* tensor_dot = tensor_dots + remap[i];
1311 if (rename_index[tensor_dot->index] == -1)
1312 rename_index[tensor_dot->index] = index++;
1313 if (rename_zone[tensor_dot->zone] == -1)
1314 rename_zone[tensor_dot->zone] = zone++;
1315 }
1316 ccv_nnc_tensor_dot_recovery_t recovery = {
1317 .dots = tensor_dots,
1318 .remap = remap,
1319 .rename_index = rename_index,
1320 .rename_zone = rename_zone,
1321 };
1322 return recovery;
1323}
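// Worked example of the index/zone sweep above, with hypothetical addresses:
// dots sorted by start_ptr as A [0x100, 0x1ff], B [0x180, 0x2ff],
// C [0x400, 0x4ff]. A opens zone 0 with index 0; B's start_ptr (0x180) is
// within the current end_ptr (0x1ff), so B shares zone 0 but, being a
// different tensor_ref, takes index 1; C starts past the end pointer, so it
// opens zone 1 with index 2. The rename pass then renumbers both labels in
// first-appearance order, so the dot output follows tensor creation order
// rather than raw address order.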
1324
1325static void _ccv_nnc_graph_tensor_dot_recovery_free(const ccv_nnc_tensor_dot_recovery_t recovery)
1326{
1327 ccfreefree(recovery.dots);
1328 ccfreefree(recovery.remap);
1329}
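// Note that only dots and remap are freed: rename_index and rename_zone point
// into the same allocation as remap (all three were carved out of the single
// tensor_count + index_count + zone_count block above), so these two frees
// release everything the recovery holds.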
1330
1331static void _ccv_nnc_graph_dot_tensor_multiview_one(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int depth, int* tensor_index, FILE* out)
1332{
1333 const int count = mv->kind + mv->repeat;
1334 int i, j;
1335 fputs("|{", out);
1336 for (i = 0; i < count; i++)
1337 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
1338 {
1339 fprintf(out, "{%d", i);
1340 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1341 fputc('*', out); // Denotes that we loop on this.
1342 _ccv_nnc_graph_dot_tensor_multiview_one((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], recovery, depth, tensor_index, out);
1343 if (i == count - 1)
1344 fputc('}', out);
1345 else
1346 fputs("}|", out);
1347 } else {
1348 fprintf(out, "{%d", i);
1349 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1350 fputc('*', out); // Denotes that we loop on this.
1351 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1352 fprintf(out, "|zone%d", recovery.rename_zone[tensor_dot->zone]);
1353 for (j = 0; j < depth; j++)
1354 fputc('\'', out);
1355 uintptr_t aptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->data.u8;
1356 // For the last one, we don't extend to full ainc.
1357 size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type)_ccv_get_data_type_size[((((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->type) & 0xFF000) >>
12]
;
1358 // Print out the range as well.
1359 fprintf(out, "|{%#010x|%#010x}", (uint32_t)aptr, (uint32_t)(aptr + dim_size - 1));
1360 ++(*tensor_index);
1361 if (i == count - 1)
1362 fputc('}', out);
1363 else
1364 fputs("}|", out);
1365 }
1366 fputc('}', out);
1367}
1368
1369static void _ccv_nnc_graph_dot_tensor_multiview(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, int* tensor_index, FILE* out)
1370{
1371 // if it has an alias pointer, or, it is a long form.
1372 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1373 fputc('{', out);
1374 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1375 fprintf(out, "multiview%d", recovery.rename_index[tensor_dot->index]);
1376 int i;
1377  for (i = 0; i < depth; i++) // Print a prime per level to denote depth.
1378 fputc('\'', out);
1379 if (CCV_GET_TAPE_ALLOC(mv->type)((mv->type) & CCV_TAPE_ALLOC))
1380 fputs(" (t)", out);
1381 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1382 {
1383 _ccv_nnc_graph_dot_tensor_multiview_one(mv, recovery, depth, tensor_index, out);
1384 const ccv_nnc_tensor_t* root = (ccv_nnc_tensor_t*)mv;
1385 while (CCV_IS_TENSOR_MULTIVIEW(root)((*(int*)(root)) & CCV_TENSOR_MULTIVIEW))
1386 root = CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)root)(((ccv_nnc_tensor_multiview_t*)root)->_heap_data ? ((ccv_nnc_tensor_multiview_t
*)root)->_heap_data : ((ccv_nnc_tensor_multiview_t*)root)->
_inline_data)
[0];
1387 fprintf(out, "|%d", root->info.dim[0]);
1388 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(8) && root->info.dim[i]; i++)
1389 fprintf(out, "x%d", root->info.dim[i]);
1390 fputc('}', out);
1391 } else
1392 *tensor_index += _ccv_nnc_graph_dot_tensor_multiview_count(mv);
1393}
1394
1395static void _ccv_nnc_graph_dot_node(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, ccv_nnc_stream_context_t** const streams, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* const tensor_index)
1396{
1397 fprintf(out, "node%d [shape=record,label=\"", exec_index);
1398 _ccv_nnc_graph_dot_exec(exec_index, exec_info, streams, flags, out);
1399 int i;
1400 int k = *tensor_index;
1401 if (exec_info->input_size > 0)
1402 {
1403 fputs("|{Input", out);
1404 for (i = 0; i < exec_info->input_size; i++)
1405 if (exec_info->inputs[i])
1406 {
1407 fputc('|', out);
1408 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1409 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1410 else {
1411 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1412 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1413 ++k;
1414 }
1415 } else
1416 fputs("|-", out);
1417 fputc('}', out);
1418 }
1419 if (exec_info->output_size > 0)
1420 {
1421 fputs("|{Output", out);
1422 for (i = 0; i < exec_info->output_size; i++)
1423 if (exec_info->outputs[i])
1424 {
1425 fputc('|', out);
1426 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1427 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1428 else {
1429 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1430 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1431 ++k;
1432 }
1433 } else
1434 fputs("|-", out);
1435 fputc('}', out);
1436 }
1437 fputs("\"];\n", out);
1438 *tensor_index = k;
1439}
1440
1441static void _ccv_nnc_graph_dot_while_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const ccv_nnc_graph_t* const while_graph, const int flags, const int depth, FILE* out, int* tensor_index)
1442{
1443 int i;
1444 fprintf(out, "label=<<b>while%d </b>Command: ", exec_index);
1445 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1446 fputs(">;\n", out);
1447 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1448 int k = *tensor_index;
1449 if (exec_info->input_size > 0)
1450 {
1451 fputs("{Input|{", out);
1452 for (i = 0; i < exec_info->input_size; i++)
1453 {
1454 if (i > 0)
1455 fputc('|', out);
1456 if (exec_info->inputs[i])
1457 {
1458 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1459 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1460 else {
1461 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1462 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1463 ++k;
1464 }
1465 } else
1466 fputc('-', out);
1467 }
1468 fputs("}}", out);
1469 }
1470 if (exec_info->output_size > 0)
1471 {
1472 if (exec_info->input_size > 0)
1473 fputs("|", out);
1474 fputs("{Output|{", out);
1475 for (i = 0; i < exec_info->output_size; i++)
1476 {
1477 if (i > 0)
1478 fputc('|', out);
1479 if (exec_info->outputs[i])
1480 {
1481 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1482 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1483 else {
1484 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1485 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1486 ++k;
1487 }
1488 } else
1489 fputc('-', out);
1490 }
1491 fputs("}}", out);
1492 }
1493 fputs("}\"];\n", out);
1494 *tensor_index = k;
1495}
1496
1497static void _ccv_nnc_graph_dot_case_of_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* tensor_index)
1498{
1499 int i;
1500 fprintf(out, "label=<<b>caseof%d </b>Command: ", exec_index);
1501 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1502 fputs(">;\n", out);
1503 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1504 int k = *tensor_index;
1505 if (exec_info->input_size > 0)
1506 {
1507 fputs("{Input|{", out);
1508 for (i = 0; i < exec_info->input_size; i++)
1509 {
1510 if (i > 0)
1511 fputc('|', out);
1512 if (exec_info->inputs[i])
1513 {
1514 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])((*(int*)(exec_info->inputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1515 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1516 else {
1517 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1518 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1519 ++k;
1520 }
1521 } else
1522 fputc('-', out);
1523 }
1524 fputs("}}", out);
1525 }
1526 if (exec_info->output_size > 0)
1527 {
1528 if (exec_info->input_size > 0)
1529 fputs("|", out);
1530 fputs("{Output|{", out);
1531 for (i = 0; i < exec_info->output_size; i++)
1532 {
1533 if (i > 0)
1534 fputc('|', out);
1535 if (exec_info->outputs[i])
1536 {
1537 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])((*(int*)(exec_info->outputs[i])) & CCV_TENSOR_MULTIVIEW
)
)
1538 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1539 else {
1540 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1541 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1542 ++k;
1543 }
1544 } else
1545 fputc('-', out);
1546 }
1547 fputs("}}", out);
1548 }
1549 fputs("}\"];\n", out);
1550 *tensor_index = k;
1551}
1552
1553static void _ccv_nnc_graph_dot_sub_graphs(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_tensor_dot_recovery_t p_recovery, const ccv_array_t* const sub_graphs, const int flags, const int depth, FILE* out, int* tensor_index, int* exec_index)
1554{
1555 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
1556 {
1557 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1558 const ccv_nnc_graph_t* const while_graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[0] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[0]
- 1)))
;
1559 // Output this node info within this subgraph.
1560 _ccv_nnc_graph_dot_while_label(exec_info, *exec_index, p_recovery, while_graph, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1561 } else if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
1562 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1563 _ccv_nnc_graph_dot_case_of_label(exec_info, *exec_index, p_recovery, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1564 }
1565 ++(*exec_index);
1566 int p;
1567 for (p = 0; p < exec_info->graph_ref_size; p++)
1568 {
1569 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1570 {
1571 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=\"\"\n", *exec_index, *exec_index);
1572 ++(*exec_index);
1573 }
1574 const ccv_nnc_graph_t* const graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[p] - 1)((void*)(((char*)((sub_graphs)->data)) + (size_t)(sub_graphs
)->rsize * (size_t)(((exec_info)->_heap_graph_ref ? (exec_info
)->_heap_graph_ref : (exec_info)->_inline_graph_ref)[p]
- 1)))
;
1575 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1576 int i, j;
1577 int k = 0;
1578 int* node_id = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
1579 // Output styles.
1580 for (i = 0; i < graph->exec_info->rnum; i++)
1581 {
1582 node_id[i] = *exec_index;
1583 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(i)))
;
1584 if (CCV_NNC_GRAPH_REF(exec_info)((exec_info)->_heap_graph_ref ? (exec_info)->_heap_graph_ref
: (exec_info)->_inline_graph_ref)
[0])
1585 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, depth + 1, out, &k, exec_index);
1586 else {
1587 _ccv_nnc_graph_dot_node(exec_info, *exec_index, graph->streams, recovery, flags, depth, out, &k);
1588 ++(*exec_index);
1589 }
1590 }
1591 // Output connections.
1592 for (i = 0; i < graph->exec_info->rnum; i++)
1593 {
1594 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1595 if (exec_info->outgoings)
1596 for (j = 0; j < exec_info->outgoings->rnum; j++)
1597 {
1598 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j);
1599 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx);
1600 // If both endpoints are sub-graphs, specify both the edge tail and head clusters.
1601 if (CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1602 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1603 else if (CCV_NNC_GRAPH_REF(exec_info)[0] && !CCV_NNC_GRAPH_REF(outgoing_info)[0])
1604 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1605 else if (!CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1606 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1607 else
1608 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1609 }
1610 }
1611 fputs("}\n", out);
1612 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1613 ccfree(node_id);
1614 }
1615 // Extra subgraph cluster.
1616 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1617 fputs("}\n", out);
1618}
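Tying the pieces together: each sub-graph becomes a subgraph cluster%d with an invisible anchor node, and any edge that touches a cluster carries ltail/lhead so Graphviz clips it at the cluster boundary; this relies on compound=true, which ccv_nnc_graph_dot below writes at the top of the file. A schematic of the output for a plain node feeding a while-loop sub-graph (bodies elided, numbering illustrative):

	digraph G {
	compound=true;
	node0 [...];
	subgraph cluster1 {
	style="rounded";
	node1 [style=invisible];
	...
	}
	node0 -> node1 [lhead=cluster1];
	}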
1619
1620void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out)
1621{
1622 fputs("digraph G {\ncompound=true;\n", out);
1623 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1624 int i, j;
1625 int k = 0, c = 0;
1626 int* node_id = (int*)ccmalloc(sizeof(int) * graph->exec_info->rnum);
1627 // Output styles.
1628 for (i = 0; i < graph->exec_info->rnum; i++)
1629 {
1630 node_id[i] = c;
1631 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1632 if (CCV_NNC_GRAPH_REF(exec_info)[0])
1633 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, 1, out, &k, &c);
1634 else {
1635 _ccv_nnc_graph_dot_node(exec_info, c, graph->streams, recovery, flags, 0, out, &k);
1636 ++c;
1637 }
1638 }
1639 // Output connections.
1640 for (i = 0; i < graph->exec_info->rnum; i++)
1641 {
1642 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1643 if (exec_info->outgoings)
1644 for (j = 0; j < exec_info->outgoings->rnum; j++)
1645 {
1646 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j);
1647 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx);
1648 // If both endpoints are sub-graphs, specify both the edge tail and head clusters.
1649 if (CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1650 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1651 else if (CCV_NNC_GRAPH_REF(exec_info)[0] && !CCV_NNC_GRAPH_REF(outgoing_info)[0])
1652 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1653 else if (!CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1654 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1655 else
1656 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1657 }
1658 }
1659 fputs("}\n", out);
1660 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1661 ccfree(node_id);
1662}
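A minimal usage sketch for this entry point (the flag CCV_NNC_LONG_DOT_GRAPH is an assumption about the verbose flag commonly paired with this API; any writable FILE*, including stdout, works):

	FILE* f = fopen("graph.dot", "w+");
	if (f)
	{
		ccv_nnc_graph_dot(graph, CCV_NNC_LONG_DOT_GRAPH, f);
		fclose(f);
	}
	/* render offline, e.g.: dot -Tpng graph.dot -o graph.png */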
1663
1664void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1665{
1666 // Execute the current node directly: synchronous CPU execution, no stream context involved.
1667 int i;
1668#define visitor(node, idx, ...) \
1669 do { \
1670 if (node->cmd.cmd == CCV_NNC_NOOP) \
1671 continue; \
1672 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) \
1673 for (i = 0; i < node->graph_ref_size; i++) \
1674 { \
1675 ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[i] - 1); \
1676 ccv_nnc_graph_autotune(sub_graph, max_workspace_size, flags, 0, 0, 0, 0); \
1677 } \
1678 else { \
1679 /* Need to unwrap these tensors */ \
1680 for (i = 0; i < node->input_size + node->output_size; i++) \
1681 if (node->inputs[i] && CCV_IS_TENSOR_MULTIVIEW(node->inputs[i])) \
1682 node->inputs[i] = _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)node->inputs[i]); \
1683 PRINT(CCV_CLI_VERBOSE, "%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node
->cmd.cmd), idx, node->input_size, node->output_size
); fflush(stdout); } } while (0)
; \
1684 for (i = 0; i < node->input_size; i++) \
1685 PRINT(CCV_CLI_VERBOSE, "|-> %d. %p (%p)\n", i + 1, node->inputs[i], (node->inputs[i] ? node->inputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-> %d. %p (%p)\n", i + 1, node->inputs[i], (
node->inputs[i] ? node->inputs[i]->data.u8 : 0)); fflush
(stdout); } } while (0)
; \
1686 for (i = 0; i < node->output_size; i++) \
1687 PRINT(CCV_CLI_VERBOSE, "|<- %d. %p (%p)\n", i + 1, node->outputs[i], (node->outputs[i] ? node->outputs[i]->data.u8 : 0))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|<- %d. %p (%p)\n", i + 1, node->outputs[i],
(node->outputs[i] ? node->outputs[i]->data.u8 : 0))
; fflush(stdout); } } while (0)
; \
1688 node->cmd = ccv_nnc_cmd_autotune(node->cmd, max_workspace_size, node->hint, flags, node->inputs, node->input_size, node->outputs, node->output_size, 0); \
1689 } \
1690 } while (0)
1691 const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0) : 0);
1. Assuming 'sources' is null
2. '?' condition is false
3. Assuming the condition is false
4. '?' condition is false
5. 'graph_sources' initialized to a null pointer value
1692 const int graph_source_size = source_size ? source_size : (graph->sources ? graph->sources->rnum : 0);
6. Assuming 'source_size' is not equal to 0
7. '?' condition is true
1693 const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0) : 0);
8. Assuming 'destinations' is non-null
9. '?' condition is true
1694 const int graph_destination_size = destination_size ? destination_size : (graph->destinations ? graph->destinations->rnum : 0);
10. Assuming 'destination_size' is not equal to 0
11. '?' condition is true
1695 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor);
12. Assuming the condition is false
13. Loop condition is false. Execution continues on line 1695
14. Taking false branch
15. Assuming '_i_' is < 'graph_source_size'
16. Loop condition is true. Entering loop body
17. Dereference of null pointer
1696#undef visitor
1697}
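The analyzer path above (steps 1-17) captures the reported defect: a caller can pass sources == 0 together with a non-zero source_size to a graph whose own sources array was never set, so graph_sources is initialized to null at line 1691 while graph_source_size stays non-zero at line 1692; CCV_NNC_GRAPH_VISIT then enters its source loop and dereferences graph_sources[_i_] at line 1695. A minimal guard before the visit is one possible remedy (a sketch only, not necessarily the author's intended fix; asserting that the pointer/size arguments always arrive as a consistent pair would be an alternative):

	if (!graph_sources || !graph_destinations)
		return; /* nothing to traverse; avoids the null dereference at line 1695 */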
1698
1699void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph)
1700{
1701 int i, j;
1702 for (i = 0; i < graph->exec_info->rnum; i++)
1703 {
1704 ccv_nnc_graph_exec_info_t *info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1705 if (info->_heap_graph_ref)
1706 ccfree(info->_heap_graph_ref);
1707 ccv_array_t* outgoings = info->outgoings;
1708 if (outgoings)
1709 ccv_array_free(outgoings);
1710 // Inputs & outputs share one continuous allocation, so only the input array needs to be freed (see the sketch after this function).
1711 if (info->inputs)
1712 ccfree(info->inputs);
1713 if (info->input_flags)
1714 ccfree(info->input_flags);
1715 if (info->updates)
1716 ccfree(info->updates);
1717 if ((info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) && info->p_while.inputs)
1718 ccfree(info->p_while.inputs);
1719 if (info->schedule.stream_size > 1)
1720 ccfree(info->schedule._heap_streams);
1721 if (info->schedule.waits)
1722 ccfree(info->schedule.waits);
1723 }
1724 if (graph->tensor_wraps)
1725 {
1726 for (i = 0; i < graph->tensor_wraps->rnum; i++)
1727 {
1728 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, i);
1729 if (tensor_wrap_array)
1730 {
1731 for (j = 0; j < tensor_wrap_array->size; j++)
1732 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[j]);
1733 ccfree(tensor_wrap_array);
1734 }
1735 }
1736 ccv_array_free(graph->tensor_wraps);
1737 }
1738 if (graph->tensor_wraps_refs)
1739 ccv_array_free(graph->tensor_wraps_refs);
1740 if (graph->breakpoints)
1741 ccfree(graph->breakpoints);
1742 if (graph->sources)
1743 ccv_array_free(graph->sources);
1744 if (graph->destinations)
1745 ccv_array_free(graph->destinations);
1746 if (graph->streams)
1747 {
1748 // If the graph has a parent graph, the default stream was allocated by the parent, so skip it here.
1749 if (!graph->p)
1750 ccv_nnc_stream_context_free(graph->streams[0]);
1751 for (i = 1; i < graph->stream_size; i++)
1752 ccv_nnc_stream_context_free(graph->streams[i]);
1753 ccfree(graph->streams);
1754 }
1755 if (graph->block_stream_tasks)
1756 ccfree(graph->block_stream_tasks);
1757 if (graph->signals)
1758 {
1759 for (i = 0; i < graph->signal_size; i++)
1760 ccv_nnc_stream_signal_free(graph->signals[i]);
1761 ccfree(graph->signals);
1762 }
1763 if (graph->extern_signal)
1764 ccv_nnc_stream_signal_free(graph->extern_signal);
1765 if (graph->waits)
1766 ccfree(graph->waits);
1767 if (graph->carry_overs)
1768 {
1769 for (i = 0; i < graph->carry_overs->rnum; i++)
1770 {
1771 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i);
1772 _ccv_nnc_graph_tensor_wrap_free(carry_over->from);
1773 _ccv_nnc_graph_tensor_wrap_free(carry_over->to);
1774 }
1775 ccv_array_free(graph->carry_overs);
1776 }
1777 if (graph->sub_graphs)
1778 {
1779 for (i = 0; i < graph->sub_graphs->rnum; i++)
1780 ccv_nnc_graph_free(*(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i));
1781 ccv_array_free(graph->sub_graphs);
1782 }
1783 ccv_array_free(graph->exec_info);
1784 ccfree(graph);
1785}
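As the comment at line 1710 notes, each node's input and output tensor pointers live in one continuous allocation, which is why ccv_nnc_graph_free releases only info->inputs. A hypothetical sketch of that allocation pattern (names and the allocation site are illustrative, not the actual code):

	/* one block holds input_size + output_size tensor pointers */
	ccv_nnc_tensor_t** inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
	ccv_nnc_tensor_t** outputs = inputs + input_size; /* outputs alias the tail of the same block */
	/* ... populate and use ... */
	ccfree(inputs); /* frees both arrays at once; ccfree(outputs) would be wrong */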