Bug Summary

File: nnc/ccv_nnc_graph.c
Warning: line 810, column 2
Use of zero-allocated memory
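
Line 810 is outside the excerpt below. As a minimal sketch of this warning class (hypothetical code, not the flagged statement): the analyzer reports "use of zero-allocated memory" when storage obtained from a zero-size calloc()/malloc() is subsequently read or written.

#include <stdlib.h>

int main(void)
{
    int n = 0; /* e.g., an empty container: rnum == 0 */
    int* p = (int*)calloc(n, sizeof(int)); /* zero bytes allocated */
    p[0] = 1; /* warning: use of zero-allocated memory */
    free(p);
    return 0;
}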

Annotated Source Code


clang -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_graph.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model static -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -target-feature +sse2 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -resource-dir /usr/local/lib/clang/8.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_UCONTEXT -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -I /usr/local/include -internal-isystem /usr/local/include -internal-isystem /usr/local/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir /home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fmessage-length 0 -fblocks -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -o /home/liu/buildslave/public_html/analyze/2019-05-04-163002-105371-1 -x c ccv_nnc_graph.c -faddrsig
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_graph.h"
6
7#pragma mark - Level-2 API
8
9ccv_nnc_graph_t* ccv_nnc_graph_new(void)
10{
11 ccv_nnc_graph_t* graph = (ccv_nnc_graph_t*)cccalloc(1, sizeof(ccv_nnc_graph_t));
12 graph->exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), 5, 0);
13 return graph;
14}
15
16void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size)
17{
18 if (!graph->sources)
19 graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), source_size, 0);
20 else
21 ccv_array_clear(graph->sources);
22 int i;
23 for (i = 0; i < source_size; i++)
24 ccv_array_push(graph->sources, sources + i);
25 graph->topsorted = 0;
26}
27
28ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph)
29{
30 return graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0) : 0;
31}
32
33int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph)
34{
35 return graph->sources ? graph->sources->rnum : 0;
36}
37
38void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
39{
40 if (!graph->destinations)
41 graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), destination_size, 0);
42 else
43 ccv_array_clear(graph->destinations);
44 int i;
45 for (i = 0; i < destination_size; i++)
46 ccv_array_push(graph->destinations, destinations + i);
47 graph->topsorted = 0;
48}
49
50ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph)
51{
52 return graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0) : 0;
53}
54
55int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph)
56{
57 return graph->destinations ? graph->destinations->rnum : 0;
58}
59
60void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd)
61{
62 assert(exec.d < graph->exec_info->rnum);
63 assert(exec.graph == graph);
64 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
65 exec_info->cmd = cmd;
66}
67
68void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint)
69{
70 assert(exec.d < graph->exec_info->rnum);
71 assert(exec.graph == graph);
72 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
73 exec_info->hint = hint;
74}
75
76static int _ccv_nnc_tensor_multiview_level_count(const ccv_nnc_tensor_multiview_t* const mv)
77{
78 if (!CCV_IS_TENSOR_MULTIVIEW(mv))
79 return 1;
80 const int count = mv->kind + mv->repeat;
81 int i, c = 0;
82 for (i = 0; i < count; i++)
83 {
84 ccv_nnc_tensor_t* tv = CCV_NNC_MULTIVIEW_DATA(mv)[i];
85 if (tv == CCV_NNC_TENSOR_PLACEHOLDER)
86 c = ccv_max(c, 1);
87 else
88 c = ccv_max(c, _ccv_nnc_tensor_multiview_level_count((ccv_nnc_tensor_multiview_t*)tv));
89 }
90 return c + 1;
91}
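
The recursion above computes the nesting depth of a multiview tensor: a plain tensor counts as one level, and each enclosing multiview adds one. A worked sketch with hypothetical nesting:

/* plain tensor t                                -> 1 (not a multiview)
 * mv1 over { t, t }                             -> max(1, 1) + 1 = 2
 * mv2 over { mv1, CCV_NNC_TENSOR_PLACEHOLDER }  -> max(2, 1) + 1 = 3
 */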
92
93static ccv_nnc_graph_tensor_wrap_t* _ccv_nnc_graph_tensor_wrap_new(const ccv_nnc_tensor_multiview_t* const mv)
94{
95 const int level_count = _ccv_nnc_tensor_multiview_level_count(mv);
96 ccv_nnc_graph_tensor_wrap_t* tensor_wrap = (ccv_nnc_graph_tensor_wrap_t*)ccmalloc(sizeof(ccv_nnc_graph_tensor_wrap_t) + sizeof(ccv_nnc_tensor_t*) * (level_count - 1));
97 tensor_wrap->update_required = 0;
98 tensor_wrap->count = level_count;
99 tensor_wrap->index = 0;
100 tensor_wrap->tensors[0] = (ccv_nnc_tensor_t*)mv;
101 return tensor_wrap;
102}
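
The allocation above over-allocates past the struct so that the trailing tensors[] member, declared with a single slot, can hold level_count entries (the slots past tensors[0] are presumably filled as the multiview is unwrapped level by level). A self-contained sketch of this one-element trailing-array idiom, with a hypothetical flex_t type:

#include <stdlib.h>

typedef struct {
    int count;
    void* items[1]; /* declared with one slot; extended by over-allocation */
} flex_t;

static flex_t* flex_new(const int count)
{
    /* count - 1 because items[0] is already inside sizeof(flex_t). */
    flex_t* const f = (flex_t*)malloc(sizeof(flex_t) + sizeof(void*) * (count - 1));
    f->count = count;
    return f;
}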
103
104static void _ccv_nnc_graph_exec_rewind(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
105{
106 if (!info->tensor_wraps_ref)
107 return;
108 int i;
109 assert(info->tensor_wraps_ref <= graph->tensor_wraps->rnum);
110 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
111 // Rewind from tensor wraps.
112 for (i = 0; i < info->input_size; i++)
113 if (tensor_wrap_array->tensor_wraps[i])
114 info->inputs[i] = tensor_wrap_array->tensor_wraps[i]->tensors[0];
115 const int d = info->input_size;
116 for (i = 0; i < info->output_size; i++)
117 if (tensor_wrap_array->tensor_wraps[d + i])
118 info->outputs[i] = tensor_wrap_array->tensor_wraps[d + i]->tensors[0];
119 const int dd = info->input_size + info->output_size;
120 for (i = 0; i < info->update_size; i++)
121 if (tensor_wrap_array->tensor_wraps[dd + i])
122 info->updates[i] = tensor_wrap_array->tensor_wraps[dd + i]->tensors[0];
123}
124
125static void _ccv_nnc_graph_tensor_wrap_free(ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
126{
127 ccfree(tensor_wrap);
128}
129
130ccv_nnc_graph_tensor_wrap_array_t* ccv_nnc_get_tensor_wrap_array(ccv_nnc_graph_t* const graph, const int tensor_wrap_size, int* const tensor_wraps_ref)
131{
132 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = *tensor_wraps_ref ? (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, *tensor_wraps_ref - 1) : 0;
133 // Otherwise, find an open slot.
134 if (!tensor_wrap_array_ref)
135 {
136 if (!graph->tensor_wraps)
137 graph->tensor_wraps = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wrap_array_t*), 0, 0);
138 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = 0;
139 ccv_array_push(graph->tensor_wraps, &tensor_wrap_array);
140 tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, graph->tensor_wraps->rnum - 1);
141 *tensor_wraps_ref = graph->tensor_wraps->rnum;
142 }
143 int i;
144 if (*tensor_wrap_array_ref)
145 {
146 if ((*tensor_wrap_array_ref)->size != tensor_wrap_size)
147 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)ccrealloc(*tensor_wrap_array_ref, sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1));
148 for (i = (*tensor_wrap_array_ref)->size; i < tensor_wrap_size; i++)
149 (*tensor_wrap_array_ref)->tensor_wraps[i] = 0;
150 } else
151 *tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)cccalloc(sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1), 1);
152 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
153 tensor_wrap_array->size = tensor_wrap_size;
154 return tensor_wrap_array;
155}
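
Note the handle convention used here and elsewhere in this file: tensor_wraps_ref stores slot + 1 so that 0 can mean "no wraps", and every lookup subtracts 1 (peer_ref below uses the same d + 1 scheme). A sketch of the encoding:

/* 1-based handle: 0 encodes "none"; any other value v names 0-based slot v - 1. */
static inline int ref_encode(const int slot) { return slot + 1; }
static inline int ref_is_none(const int ref) { return ref == 0; }
static inline int ref_decode(const int ref) { return ref - 1; } /* requires ref != 0 */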
156
157void ccv_nnc_set_tensor_wraps(ccv_nnc_graph_tensor_wrap_t** const tensor_wraps, ccv_nnc_tensor_t* const* const tensors, const int tensor_size)
158{
159 int i;
160 for (i = 0; i < tensor_size; i++)
161 if (tensors[i])
162 {
163 if (CCV_IS_TENSOR_MULTIVIEW(tensors[i]) &&
164 ((ccv_nnc_tensor_multiview_t*)tensors[i])->anchor != CCV_NNC_MULTIVIEW_PHI)
165 {
166 if (!tensor_wraps[i] || tensors[i] != tensor_wraps[i]->tensors[0])
167 {
168 if (tensor_wraps[i])
169 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
170 tensor_wraps[i] = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)tensors[i]);
171 }
172 } else {
173 if (tensor_wraps[i])
174 _ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
175 tensor_wraps[i] = 0;
176 }
177 }
178}
179
180void ccv_nnc_graph_register_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
181{
182 ccv_nnc_graph_t* p = graph;
183 const ccv_nnc_graph_tensor_wraps_ref_t tensor_wraps_ref = {
184 .d = tensor_wraps_ref_d,
185 .graph = graph,
186 };
187 do {
188 if (!p->tensor_wraps_refs)
189 {
190 p->tensor_wraps_refs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wraps_ref_t), 0, 0);
191 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
192 } else {
193 int i;
194 int has_tensor_wraps_ref = 0;
195 for (i = 0; !has_tensor_wraps_ref && i < p->tensor_wraps_refs->rnum; i++)
196 {
197 ccv_nnc_graph_tensor_wraps_ref_t* tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
198 has_tensor_wraps_ref = (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph);
199 }
200 if (!has_tensor_wraps_ref)
201 ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
202 }
203 p = p->p;
204 } while (p);
205}
206
207static void _ccv_nnc_graph_redo_tensor_wraps(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
208{
209 int i;
210 const int has_wrap = ccv_nnc_tensors_have_wraps(info->inputs, info->input_size) ||
211 ccv_nnc_tensors_have_wraps(info->outputs, info->output_size) ||
212 ccv_nnc_tensors_have_wraps(info->updates, info->update_size);
213 if (has_wrap)
214 {
215 const int tensor_wrap_size = info->input_size + info->output_size + info->update_size;
216 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = ccv_nnc_get_tensor_wrap_array(graph, tensor_wrap_size, &info->tensor_wraps_ref);
217 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps, info->inputs, info->input_size);
218 const int d = info->input_size;
219 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + d, info->outputs, info->output_size);
220 const int dd = info->input_size + info->output_size;
221 ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + dd, info->updates, info->update_size);
222 } else if (info->tensor_wraps_ref) {
223 ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
224 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
225 if (tensor_wrap_array)
226 {
227 for (i = 0; i < tensor_wrap_array->size; i++)
228 if (tensor_wrap_array->tensor_wraps[i])
229 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[i]);
230 ccfree(tensor_wrap_array);
231 *tensor_wrap_array_ref = 0;
232 info->tensor_wraps_ref = 0;
233 }
234 }
235}
236
237static void _ccv_nnc_graph_deregister_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
238{
239 ccv_nnc_graph_t* p = graph;
240 do {
241 int i;
242 // Remove from the array.
243 if (p->tensor_wraps_refs)
244 for (i = 0; i < p->tensor_wraps_refs->rnum; i++)
245 {
246 ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
247 if (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph)
248 {
249 --p->tensor_wraps_refs->rnum;
250 if (i < p->tensor_wraps_refs->rnum)
251 memcpy(tensor_wraps_ref, tensor_wraps_ref + 1, sizeof(ccv_nnc_graph_exec_t) * (p->tensor_wraps_refs->rnum - i));
252 break;
253 }
254 }
255 p = p->p;
256 } while (p);
257}
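
Note: the memcpy on line 251 sizes the shift with sizeof(ccv_nnc_graph_exec_t) although the array element type is ccv_nnc_graph_tensor_wraps_ref_t; this presumably relies on the two structs sharing an identical { int d; ccv_nnc_graph_t* graph; } layout.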
258
259void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size)
260{
261 assert(exec.d < graph->exec_info->rnum);
262 assert(exec.graph == graph);
263 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
264 assert(input_flag_size <= info->input_size);
265 assert(output_flag_size <= info->output_size);
266 if (info->input_size + info->output_size == 0)
267 return;
268 if (!info->input_flags)
269 {
270 info->input_flags = (int*)cccalloc(info->input_size + info->output_size, sizeof(int));
271 info->output_flags = info->input_flags + info->input_size;
272 }
273 if (input_flag_size > 0)
274 memcpy(info->input_flags, input_flags, sizeof(int) * input_flag_size);
275 if (output_flag_size > 0)
276 memcpy(info->output_flags, output_flags, sizeof(int) * output_flag_size);
277}
278
279void ccv_nnc_graph_exec_set_peer(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t peer_exec)
280{
281 assert(exec.graph == graph);
282 assert(exec.d >= 0);
283 assert(exec.d < graph->exec_info->rnum);
284 assert(peer_exec.graph == graph || peer_exec.graph == graph->peer);
285 assert(peer_exec.d >= 0);
286 if (peer_exec.graph == graph)
287 { assert(peer_exec.d < graph->exec_info->rnum); }
288 else
289 { assert(peer_exec.d < graph->peer->exec_info->rnum); }
290 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
291 exec_info->peer_ref = peer_exec.d + 1;
292}
293
294static ccv_nnc_tensor_t* _ccv_nnc_any_tensor_from_tensor_multiview(ccv_nnc_tensor_multiview_t* const mv)
295{
296 ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
297 while (CCV_IS_TENSOR_MULTIVIEW(tensor))
298 {
299 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
300 const int count = 0;
301 const int off = mv->kind;
302 const int mod = mv->repeat;
303 // If reached the root.
304 tensor = CCV_NNC_MULTIVIEW_DATA(mv)[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
305 }
306 return tensor;
307}
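
The unwrap index on line 304 plays the first off (kind) views once and then cycles through the remaining mod (repeat) views; with count pinned to 0 the loop simply descends to the first leaf tensor. A worked example of the general formula with hypothetical off/mod values:

/* count >= off ? ((count - off) % mod) + off : count, with off = 1, mod = 2:
 *   count = 0 -> 0  (one-shot view, taken as-is)
 *   count = 1 -> ((1 - 1) % 2) + 1 = 1
 *   count = 2 -> ((2 - 1) % 2) + 1 = 2
 *   count = 3 -> ((3 - 1) % 2) + 1 = 1  (wraps back into the repeated views)
 *   count = 4 -> ((4 - 1) % 2) + 1 = 2
 */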
308
309void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
310{
311 assert(exec.d < graph->exec_info->rnum);
312 assert(exec.graph == graph);
313 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
314 // De-register from the graph if it contains multiview tensors.
315 if (info->tensor_wraps_ref)
316 _ccv_nnc_graph_deregister_tensor_wraps(graph, info->tensor_wraps_ref - 1);
317 // In case it is already executed, rewind.
318 _ccv_nnc_graph_exec_rewind(info, graph);
319 if (input_size == 0 && output_size == 0)
320 {
321 if (info->input_size > 0 || info->output_size > 0)
322 ccfree(info->inputs);
323 info->inputs = 0;
324 info->outputs = 0;
325 info->input_size = 0;
326 info->output_size = 0;
327 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
328 if (info->tensor_wraps_ref)
329 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
330 return;
331 }
332 if (info->inputs)
333 info->inputs = (ccv_nnc_tensor_t**)ccrealloc(info->inputs, sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
334 else
335 info->inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
336 info->outputs = info->inputs + input_size;
337 if (inputs)
338 memcpy(info->inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
339 if (outputs)
340 memcpy(info->outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
341 int i;
342 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
343 for (i = 0; i < input_size + output_size; i++)
344 if (info->inputs[i])
345 {
346 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i];
347 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
348 }
349 info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes);
350 info->input_size = input_size;
351 info->output_size = output_size;
352 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
353 // Register again if the tensor wraps exist.
354 if (info->tensor_wraps_ref)
355 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
356 // Free flags.
357 if (info->input_flags)
358 {
359 ccfree(info->input_flags);
360 info->input_flags = info->output_flags = 0;
361 }
362}
363
364void ccv_nnc_graph_exec_add_update(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update)
365{
366 assert(CCV_IS_TENSOR_MULTIVIEW(update));
367 assert(exec.d < graph->exec_info->rnum);
368 assert(exec.graph == graph);
369 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
370 const int register_tensor_wraps = !info->tensor_wraps_ref;
371 const int update_index = info->update_size;
372 ++info->update_size;
373 if (info->updates)
374 info->updates = (ccv_nnc_tensor_t**)ccrealloc(info->updates, sizeof(ccv_nnc_tensor_t*) * info->update_size);
375 else
376 info->updates = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * info->update_size);
377 info->updates[update_index] = update;
378 _ccv_nnc_graph_redo_tensor_wraps(info, graph);
379 if (register_tensor_wraps)
380 ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
381}
382
383ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
384{
385 int d = graph->exec_info->rnum;
386 ccv_nnc_graph_exec_info_t info = {
387 .cmd = cmd,
388 .hint = hint,
389 .input_size = input_size,
390 .output_size = output_size,
391 };
392 assert(inputs || input_size == 0);
393 assert(outputs || output_size == 0);
394 if (input_size > 0 || output_size > 0)
395 {
396 info.inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
397 info.outputs = info.inputs + input_size;
398 if (inputs)
399 memcpy(info.inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
400 if (outputs)
401 memcpy(info.outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
402 info.input_size = input_size;
403 info.output_size = output_size;
404 int i;
405 int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
406 for (i = 0; i < input_size + output_size; i++)
407 if (info.inputs[i])
408 {
409 ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i];
410 tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype;
411 }
412 info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes);
413 }
414 _ccv_nnc_graph_redo_tensor_wraps(&info, graph);
415 // Add itself to the graph's wraps array, this will help the run time when we run the graph and do unwrapping.
416 if (info.tensor_wraps_ref)
417 ccv_nnc_graph_register_tensor_wraps(graph, info.tensor_wraps_ref - 1);
418 ccv_array_push(graph->exec_info, &info);
419 return (ccv_nnc_graph_exec_t){
420 .d = d,
421 .graph = graph,
422 };
423}
424
425void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to)
426{
427 ccv_nnc_graph_tensor_carry_over_t carry_over = {
428 .from = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)from),
429 .to = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)to)
430 };
431 if (!graph->carry_overs)
432 graph->carry_overs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_carry_over_t), 0, 0);
433 ccv_array_push(graph->carry_overs, &carry_over);
434}
435
436int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
437{
438 assert(graph == source.graph);
439 assert(graph == destination.graph);
440 assert(source.d < graph->exec_info->rnum);
441 assert(destination.d < graph->exec_info->rnum);
442 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
443 if (src_info->outgoings == 0)
444 src_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
445 else {
446 int i;
447 // Check if this is already connected, if so, skip.
448 for (i = 0; i < src_info->outgoings->rnum; i++)
449 if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
450 return -1;
451 }
452 ccv_array_push(src_info->outgoings, &destination.d);
453 graph->topsorted = 0;
454 return 0;
455}
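
A minimal usage sketch of the Level-2 API above: build a two-node graph a -> b and mark its boundary nodes. cmd_a, cmd_b, hint, and the tensor arrays are placeholders for command/tensor setup that is elided here:

ccv_nnc_graph_t* const graph = ccv_nnc_graph_new();
ccv_nnc_graph_exec_t a = ccv_nnc_graph_exec_new(graph, cmd_a, hint, inputs_a, 2, outputs_a, 1);
ccv_nnc_graph_exec_t b = ccv_nnc_graph_exec_new(graph, cmd_b, hint, outputs_a, 1, outputs_b, 1);
ccv_nnc_graph_exec_concat(graph, a, b); /* add edge a -> b; returns -1 if it already exists */
ccv_nnc_graph_set_sources(graph, &a, 1);
ccv_nnc_graph_set_destinations(graph, &b, 1);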
456
457int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
458{
459 assert(graph == source.graph);
460 assert(graph == destination.graph);
461 assert(source.d < graph->exec_info->rnum);
462 assert(destination.d < graph->exec_info->rnum);
463 ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
464 if (!src_info->outgoings)
465 return -1;
466 int i, j = -1;
467 // Check if this is already connected, if so, skip.
468 for (i = 0; i < src_info->outgoings->rnum; i++)
469 if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
470 {
471 j = i;
472 break;
473 }
474 if (j < 0)
475 return -1;
476 if (j < src_info->outgoings->rnum - 1)
477 *(int*)ccv_array_get(src_info->outgoings, j) = *(int*)ccv_array_get(src_info->outgoings, src_info->outgoings->rnum - 1);
478 --src_info->outgoings->rnum;
479 ccv_nnc_graph_exec_info_t* dest_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, destination.d);
480 if (dest_info->outgoings)
481 for (i = 0; i < dest_info->outgoings->rnum; i++)
482 ccv_array_add_unique_int(src_info->outgoings, *(int*)ccv_array_get(dest_info->outgoings, i));
483 graph->topsorted = 0;
484 return 0;
485}
486
487int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph)
488{
489 return graph->exec_info ? graph->exec_info->rnum : 0;
490}
491
492void* ccv_nnc_graph_buffer(ccv_nnc_graph_t* const graph, int size)
493{
494 if (graph->buffer_size >= size)
495 return graph->buffer;
496 graph->buffer_size = size;
497 graph->buffer = (graph->buffer) ? ccrealloc(graph->buffer, size) : ccmalloc(size);
498 return graph->buffer;
499}
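
ccv_nnc_graph_buffer hands out grow-only scratch storage owned by the graph; callers should treat it as transient and must not free it (it is presumably released together with the graph). Usage sketch:

int* const scratch = (int*)ccv_nnc_graph_buffer(graph, sizeof(int) * 128);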
500
501void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size)
502{
503 assert(exec_cvt_size == graph->exec_info->rnum);
504 assert(graph->sources && graph->sources->rnum);
505 assert(graph->destinations && graph->destinations->rnum);
506 int i, j;
507 for (i = 0; i < exec_cvt_size; i++)
508 exec_cvt[i] = -1;
509 ccv_array_t* exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), graph->exec_info->rnum, 0);
510 // If there are breakpoints, it is more complicated, we first start to the breakpoints, and then continue from the breakpoints to the destinations.
511 if (graph->breakpoint_size)
512 {
513 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, graph->breakpoints, graph->breakpoint_size, 0);
514 for (i = 0; i < graph->breakpoint_size; i++)
515 exec_cvt[graph->breakpoints[i].d] = -2; // Mark this as breakpoints, so we will skip the first round.
516 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx) {
517 assert(!node->peer_ref); // If node has a peer ref, we cannot fix it up.
518 if (exec_cvt[idx] == -2) // Skip breakpoint.
519 continue;
520 // Loop over node and push to the array.
521 ccv_array_push(exec_info, node);
522 // Go to its sub-graph to fix exec_idx
523 for (i = 0; i < node->graph_ref_size; i++)
524 {
525 const int graph_ref = CCV_NNC_GRAPH_REF(node)[i] - 1;
526 if (graph_ref >= 0)
527 {
528 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref);
529 sub_graph->exec_idx = exec_info->rnum;
530 }
531 }
532 exec_cvt[idx] = exec_info->rnum - 1;
533 } ccv_nnc_graph_visit_endfor
534 ccv_nnc_graph_visit_free(visit);
535 graph->breakpoint_offset = exec_info->rnum;
536 visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0);
537 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx) {
538 assert(!node->peer_ref); // If node has a peer ref, we cannot fix it up.
539 // Loop over node and push to the array.
540 ccv_array_push(exec_info, node);
541 // Go to its sub-graph to fix exec_idx
542 for (i = 0; i < node->graph_ref_size; i++)
543 {
544 const int graph_ref = CCV_NNC_GRAPH_REF(node)[i] - 1;
545 if (graph_ref >= 0)
546 {
547 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref);
548 sub_graph->exec_idx = exec_info->rnum;
549 }
550 }
551 exec_cvt[idx] = exec_info->rnum - 1;
552 } ccv_nnc_graph_visit_endfor
553 ccv_nnc_graph_visit_free(visit);
554 for (i = 0; i < graph->breakpoint_size; i++)
555 { assert(exec_cvt[graph->breakpoints[i].d] >= 0); } // All breakpoints should be assigned.
556 } else {
557 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0);
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph
->exec_info)->data)) + (size_t)(graph->exec_info)->
rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const
int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize
* (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c ==
0 && _d_ < (graph->destinations->rnum)) { _exists_
[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
(ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info
)->data)) + (size_t)(graph->exec_info)->rsize * (size_t
)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t
*)((void*)(((char*)((graph->exec_info)->data)) + (size_t
)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _d_ < (graph->destinations->rnum
)) { _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_
)); } for (_i_ = 0; _i_ < (graph->destinations->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].graph == graph
) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r) continue; if (!(0)) {
((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((void*
)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0)
? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 557, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (graph
->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_
->size <= (graph->exec_info->rnum)) ; else __assert_fail
("_visit_->size <= (graph->exec_info->rnum)", "ccv_nnc_graph.c"
, 557, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
558 ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*)
(((char*)((graph->exec_info)->data)) + (size_t)(graph->
exec_info)->rsize * (size_t)(0))))) const node __attribute__
((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((
graph->exec_info)->data)) + (size_t)(graph->exec_info
)->rsize * (size_t)(0)))) + idx;
{
559 assert(!node->peer_ref)((void) sizeof ((!node->peer_ref) ? 1 : 0), __extension__ (
{ if (!node->peer_ref) ; else __assert_fail ("!node->peer_ref"
, "ccv_nnc_graph.c", 559, __extension__ __PRETTY_FUNCTION__);
}))
; // If node has a peer ref, we cannot fix it up.
560 // Push this node to the array in visit order.
561 ccv_array_push(exec_info, node);
562 // Go to its sub-graph to fix exec_idx
563 for (i = 0; i < node->graph_ref_size; i++)
564 {
565 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
566 if (graph_ref >= 0)
567 {
568 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(graph_ref)))
;
569 sub_graph->exec_idx = exec_info->rnum;
570 }
571 }
572 exec_cvt[idx] = exec_info->rnum - 1;
573 } ccv_nnc_graph_visit_endfor} }
574 ccv_nnc_graph_visit_free(visit);
575 }
576 assert(graph->exec_info->rnum == exec_info->rnum)((void) sizeof ((graph->exec_info->rnum == exec_info->
rnum) ? 1 : 0), __extension__ ({ if (graph->exec_info->
rnum == exec_info->rnum) ; else __assert_fail ("graph->exec_info->rnum == exec_info->rnum"
, "ccv_nnc_graph.c", 576, __extension__ __PRETTY_FUNCTION__);
}))
;
577 ccv_array_free(graph->exec_info);
578 graph->exec_info = exec_info;
579 for (i = 0; i < graph->sources->rnum; i++)
580 {
581 ccv_nnc_graph_exec_t* const source = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, i)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph
->sources)->rsize * (size_t)(i)))
;
582 source->d = exec_cvt[source->d];
583 }
584 for (i = 0; i < graph->destinations->rnum; i++)
585 {
586 ccv_nnc_graph_exec_t* const destination = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, i)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(i)))
;
587 destination->d = exec_cvt[destination->d];
588 }
589 // Update all outgoings to reflect the latest indices.
590 for (i = 0; i < exec_info->rnum; i++)
591 {
592 ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(exec_info, i)((void*)(((char*)((exec_info)->data)) + (size_t)(exec_info
)->rsize * (size_t)(i)))
;
593 if (info->outgoings)
594 for (j = 0; j < info->outgoings->rnum; j++)
595 *(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
= exec_cvt[*(int*)ccv_array_get(info->outgoings, j)((void*)(((char*)((info->outgoings)->data)) + (size_t)(
info->outgoings)->rsize * (size_t)(j)))
];
596 }
597 graph->topsorted = 1;
598}
599
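A note on the remap step above: exec_cvt acts as an old-index -> new-index table built while the nodes are re-pushed in topological order, and every stored index (sources, destinations, outgoings) is then rewritten through it. A minimal stand-alone sketch of that pattern, using plain arrays instead of ccv_array_t (names here are illustrative, not from the library):

#include <assert.h>

/* Rewrite every stored node index through cvt after the nodes were reordered.
 * cvt[old] is the new position of the node that used to sit at index old. */
static void remap_indices(int* const indices, const int size, const int* const cvt)
{
	int i;
	for (i = 0; i < size; i++)
	{
		assert(cvt[indices[i]] >= 0); /* Every referenced node must have been placed. */
		indices[i] = cvt[indices[i]];
	}
}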
600typedef struct {
601 int device_id;
602 int exec_idx;
603 ccv_array_t* signal_set;
604 ccv_array_t* command_set; // The set of commands executed in this stream. In case there is a tie (on rank), we will check this set.
605} ccv_nnc_stream_data_t;
606
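command_set and signal_set above behave as small append-only integer sets: the scheduler queries them with ccv_array_find_int / ccv_array_find_uint and extends them with ccv_array_add_unique_uint. A minimal sketch of that add-unique idiom over a plain buffer (a hypothetical helper, not the library's implementation; a linear scan is fine because these sets stay small):

#include <stdint.h>

/* Append v to set (current length *len) only if it is not present yet. */
static void add_unique_u32(uint32_t* const set, int* const len, const uint32_t v)
{
	int i;
	for (i = 0; i < *len; i++)
		if (set[i] == v)
			return;
	set[(*len)++] = v;
}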
607static void _ccv_nnc_graph_schedule_assign_signals(ccv_array_t* const incoming, ccv_nnc_graph_exec_info_t* const node, ccv_array_t* const stream_data, int* const signal_size, ccv_nnc_graph_exec_info_t* const exec_info, const int exec_info_size)
608{
609 assert(incoming->rnum > 0)((void) sizeof ((incoming->rnum > 0) ? 1 : 0), __extension__
({ if (incoming->rnum > 0) ; else __assert_fail ("incoming->rnum > 0"
, "ccv_nnc_graph.c", 609, __extension__ __PRETTY_FUNCTION__);
}))
;
610 int i, j, k;
611 int wait_size = 0, max_wait_size = 0;
612 for (i = 0; i < incoming->rnum; i++)
613 {
614 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
615 ccv_nnc_graph_exec_info_t* const incoming_exec_info = exec_info + incoming_idx;
616 assert(incoming_exec_info->schedule.stream_size > 0)((void) sizeof ((incoming_exec_info->schedule.stream_size >
0) ? 1 : 0), __extension__ ({ if (incoming_exec_info->schedule
.stream_size > 0) ; else __assert_fail ("incoming_exec_info->schedule.stream_size > 0"
, "ccv_nnc_graph.c", 616, __extension__ __PRETTY_FUNCTION__);
}))
;
617 max_wait_size += incoming_exec_info->schedule.stream_size;
618 }
619 int waits[ccv_max(1, max_wait_size)({ typeof (1) _a = (1); typeof (max_wait_size) _b = (max_wait_size
); (_a > _b) ? _a : _b; })
];
620 assert(node->schedule.stream_size > 0)((void) sizeof ((node->schedule.stream_size > 0) ? 1 : 0
), __extension__ ({ if (node->schedule.stream_size > 0)
; else __assert_fail ("node->schedule.stream_size > 0"
, "ccv_nnc_graph.c", 620, __extension__ __PRETTY_FUNCTION__);
}))
;
621 for (i = 0; i < incoming->rnum; i++)
622 {
623 const int incoming_idx = *(int*)ccv_array_get(incoming, i)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)->
rsize * (size_t)(i)))
;
624 assert(incoming_idx < exec_info_size)((void) sizeof ((incoming_idx < exec_info_size) ? 1 : 0), __extension__
({ if (incoming_idx < exec_info_size) ; else __assert_fail
("incoming_idx < exec_info_size", "ccv_nnc_graph.c", 624,
__extension__ __PRETTY_FUNCTION__); }))
;
625 assert(incoming_idx >= 0)((void) sizeof ((incoming_idx >= 0) ? 1 : 0), __extension__
({ if (incoming_idx >= 0) ; else __assert_fail ("incoming_idx >= 0"
, "ccv_nnc_graph.c", 625, __extension__ __PRETTY_FUNCTION__);
}))
;
626 ccv_nnc_graph_exec_info_t* const incoming_exec_info = exec_info + incoming_idx;
627 assert(incoming_exec_info->schedule.stream_size > 0)((void) sizeof ((incoming_exec_info->schedule.stream_size >
0) ? 1 : 0), __extension__ ({ if (incoming_exec_info->schedule
.stream_size > 0) ; else __assert_fail ("incoming_exec_info->schedule.stream_size > 0"
, "ccv_nnc_graph.c", 627, __extension__ __PRETTY_FUNCTION__);
}))
;
628 int stream_synced = 1;
629 // If the current node's streams are a subset of the incoming node's streams, there
630 // is no need to sync with a signal, because we are already synced with the incoming node.
631 for (j = 0; stream_synced && j < node->schedule.stream_size; j++)
632 {
633 const int s = SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[j];
634 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 634
, __extension__ __PRETTY_FUNCTION__); }))
;
635 int flag = 0;
636 for (k = 0; !flag && k < incoming_exec_info->schedule.stream_size; k++)
637 flag = (SCHEDULE_STREAMS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_streams : (incoming_exec_info->schedule
)._heap_streams)
[k] == s);
638 stream_synced = flag;
639 }
640 if (stream_synced)
641 continue;
642 // Otherwise, find the streams we need to sync with, and create signals for these.
643 for (j = 0; j < incoming_exec_info->schedule.stream_size; j++)
644 {
645 const int s = SCHEDULE_STREAMS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_streams : (incoming_exec_info->schedule
)._heap_streams)
[j];
646 assert(s >= 0)((void) sizeof ((s >= 0) ? 1 : 0), __extension__ ({ if (s >=
0) ; else __assert_fail ("s >= 0", "ccv_nnc_graph.c", 646
, __extension__ __PRETTY_FUNCTION__); }))
;
647 int flag = 0;
648 for (k = 0; !flag && k < node->schedule.stream_size; k++)
649 flag = (SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[k] == s);
650 if (!flag) // Need to have a signal.
651 {
652 if (SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j] < 0)
653 SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j] = (*signal_size)++;
654 else {
655 int flag = 0;
656 // If any of the current node's streams has already seen this signal, we are good already.
657 for (k = 0; !flag && k < node->schedule.stream_size; k++)
658 {
659 assert(SCHEDULE_STREAMS(node->schedule)[k] >= 0)((void) sizeof ((((node->schedule).stream_size <= 1 ? (
node->schedule)._inline_streams : (node->schedule)._heap_streams
)[k] >= 0) ? 1 : 0), __extension__ ({ if (((node->schedule
).stream_size <= 1 ? (node->schedule)._inline_streams :
(node->schedule)._heap_streams)[k] >= 0) ; else __assert_fail
("SCHEDULE_STREAMS(node->schedule)[k] >= 0", "ccv_nnc_graph.c"
, 659, __extension__ __PRETTY_FUNCTION__); }))
;
660 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(node->schedule)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((node->schedule).stream_size <=
1 ? (node->schedule)._inline_streams : (node->schedule
)._heap_streams)[k])))
;
661 flag = (data->signal_set && ccv_array_find_int(data->signal_set, SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j]));
662 }
663 if (flag)
664 continue;
665 }
666 // Otherwise, we need to wait for this. Currently, our granularity is to wait on all of this node's streams.
667 waits[wait_size++] = SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j];
668 // Mark that all streams on this node have now seen this signal.
669 for (k = 0; k < node->schedule.stream_size; k++)
670 {
671 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(node->schedule)[k])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(((node->schedule).stream_size <=
1 ? (node->schedule)._inline_streams : (node->schedule
)._heap_streams)[k])))
;
672 if (!data->signal_set)
673 data->signal_set = ccv_array_new(sizeof(int), 0, 0);
674 ccv_array_push(data->signal_set, &SCHEDULE_SIGNALS(incoming_exec_info->schedule)((incoming_exec_info->schedule).stream_size <= 1 ? (incoming_exec_info
->schedule)._inline_signals : (incoming_exec_info->schedule
)._heap_signals)
[j]);
675 }
676 }
677 }
678 }
679 node->schedule.wait_size = wait_size;
680 if (wait_size > 0)
681 {
682 node->schedule.waits = node->schedule.waits ? ccrealloc(node->schedule.waits, sizeof(int) * wait_size) : ccmalloc(sizeof(int) * wait_size);
683 memcpy(node->schedule.waits, waits, sizeof(int) * wait_size);
684 }
685}
686
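The first pair of nested loops in the function above is a containment test: an incoming node only has to emit a signal when at least one of the current node's streams is not among the incoming node's streams. A stand-alone sketch of that test, assuming streams are given as plain int arrays (illustrative names, not library API):

/* Return 1 if every element of sub[] also appears in super[], 0 otherwise.
 * Used here as: no signal is needed iff the node's streams are a subset of
 * the incoming node's streams. */
static int is_stream_subset(const int* const sub, const int sub_size, const int* const super, const int super_size)
{
	int i, j;
	for (i = 0; i < sub_size; i++)
	{
		int found = 0;
		for (j = 0; !found && j < super_size; j++)
			found = (super[j] == sub[i]);
		if (!found)
			return 0;
	}
	return 1;
}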
687typedef struct {
688 int rank;
689 ccv_array_t* outgoings;
690} ccv_nnc_incoming_t;
691
692static int _ccv_nnc_device_ids_for_stream_data(ccv_nnc_graph_exec_info_t* const node, const int device_id, ccv_array_t* const stream_data, int* const device_ids, const int max_device_id_size)
693{
694 int device_id_size = ccv_nnc_device_ids_for_io(node->inputs, node->input_size, node->outputs, node->output_size, device_ids, max_device_id_size);
695 if (device_id_size == 0)
696 {
697 // If there is a default stream datum, use its device id. Otherwise, use the device id passed in (it will become the default datum's device id).
698 if (stream_data->rnum > 0)
699 {
700 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
701 device_ids[0] = default_data->device_id;
702 } else
703 device_ids[0] = device_id >= 0 ? device_id : 0;
704 device_id_size = 1;
705 }
706 return device_id_size;
707}
708
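The fallback order in the function above is: device ids derived from the node's inputs/outputs, else the device id of the first (default) stream datum, else the device id passed in, else 0. A condensed sketch of that priority chain (a hypothetical helper that collapses the function above to a single id):

/* Pick one device id with the same priority the scheduler uses:
 * tensor-derived id, then default stream's id, then the caller's id, then 0. */
static int pick_device_id(const int tensor_device_id, const int has_default_data, const int default_device_id, const int requested_device_id)
{
	if (tensor_device_id >= 0)
		return tensor_device_id;
	if (has_default_data)
		return default_device_id;
	return requested_device_id >= 0 ? requested_device_id : 0;
}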
709static void _ccv_nnc_graph_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type, const int device_id, ccv_nnc_stream_context_t* const stream_context)
710{
711 assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources
->rnum) ? 1 : 0), __extension__ ({ if (graph->sources &&
graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum"
, "ccv_nnc_graph.c", 711, __extension__ __PRETTY_FUNCTION__);
}))
;
1. Assuming the condition is true
2. Assuming the condition is true
3. Taking true branch
712 assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph->
destinations->rnum) ? 1 : 0), __extension__ ({ if (graph->
destinations && graph->destinations->rnum) ; else
__assert_fail ("graph->destinations && graph->destinations->rnum"
, "ccv_nnc_graph.c", 712, __extension__ __PRETTY_FUNCTION__);
}))
;
4. Assuming the condition is true
5. Assuming the condition is true
6. Taking true branch
713 assert(graph->topsorted)((void) sizeof ((graph->topsorted) ? 1 : 0), __extension__
({ if (graph->topsorted) ; else __assert_fail ("graph->topsorted"
, "ccv_nnc_graph.c", 713, __extension__ __PRETTY_FUNCTION__);
}))
; // Only support this on a topsorted graph.
7. Assuming the condition is true
8. Taking true branch
714 const int exec_info_size = graph->exec_info->rnum;
715 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(0)))
;
716 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, exec_info, exec_info_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct
{ int8_t d; int8_t r; uint16_t c; } ccv_nnc_incoming_t; const
int _heap_mem_ = (exec_info_size > 1024); int _i_, _j_; ccv_nnc_incoming_t
* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t
*)malloc(sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof
(int32_t) * (exec_info_size) * 2); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_info_size
) + sizeof(int32_t) * (exec_info_size) * 2); memset(_incomings_
, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); for (_i_
= 0; _i_ < (exec_info_size); _i_++) _incomings_[_i_].r = 1
; int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size
)), (int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size
), }; for (_i_ = 0; _i_ < (graph->sources->rnum); _i_
++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
sources)->data)) + (size_t)(graph->sources)->rsize *
(size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (!_incomings_[_idx_].r) continue; _incomings_
[_idx_].r = 0; if ((exec_info)[_idx_].outgoings) for (_j_ = 0
; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const
int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings
)->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize
* (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->destinations
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*
)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_
< (graph->sources->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->
data)) + (size_t)(graph->sources)->rsize * (size_t)(0))
))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_
= 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_
= _exists_[_p_][_i_]; _visit_->node[_visit_->size].index
= ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_
[_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) {
++_d_; _incomings_[_idx_].r = 1; } if ((exec_info)[_idx_].outgoings
) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int
d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)->
data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (
size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_d_ < (graph->destinations->rnum)) { _exists_[_p_][
_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_d_ < (graph->destinations->rnum)) { _exists_[_q_][
_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } (
(_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0;
_i_ < (graph->destinations->rnum); _i_++) { ((void)
sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
destinations)->data)) + (size_t)(graph->destinations)->
rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail
("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d].r) continue; if (!(0)) {
((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((void*
)(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0)
? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
c == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->destinations)->data)) + (size_t)(graph
->destinations)->rsize * (size_t)(0))))[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d))
; _visit_->node[_visit_->size].term = ((_incomings_[((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->destinations)->data)) + (size_t
)(graph->destinations)->rsize * (size_t)(0))))[_i_].d].
d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_)
; } while (0);; ((void) sizeof ((_visit_->size <= (exec_info_size
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size
)) ; else __assert_fail ("_visit_->size <= (exec_info_size)"
, "ccv_nnc_graph.c", 716, __extension__ __PRETTY_FUNCTION__);
})); _visit_; })
;
9. Assuming 'exec_info_size' is <= 1024
10. Taking false branch
11. Assuming '_i_' is >= 'exec_info_size'
12. Loop condition is false. Execution continues on line 716
13. Assuming the condition is false
14. Loop condition is false. Execution continues on line 716
15. Loop condition is false. Execution continues on line 716
16. Assuming the condition is true
17. Loop condition is true. Entering loop body
18. Assuming the condition is true
19. Taking true branch
20. Assuming the condition is false
21. Loop condition is false. Execution continues on line 716
22. Loop condition is false. Execution continues on line 716
23. Loop condition is false. Execution continues on line 716
24. Loop condition is true. Entering loop body
25. Taking true branch
26. Taking true branch
27. Execution continues on line 716
28. Loop condition is false. Execution continues on line 716
29. Taking false branch
30. Loop condition is false. Exiting loop
31. Assuming the condition is true
32. Taking true branch
717 int i, j, k;
718 // Generate exec dependencies (or, in other words, partial ordering of executions).
719 ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(exec_info_size, exec_info_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
720 int* buf = (int*)ccmalloc(sizeof(int) * exec_info_size * 2);
721 int buf_size;
722#define for_block(x, val) \
723 do { \
724 if (((int32_t*)val)[0] > 0) \
725 { \
726 buf[buf_size * 2] = x; \
727 buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \
728 ++buf_size; \
729 } \
730 } while (0)
731 ccv_nnc_graph_visit_for(visit, exec_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_info)) const node __attribute__((unused
)) = (exec_info) + idx;
{
33. Loop condition is false. Execution continues on line 766
732 buf_size = 0; /* save all its parent deps to this buffer */
733 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
734 if (node->schedule.stream_size > 1)
735 ccfree(node->schedule._heap_streams);
736 node->schedule.stream_size = 0;
737 node->schedule.wait_size = 0;
738 if (vector)
739 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
740 if (!node->outgoings)
741 continue;
742 for (i = 0; i < node->outgoings->rnum; i++)
743 {
744 int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
745 const int32_t one = 1;
746 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx);
747 /* If not found, set it. If the current node is a destination node, there is no
748 * need to set it as a parent of subsequent nodes, because of its terminal nature. */
749 if (!term && (!cell.i32 || cell.i32[0] == 0))
750 ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one);
751 for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */
752 {
753 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]);
754 /* If not found, set */
755 if (!cell.i32 || cell.i32[0] == 0)
756 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]);
757 else {
758 /* Otherwise, set to the longest one */
759 int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1])({ typeof (cell.i32[0]) _a = (cell.i32[0]); typeof (buf[j * 2
+ 1]) _b = (buf[j * 2 + 1]); (_a > _b) ? _a : _b; })
;
760 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep);
761 }
762 }
763 }
764 } ccv_nnc_graph_visit_endfor} }
765#undef for_block
766 ccfree(buf);
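Conceptually, the pass above computes, for every pair (descendant, ancestor), the longest path length between them: each visited node copies its own ancestor set, with every entry bumped by one hop, onto all of its outgoings, keeping the maximum on conflict. A dense-matrix sketch of the same recurrence (the real code uses a sparse matrix plus the buf scratch array to keep this affordable; n stands for exec_info_size):

/* dep[a * n + b] > 0 means b is an ancestor of a; the value is the longest
 * known path length from b to a. Visiting nodes in topological order makes
 * a single pass sufficient. */
static void propagate_deps(int* const dep, const int n, const int node, const int* const outgoings, const int outgoing_size)
{
	int i, b;
	for (i = 0; i < outgoing_size; i++)
	{
		const int d = outgoings[i];
		if (dep[d * n + node] == 0)
			dep[d * n + node] = 1; /* The direct edge: a path of length 1. */
		for (b = 0; b < n; b++)
			if (dep[node * n + b] > 0 && dep[node * n + b] + 1 > dep[d * n + b])
				dep[d * n + b] = dep[node * n + b] + 1; /* Keep the longest. */
	}
}

(The real pass additionally skips recording a terminal node as a parent of its outgoings, per the comment at line 747.)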
767 // Algorithm to allocate signals and streams for this graph.
768 ccv_array_t* const stream_data = ccv_array_new(sizeof(ccv_nnc_stream_data_t), 0, 0);
769 ccv_array_t** const outgoings = cccalloc(exec_info_size, sizeof(ccv_array_t*));
770 ccv_nnc_incoming_t* const incomings = cccalloc(exec_info_size, sizeof(ccv_nnc_incoming_t));
34. Memory is allocated
771 int max_device_id_size = 1;
772 // Filter out outgoing nodes that we will be able to reach afterwards anyway.
773 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
35. Loop condition is false. Execution continues on line 810
774 max_device_id_size = ccv_max(node->input_size + node->output_size, max_device_id_size)({ typeof (node->input_size + node->output_size) _a = (
node->input_size + node->output_size); typeof (max_device_id_size
) _b = (max_device_id_size); (_a > _b) ? _a : _b; })
;
775 if (node->outgoings)
776 {
777 outgoings[idx] = ccv_array_new(sizeof(int), 0, 0);
778 for (i = 0; i < node->outgoings->rnum; i++)
779 {
780 const int di = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
781 int flag = 0;
782 for (j = 0; !flag && j < node->outgoings->rnum; j++)
783 {
784 if (j != i)
785 {
786 const int dj = *(int*)ccv_array_get(node->outgoings, j)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(j)))
;
787 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, di, dj);
788 flag = (cell.i32 && cell.i32[0]);
789 }
790 }
791 if (!flag)
792 {
793 ccv_array_push(outgoings[idx], &di);
794 if (!incomings[di].outgoings)
795 incomings[di].outgoings = ccv_array_new(sizeof(int), 1, 0);
796 ccv_array_push(incomings[di].outgoings, &idx);
797 }
798 }
799 // If we have outgoing nodes, we cannot filter out all of them.
800 assert(node->outgoings->rnum == 0 || outgoings[idx]->rnum > 0)((void) sizeof ((node->outgoings->rnum == 0 || outgoings
[idx]->rnum > 0) ? 1 : 0), __extension__ ({ if (node->
outgoings->rnum == 0 || outgoings[idx]->rnum > 0) ; else
__assert_fail ("node->outgoings->rnum == 0 || outgoings[idx]->rnum > 0"
, "ccv_nnc_graph.c", 800, __extension__ __PRETTY_FUNCTION__);
}))
;
801 }
802 } ccv_nnc_graph_visit_endfor} }
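With exec_dep in hand, the loop above is a transitive reduction of each node's outgoing list: an edge to di is dropped whenever some sibling dj already reaches di, since the ordering is then implied. A sketch of the same filter against a generic reachability predicate (reaches() stands in for the ccv_get_sparse_matrix_cell lookup on exec_dep):

/* Copy into kept[] only the outgoing edges not implied by a sibling edge.
 * reaches(from, to) must report whether a path from -> to exists. */
static int reduce_outgoings(const int* const out, const int out_size, int* const kept, int (*reaches)(int from, int to))
{
	int i, j, kept_size = 0;
	for (i = 0; i < out_size; i++)
	{
		int implied = 0;
		for (j = 0; !implied && j < out_size; j++)
			if (j != i)
				implied = reaches(out[j], out[i]); /* A sibling already reaches out[i]? */
		if (!implied)
			kept[kept_size++] = out[i];
	}
	return kept_size;
}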
803#define visitor(node, idx, _) \
804 if (node->outgoings) \
805 for (i = 0; i < node->outgoings->rnum; i++) \
806 { \
807 const int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
; \
808 node->rank = ccv_max(incomings[d].rank + 1, node->rank)({ typeof (incomings[d].rank + 1) _a = (incomings[d].rank + 1
); typeof (node->rank) _b = (node->rank); (_a > _b) ?
_a : _b; })
; \
809 }
810 CCV_NNC_GRAPH_VISIT(graph, incomings, exec_info_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; } ccv_nnc_incoming_t
; const int _heap_mem_ = (exec_info_size > 1024); int _i_,
_j_; ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
exec_info_size) + sizeof(int32_t) * (exec_info_size) * 2); else
_incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (sizeof(
ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t) * (exec_info_size
) * 2); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (
exec_info_size)); for (_i_ = 0; _i_ < (exec_info_size); _i_
++) _incomings_[_i_].r = 1; int32_t* _exists_[2] = { (int32_t
*)(_incomings_ + (exec_info_size)), (int32_t*)(_incomings_ + (
exec_info_size)) + (exec_info_size), }; for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(graph->destinations->rnum), 0, }; int _p_ = 0, _q_ = 1
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (!_incomings_[_idx_].r) continue
; _incomings_[_idx_].r = 0; if ((incomings)[_idx_].outgoings)
for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((incomings
)[_idx_].outgoings)->data)) + (size_t)((incomings)[_idx_].
outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(graph->sources->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else
__assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((
graph->sources)->data)) + (size_t)(graph->sources)->
rsize * (size_t)(0))))[_i_].d].d = 1; } for (_i_ = 0; _i_ <
(graph->destinations->rnum); _i_++) { ((void) sizeof (
(((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({
if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations
)->data)) + (size_t)(graph->destinations)->rsize * (
size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->destinations)->data)) + (size_t)(graph->destinations)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->destinations)->data)) + (size_t)(graph->destinations
)->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_
[0] = (graph->destinations->rnum); _exist_size_[1] = 0;
int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; visitor(((incomings) + _idx_
), (_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d
) { ++_d_; _incomings_[_idx_].r = 1; } if ((incomings)[_idx_]
.outgoings) { if ((incomings)[_idx_].outgoings->rnum == 1)
{ const int d = *(int*)((void*)(((char*)(((incomings)[_idx_]
.outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings
)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _d_ < (graph->sources->rnum)) {
_exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_
< (incomings)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((incomings)[_idx_].outgoings)->
data)) + (size_t)((incomings)[_idx_].outgoings)->rsize * (
size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0
&& _d_ < (graph->sources->rnum)) { _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_
; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_
= 0; _i_ < (graph->sources->rnum); _i_++) { ((void)
sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->
sources)->data)) + (size_t)(graph->sources)->rsize *
(size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ (
{ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d].r) continue; if (!(0)) {
((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_t*)((void*
)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].d].c == 0) ? 1 : 0),
__extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_t*)((void
*)(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].d].c == 0) ; else __assert_fail
("_incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_nnc_graph.c", 810, __extension__ __PRETTY_FUNCTION__);
})); } else if (_incomings_[((ccv_nnc_graph_exec_t*)((void*)
(((char*)((graph->sources)->data)) + (size_t)(graph->
sources)->rsize * (size_t)(0))))[_i_].d].c > 0) continue
; visitor(((incomings) + ((ccv_nnc_graph_exec_t*)((void*)(((char
*)((graph->sources)->data)) + (size_t)(graph->sources
)->rsize * (size_t)(0))))[_i_].d), (((ccv_nnc_graph_exec_t
*)((void*)(((char*)((graph->sources)->data)) + (size_t)
(graph->sources)->rsize * (size_t)(0))))[_i_].d), (_incomings_
[((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources
)->data)) + (size_t)(graph->sources)->rsize * (size_t
)(0))))[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while
(0);
;
36. Taking false branch
37. Loop condition is false. Execution continues on line 810
38. Loop condition is true. Entering loop body
39. Taking true branch
40. Loop condition is false. Execution continues on line 810
41. Loop condition is true. Entering loop body
42. Loop condition is true. Entering loop body
43. Taking false branch
44. Use of zero-allocated memory
(Along this path the analyzer assumes exec_info_size is 0 (step 11), so the incomings buffer cccalloc'ed at line 770 is a zero-size allocation, which the visit at line 810 then reads through (incomings)[_idx_].outgoings.)
811#undef visitor
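The visitor above assigns each node a rank that is later used for tie-breaking: a node's rank grows with its distance from the graph's boundary, so a higher-rank successor sits deeper on the critical path. A memoized sketch of one way to compute such ranks as longest-path heights (the macro above computes them incrementally during the reverse traversal instead; names here are illustrative):

/* Height of a node: length of the longest path from it to any sink.
 * memo[] must be initialized to -1; out[idx]/out_size[idx] list successors. */
static int node_height(const int idx, int* const* const out, const int* const out_size, int* const memo)
{
	if (memo[idx] >= 0)
		return memo[idx];
	int i, h = 0;
	for (i = 0; i < out_size[idx]; i++)
	{
		const int c = node_height(out[idx][i], out, out_size, memo) + 1;
		if (c > h)
			h = c;
	}
	return memo[idx] = h;
}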
812 int device_ids[max_device_id_size];
813 int outgoing_device_ids[max_device_id_size];
814 int signal_size = 0;
815 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
816 // Go through the incomings.
817 const int device_id_size = _ccv_nnc_device_ids_for_stream_data(node, device_id, stream_data, device_ids, max_device_id_size);
818 if (node->schedule.stream_size == 0)
819 {
820 node->schedule.stream_size = device_id_size; // At least the same size as device_id_size.
821 if (device_id_size > 1)
822 {
823 node->schedule._heap_streams = (int*)ccmalloc(sizeof(int) * device_id_size * 2);
824 node->schedule._heap_signals = (node->schedule._heap_streams + device_id_size);
825 }
826 for (i = 0; i < device_id_size; i++)
827 SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] = -1, SCHEDULE_SIGNALS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_signals : (node->schedule)._heap_signals)
[i] = -1;
828 }
829 for (i = 0; i < device_id_size; i++)
830 // Go through until the end to assign streams.
831 if (SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] < 0)
832 {
833 int stream_idx = -1;
834 int stream_has_command = 0;
835 // First, find a good stream in stream data (the stream is good if it can be recycled, and it has the same command).
836 // Otherwise, we prefer a usable stream (it doesn't have the command, but it can be recycled).
837 for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++)
838 {
839 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(j)))
;
840 if (data->device_id == device_ids[i])
841 {
842 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, data->exec_idx);
843 // If there is a path to conclude that exec_idx is before idx, then we can reuse
844 // this stream. Otherwise the work in this "empty stream" could still be ongoing,
845 // and we may delay the following work unnecessarily.
846 if (cell.i32 && cell.i32[0] > 0)
847 {
848 if (ccv_array_find_uint(data->command_set, node->cmd.cmd))
849 stream_idx = j, stream_has_command = 1;
850 else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet.
851 stream_idx = j;
852 }
853 }
854 }
855 if (stream_idx < 0)
856 {
857 stream_idx = stream_data->rnum;
858 const ccv_nnc_stream_data_t data = {
859 .device_id = device_ids[i],
860 };
861 ccv_array_push(stream_data, &data);
862 }
863 assert(stream_idx >= 0)((void) sizeof ((stream_idx >= 0) ? 1 : 0), __extension__ (
{ if (stream_idx >= 0) ; else __assert_fail ("stream_idx >= 0"
, "ccv_nnc_graph.c", 863, __extension__ __PRETTY_FUNCTION__);
}))
;
864 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
865 if (!data->command_set)
866 data->command_set = ccv_array_new(sizeof(uint32_t), 1, 0);
867 SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i] = stream_idx;
868 ccv_array_add_unique_uint(data->command_set, node->cmd.cmd);
869 // Assign all subsequent nodes to use this stream.
870 int outgoing_idx = idx;
871 while (outgoings[outgoing_idx] && outgoings[outgoing_idx]->rnum)
872 {
873 int highest_rank = -1;
874 int highest_idx = -1;
875 int stream_n = -1;
876 int stream_has_command = 0;
877 for (j = 0; j < outgoings[outgoing_idx]->rnum; j++)
878 {
879 const int d = *(int*)ccv_array_get(outgoings[outgoing_idx], j)((void*)(((char*)((outgoings[outgoing_idx])->data)) + (size_t
)(outgoings[outgoing_idx])->rsize * (size_t)(j)))
;
880 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + d;
881 const int outgoing_device_id_size = _ccv_nnc_device_ids_for_stream_data(outgoing_node, device_id, stream_data, outgoing_device_ids, max_device_id_size);
882 if (outgoing_node->schedule.stream_size == 0)
883 {
884 outgoing_node->schedule.stream_size = outgoing_device_id_size; // At least the same size as outgoing_device_id_size.
885 if (outgoing_device_id_size > 1)
886 {
887 outgoing_node->schedule._heap_streams = (int*)ccmalloc(sizeof(int) * outgoing_device_id_size * 2);
888 outgoing_node->schedule._heap_signals = (outgoing_node->schedule._heap_streams + outgoing_device_id_size);
889 }
890 for (k = 0; k < outgoing_device_id_size; k++)
891 SCHEDULE_STREAMS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_streams : (outgoing_node->schedule)
._heap_streams)
[k] = -1, SCHEDULE_SIGNALS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_signals : (outgoing_node->schedule)
._heap_signals)
[k] = -1;
892 }
893 assert(outgoing_node->schedule.stream_size == outgoing_device_id_size)((void) sizeof ((outgoing_node->schedule.stream_size == outgoing_device_id_size
) ? 1 : 0), __extension__ ({ if (outgoing_node->schedule.stream_size
== outgoing_device_id_size) ; else __assert_fail ("outgoing_node->schedule.stream_size == outgoing_device_id_size"
, "ccv_nnc_graph.c", 893, __extension__ __PRETTY_FUNCTION__);
}))
;
894 for (k = 0; k < outgoing_device_id_size; k++)
895 // If it should be on the same device and its stream is not assigned yet, it is a potential pick.
896 if (outgoing_device_ids[k] == device_ids[i] &&
897 SCHEDULE_STREAMS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_streams : (outgoing_node->schedule)
._heap_streams)
[k] < 0 &&
898 (incomings[d].rank > highest_rank ||
899 (incomings[d].rank == highest_rank &&
900 !stream_has_command && ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd))))
901 {
902 highest_rank = incomings[d].rank;
903 highest_idx = d;
904 stream_n = k;
905 // This is 1 if the rank is the same (thus, the tie has already been broken); if the rank is not the same, we need to compute this.
906 stream_has_command = (incomings[d].rank == highest_rank || ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd));
907 }
908 }
909 if (highest_idx >= 0)
910 {
911 outgoing_idx = highest_idx;
912 ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + outgoing_idx;
913 assert(stream_n >= 0)((void) sizeof ((stream_n >= 0) ? 1 : 0), __extension__ ({
if (stream_n >= 0) ; else __assert_fail ("stream_n >= 0"
, "ccv_nnc_graph.c", 913, __extension__ __PRETTY_FUNCTION__);
}))
;
914 SCHEDULE_STREAMS(outgoing_node->schedule)((outgoing_node->schedule).stream_size <= 1 ? (outgoing_node
->schedule)._inline_streams : (outgoing_node->schedule)
._heap_streams)
[stream_n] = stream_idx;
915 ccv_array_add_unique_uint(data->command_set, outgoing_node->cmd.cmd);
916 } else
917 break;
918 }
919 data->exec_idx = outgoing_idx;
920 }
921 } ccv_nnc_graph_visit_endfor} }
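The inner while loop above implements a greedy chaining heuristic: once a stream is created for a node, it is handed down, hop by hop, to the unassigned successor with the highest rank (preferring one whose command the stream has already seen), so the critical path stays on a single stream and needs no cross-stream signals. A simplified single-device sketch of that chain walk (dropping the device and command-set tie-breaks; illustrative names):

/* Walk from start, repeatedly assigning stream_idx to the unassigned
 * successor with the highest rank, until no candidate remains. */
static void chain_stream(const int start, const int stream_idx, int* const stream_of, const int* const rank, int* const* const out, const int* const out_size)
{
	int idx = start;
	for (;;)
	{
		int i, best = -1, best_rank = -1;
		for (i = 0; i < out_size[idx]; i++)
		{
			const int d = out[idx][i];
			if (stream_of[d] < 0 && rank[d] > best_rank)
				best_rank = rank[d], best = d;
		}
		if (best < 0)
			break; /* Every successor already has a stream. */
		stream_of[best] = stream_idx;
		idx = best;
	}
}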
922 // Go through to assign signals when necessary.
923 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
924 if (incomings[idx].outgoings && incomings[idx].outgoings->rnum)
925 _ccv_nnc_graph_schedule_assign_signals(incomings[idx].outgoings, node, stream_data, &signal_size, exec_info, exec_info_size);
926 } ccv_nnc_graph_visit_endfor} }
927 for (i = 0; i < exec_info_size; i++)
928 if (outgoings[i])
929 ccv_array_free(outgoings[i]);
930 ccfree(outgoings);
931 for (i = 0; i < exec_info_size; i++)
932 if (incomings[i].outgoings)
933 ccv_array_free(incomings[i].outgoings);
934 ccfree(incomings);
935 ccv_matrix_free(exec_dep);
936 ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(0)))
;
937 if (device_id >= 0)
938 {
939 // If the default stream (stream 0) is not on the desired device, swap with the one that is.
940 if (default_data->device_id != device_id)
941 {
942 int exchange_stream_idx = -1;
943 // Find the stream idx to exchange.
944 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
945 int flag = 0;
946 for(i = 0; !flag && i < node->schedule.stream_size; i++)
947 {
948 const int stream_idx = SCHEDULE_STREAMS(node->schedule)((node->schedule).stream_size <= 1 ? (node->schedule
)._inline_streams : (node->schedule)._heap_streams)
[i];
949 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data
)->rsize * (size_t)(stream_idx)))
;
950 if (data->device_id == device_id)
951 {
952 exchange_stream_idx = stream_idx;
953 flag = 1;
954 }
955 }
956 if (flag)
957 break;
958 } ccv_nnc_graph_visit_endfor
959 assert(exchange_stream_idx >= 0);
960 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
961 for (i = 0; i < node->schedule.stream_size; i++)
962 if (SCHEDULE_STREAMS(node->schedule)[i] == 0)
963 SCHEDULE_STREAMS(node->schedule)[i] = -1;
964 } ccv_nnc_graph_visit_endfor
965 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
966 for (i = 0; i < node->schedule.stream_size; i++)
967 if (SCHEDULE_STREAMS(node->schedule)[i] == exchange_stream_idx)
968 SCHEDULE_STREAMS(node->schedule)[i] = 0;
969 } ccv_nnc_graph_visit_endfor
970 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
971 for (i = 0; i < node->schedule.stream_size; i++)
972 if (SCHEDULE_STREAMS(node->schedule)[i] == -1)
973 SCHEDULE_STREAMS(node->schedule)[i] = exchange_stream_idx;
974 } ccv_nnc_graph_visit_endfor
975 ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, exchange_stream_idx))->device_id = default_data->device_id;
976 default_data->device_id = device_id;
977 }
978 }
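// The three visitor passes above swap stream ids 0 and exchange_stream_idx in place
// across every schedule, parking one value on the -1 sentinel so the two live values
// never collide mid-rewrite; a standalone sketch of the pattern follows this function.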
979 int graph_wait_size = 0;
980 for (i = 0; i < graph->destinations->rnum; i++)
981 {
982 const int idx = *(int*)ccv_array_get(graph->destinations, i);
983 for (j = 0; j < exec_info[idx].schedule.stream_size; j++)
984 if (SCHEDULE_STREAMS(exec_info[idx].schedule)[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
985 ++graph_wait_size;
986 }
987 if (graph_wait_size > 0)
988 graph->waits = (graph->waits) ? ccrealloc(graph->waits, sizeof(int) * graph_wait_size) : ccmalloc(sizeof(int) * graph_wait_size);
989 graph_wait_size = 0;
990 for (i = 0; i < graph->destinations->rnum; i++)
991 {
992 const int idx = *(int*)ccv_array_get(graph->destinations, i);
993 ccv_nnc_graph_exec_info_t* const destination_exec_info = exec_info + idx;
994 for (j = 0; j < exec_info[idx].schedule.stream_size; j++)
995 if (SCHEDULE_STREAMS(destination_exec_info->schedule)[j] != 0) // If this exec_info doesn't end with default stream, we need to wait.
996 {
997 ccv_nnc_stream_data_t* const default_stream_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0);
998 if (SCHEDULE_SIGNALS(destination_exec_info->schedule)[j] < 0)
999 SCHEDULE_SIGNALS(destination_exec_info->schedule)[j] = signal_size++;
1000 else if (default_stream_data->signal_set && ccv_array_find_int(default_stream_data->signal_set, SCHEDULE_SIGNALS(destination_exec_info->schedule)[j]))
1001 continue;
1002 graph->waits[graph_wait_size++] = SCHEDULE_SIGNALS(destination_exec_info->schedule)[j];
1003 }
1004 }
1005 graph->wait_size = graph_wait_size;
1006 for (i = 0; i < stream_data->rnum; i++)
1007 {
1008 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i);
1009 if (data->signal_set)
1010 ccv_array_free(data->signal_set);
1011 assert(data->command_set);
1012 ccv_array_free(data->command_set);
1013 }
1014 // Allocate streams & signals
1015 graph->stream_size = stream_data->rnum;
1016 graph->streams = (ccv_nnc_stream_context_t**)ccmalloc(sizeof(ccv_nnc_stream_context_t*) * graph->stream_size);
1017 graph->block_stream_tasks = (ccv_nnc_stream_task_t**)cccalloc(graph->stream_size, sizeof(ccv_nnc_stream_task_t*));
1018 if (stream_context)
1019 graph->streams[0] = stream_context;
1020 for (i = (stream_context ? 1 : 0); i < stream_data->rnum; i++)
1021 {
1022 ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i);
1023 int type = stream_type;
1024 CCV_TENSOR_SET_DEVICE_ID(type, data->device_id);
1025 graph->streams[i] = ccv_nnc_stream_context_new(type);
1026 }
1027 int default_stream_type = stream_type;
1028 CCV_TENSOR_SET_DEVICE_ID(default_stream_type, default_data->device_id);
1029 graph->signal_size = signal_size;
1030 graph->signals = (ccv_nnc_stream_signal_t**)cccalloc(signal_size, sizeof(ccv_nnc_stream_signal_t*));
1031 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
1032 for (i = 0; i < node->schedule.stream_size; i++)
1033 if (SCHEDULE_SIGNALS(node->schedule)[i] >= 0)
1034 {
1035 const int signal = SCHEDULE_SIGNALS(node->schedule)[i];
1036 if (!graph->signals[signal])
1037 {
1038 const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(node->schedule)[i]);
1039 int type = stream_type;
1040 CCV_TENSOR_SET_DEVICE_ID(type, data->device_id);
1041 graph->signals[signal] = ccv_nnc_stream_signal_new(type);
1042 }
1043 }
1044 } ccv_nnc_graph_visit_endfor
1045 ccv_nnc_graph_visit_free(visit);
1046 for (i = 0; i < signal_size; i++)
1047 { assert(graph->signals[i]); }
1048 if (!graph->extern_signal)
1049 graph->extern_signal = ccv_nnc_stream_signal_new(default_stream_type);
1050 // Do this recursively for its sub graphs.
1051 if (graph->sub_graphs)
1052 for (i = 0; i < graph->sub_graphs->rnum; i++)
1053 {
1054 ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i);
1055 if (sub_graph)
1056 {
1057 const int exec_idx = sub_graph->exec_idx - 1;
1058 assert(exec_info[exec_idx].schedule.stream_size == 1);
1059 const int stream_idx = SCHEDULE_STREAMS(exec_info[exec_idx].schedule)[0];
1060 const int device_id = ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx))->device_id;
1061 _ccv_nnc_graph_static_schedule(sub_graph, stream_type, device_id, graph->streams[stream_idx]);
1062 }
1063 }
1064 ccv_array_free(stream_data);
1065}
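// A minimal sketch (not part of ccv_nnc_graph.c) of the three-pass stream swap used in
// _ccv_nnc_graph_static_schedule above: rename a to a sentinel, b to a, then the
// sentinel to b. It assumes -1 is never a valid value in the array.
static void _swap_values_sketch(int* const vals, const int n, const int a, const int b)
{
	int i;
	for (i = 0; i < n; i++)
		if (vals[i] == a)
			vals[i] = -1; // Park a's occurrences on the sentinel.
	for (i = 0; i < n; i++)
		if (vals[i] == b)
			vals[i] = a;
	for (i = 0; i < n; i++)
		if (vals[i] == -1)
			vals[i] = b;
}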
1066
1067void ccv_nnc_graph_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type)
1068{
1069 assert(graph->p == 0);
1070 _ccv_nnc_graph_static_schedule(graph, stream_type, -1, 0);
1071}
1072
1073ccv_nnc_stream_context_t* ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph)
1074{
1075 if (graph->streams && graph->stream_size > 0)
1076 return graph->streams[0];
1077 return 0;
1078}
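// A minimal usage sketch (assumptions: `graph` is fully constructed, and
// CCV_STREAM_CONTEXT_GPU is the stream type the caller wants; neither is shown here):
//   ccv_nnc_graph_static_schedule(graph, CCV_STREAM_CONTEXT_GPU);
//   ccv_nnc_stream_context_t* const stream = ccv_nnc_graph_default_stream(graph);
//   // The graph can now run against its precomputed schedule and synchronize on `stream`.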
1079
1080static void _ccv_nnc_graph_dot_exec(const int index, const ccv_nnc_graph_exec_info_t* const exec_info, ccv_nnc_stream_context_t** const streams, const int flags, FILE* out)
1081{
1082 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1083 fputc('{', out);
1084 fprintf(out, "node%d", index);
1085 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1086 {
1087 fputs("|Command: ", out);
1088 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1089 if (exec_info->schedule.stream_size > 0)
1090 {
1091 int i, flag = 0;
1092 fputs("|Stream: ", out);
1093 for (i = 0; i < exec_info->schedule.stream_size; i++)
1094 {
1095 const int device_id = streams ? CCV_TENSOR_GET_DEVICE_ID(streams[SCHEDULE_STREAMS(exec_info->schedule)[i]]->type) : 0;
1096 if (i == 0)
1097 fprintf(out, "%d (d%d)", SCHEDULE_STREAMS(exec_info->schedule)[i], device_id);
1098 else
1099 fprintf(out, ", %d (d%d)", SCHEDULE_STREAMS(exec_info->schedule)[i], device_id);
1100 }
1101 for (i = 0; i < exec_info->schedule.stream_size; i++)
1102 if (SCHEDULE_SIGNALS(exec_info->schedule)[i] >= 0)
1103 {
1104 if (!flag)
1105 {
1106 flag = 1;
1107 fprintf(out, "|Signal: %d", SCHEDULE_SIGNALS(exec_info->schedule)((exec_info->schedule).stream_size <= 1 ? (exec_info->
schedule)._inline_signals : (exec_info->schedule)._heap_signals
)
[i]);
1108 } else
1109 fprintf(out, ", %d", SCHEDULE_SIGNALS(exec_info->schedule)((exec_info->schedule).stream_size <= 1 ? (exec_info->
schedule)._inline_signals : (exec_info->schedule)._heap_signals
)
[i]);
1110 }
1111 }
1112 if (exec_info->schedule.wait_size > 0)
1113 {
1114 fputs("|Wait: ", out);
1115 int i;
1116 for (i = 0; i < exec_info->schedule.wait_size - 1; i++)
1117 fprintf(out, "%d, ", exec_info->schedule.waits[i]);
1118 fprintf(out, "%d", exec_info->schedule.waits[exec_info->schedule.wait_size - 1]);
1119 }
1120 fputc('}', out);
1121 }
1122}
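// For illustration (hand-written, not captured output): in CCV_NNC_LONG_DOT_GRAPH mode
// this function emits a record fragment roughly like
//   {node3|Command: CCV_NNC_CONVOLUTION_FORWARD|Stream: 1 (d0)|Signal: 2}
// (the command name here is just an example); the '|' separators and '{...}' nesting
// follow Graphviz's record-shape label syntax, and _ccv_nnc_graph_dot_node below wraps
// the fragment in node3 [shape=record,label="..."].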
1123
1124static void _ccv_nnc_graph_dot_tensor(const int index, const ccv_nnc_tensor_t* const tensor, const int zone, const int flags, const int depth, FILE* out)
1125{
1126 // If it has an alias pointer, or it is the long form.
1127 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1128 fputc('{', out);
1129 const int is_tensor_view = CCV_IS_TENSOR_VIEW(tensor);
1130 if (is_tensor_view)
1131 fprintf(out, "tensorview%d", index);
1132 else
1133 fprintf(out, "tensor%d", index);
1134 int i;
1135 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1136 fputc('\'', out);
1137 if (CCV_GET_TAPE_ALLOC(tensor->type))
1138 fputs(" (t)", out);
1139 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1140 {
1141 const int device_id = CCV_TENSOR_GET_DEVICE_ID(tensor->info.type);
1142 fprintf(out, "|d%d|zone%d", device_id, zone);
1143 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1144 fputc('\'', out);
1145 uintptr_t aptr = (uintptr_t)tensor->data.u8;
1146 const int* ainc = is_tensor_view ? ((ccv_nnc_tensor_view_t*)(tensor))->inc : tensor->info.dim;
1147 // For the last one, we don't extend to full ainc.
1148 size_t ainc_size = (ccv_nnc_dimension_count(ainc) - ainc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type);
1149 // Print out the range as well.
1150 fprintf(out, "|{%#010x|%#010x}|%d", (uint32_t)aptr, (uint32_t)(aptr + ainc_size - 1), tensor->info.dim[0]);
1151 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && tensor->info.dim[i]; i++)
1152 fprintf(out, "x%d", tensor->info.dim[i]);
1153 fputc('}', out);
1154 }
1155}
1156
1157typedef struct {
1158 int index;
1159 int name;
1160 int zone;
1161 uintptr_t tensor_ref;
1162 uintptr_t start_ptr;
1163 uintptr_t end_ptr;
1164} ccv_nnc_tensor_dot_t;
1165
1166typedef struct {
1167 ccv_nnc_tensor_dot_t* dots;
1168 int* remap;
1169 int* rename_zone;
1170 int* rename_index;
1171} ccv_nnc_tensor_dot_recovery_t;
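// A note on ownership (inferred from the code below): `remap`, `rename_index` and
// `rename_zone` are carved out of a single ccmalloc block in
// _ccv_nnc_graph_tensor_dot_recovery, which is why _ccv_nnc_graph_tensor_dot_recovery_free
// only frees `dots` and `remap`.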
1172
1173// First sort by start_ptr, then sort by tensor ptr (so that we will have the same tensor sorted to one cluster).
1174#define less_than(i1, i2, aux) ((i1).start_ptr < (i2).start_ptr || ((i1).start_ptr == (i2).start_ptr && (i1).tensor_ref < (i2).tensor_ref))
1175static CCV_IMPLEMENT_QSORT(_ccv_nnc_tensor_dot_sort_by_ptr, ccv_nnc_tensor_dot_t, less_than)
1176#undef less_than
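// A self-contained sketch (not part of ccv_nnc_graph.c) of the sort-then-sweep zone
// grouping performed in _ccv_nnc_graph_tensor_dot_recovery below, ignoring the
// secondary tensor_ref sort key. This is the canonical interval merge: it extends
// end_ptr on every interval, whereas the code below only advances end_ptr when a new
// zone starts.
#include <stdint.h>
#include <stdlib.h>
typedef struct { uintptr_t start_ptr, end_ptr; int zone; } interval_sketch_t;
static int _interval_by_start(const void* a, const void* b)
{
	const interval_sketch_t* const i1 = (const interval_sketch_t*)a;
	const interval_sketch_t* const i2 = (const interval_sketch_t*)b;
	return (i1->start_ptr > i2->start_ptr) - (i1->start_ptr < i2->start_ptr);
}
static void _assign_zones_sketch(interval_sketch_t* const intervals, const int count)
{
	if (count == 0)
		return;
	qsort(intervals, count, sizeof(interval_sketch_t), _interval_by_start);
	int i, zone = 0;
	uintptr_t end_ptr = intervals[0].end_ptr;
	for (i = 0; i < count; i++)
	{
		if (intervals[i].start_ptr > end_ptr) // A gap: start a new zone.
			++zone;
		if (intervals[i].end_ptr > end_ptr)
			end_ptr = intervals[i].end_ptr;
		intervals[i].zone = zone;
	}
}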
1177
1178static int _ccv_nnc_graph_dot_tensor_multiview_count(const ccv_nnc_tensor_multiview_t* const mv)
1179{
1180 if (!CCV_IS_TENSOR_MULTIVIEW(mv))
1181 return 1;
1182 const int count = mv->kind + mv->repeat;
1183 int i, c = 0;
1184 for (i = 0; i < count; i++)
1185 c += _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)[i]);
1186 return c;
1187}
1188
1189static void _ccv_nnc_graph_dot_tensor_multiview_tensor_dots(const ccv_nnc_tensor_multiview_t* const mv, ccv_nnc_tensor_dot_t* const tensor_dots, int* tensor_index)
1190{
1191 const int count = mv->kind + mv->repeat;
1192 int i;
1193 for (i = 0; i < count; i++)
1194 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i]))
1195 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)[i], tensor_dots, tensor_index);
1196 else {
1197 tensor_dots[*tensor_index].name = *tensor_index;
1198 tensor_dots[*tensor_index].start_ptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)[i]->data.u8;
1199 // Because the tensor view's pointer will get updated, it is not correct in this case to share one tensor_ref.
1200 tensor_dots[*tensor_index].tensor_ref = tensor_dots[*tensor_index].start_ptr;
1201 const size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type);
1202 tensor_dots[*tensor_index].end_ptr = tensor_dots[*tensor_index].start_ptr + dim_size - 1;
1203 ++(*tensor_index);
1204 }
1205}
1206
1207static ccv_nnc_tensor_dot_recovery_t _ccv_nnc_graph_tensor_dot_recovery(const ccv_nnc_graph_t* const graph)
1208{
1209 int i, j;
1210 // Recover tensor relationships for all tensors referenced in the graph.
1211 // Most notably, we have to assign these indexes, and find out whether they point to
1212 // the same memory region and whether they overlap. This information
1213 // was lost when we converted from the symbolic form to the execution form,
1214 // and here we do our best to recover it, because that makes the graph easier to understand
1215 // when presented visually (also, we don't want to put this
1216 // information into the tensor or the execution graph to avoid overhead; thus,
1217 // recovering is the best we can do).
1218 int tensor_count = 0;
1219 for (i = 0; i < graph->exec_info->rnum; i++)
1220 {
1221 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1222 for (j = 0; j < exec_info->input_size; j++)
1223 if (exec_info->inputs[j])
1224 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[j]) ? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->inputs[j]) : 1;
1225 for (j = 0; j < exec_info->output_size; j++)
1226 if (exec_info->outputs[j])
1227 tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[j]) ? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->outputs[j]) : 1;
1228 }
1229 ccv_nnc_tensor_dot_t* tensor_dots = tensor_count > 0 ? (ccv_nnc_tensor_dot_t*)ccmalloc(sizeof(ccv_nnc_tensor_dot_t) * tensor_count) : 0;
1230 int k = 0;
1231 for (i = 0; i < graph->exec_info->rnum; i++)
1232 {
1233 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1234 for (j = 0; j < exec_info->input_size; j++)
1235 {
1236 ccv_nnc_tensor_t* tensor = exec_info->inputs[j];
1237 if (!tensor)
1238 continue;
1239 if (CCV_IS_TENSOR_MULTIVIEW(tensor))
1240 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1241 else {
1242 tensor_dots[k].name = k;
1243 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1244 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1245 const int* inc = CCV_IS_TENSOR_VIEW(tensor) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1246 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type);
1247 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1248 ++k;
1249 }
1250 }
1251 for (j = 0; j < exec_info->output_size; j++)
1252 {
1253 ccv_nnc_tensor_t* tensor = exec_info->outputs[j];
1254 if (!tensor)
1255 continue;
1256 if (CCV_IS_TENSOR_MULTIVIEW(tensor))
1257 _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k);
1258 else {
1259 tensor_dots[k].name = k;
1260 tensor_dots[k].tensor_ref = (uintptr_t)tensor;
1261 tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8;
1262 const int* inc = CCV_IS_TENSOR_VIEW(tensor) ? ((ccv_nnc_tensor_view_t*)tensor)->inc : tensor->info.dim;
1263 const size_t inc_size = (ccv_nnc_dimension_count(inc) - inc[0] + tensor->info.dim[0]) * CCV_GET_DATA_TYPE_SIZE(tensor->type);
1264 tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + inc_size - 1;
1265 ++k;
1266 }
1267 }
1268 }
1269 tensor_count = k; // We may have over-counted; shrink now.
1270 // To group overlapping memory into one zone, we sort by start ptr first (secondarily by the tensor pointer).
1271 _ccv_nnc_tensor_dot_sort_by_ptr(tensor_dots, tensor_count, 0);
1272 int index = 0, zone = 0;
1273 uintptr_t tensor_ref = tensor_count > 0 ? tensor_dots[0].tensor_ref : 0;
1274 uintptr_t end_ptr = tensor_count > 0 ? tensor_dots[0].end_ptr : 0;
1275 // Then it is trivial: we sweep by end ptr. If the next start ptr is still within the current end ptr (start ptr <= end ptr),
1276 // it belongs to the same zone.
1277 for (i = 0; i < tensor_count; i++)
1278 {
1279 if (tensor_dots[i].tensor_ref != tensor_ref)
1280 {
1281 tensor_ref = tensor_dots[i].tensor_ref;
1282 ++index;
1283 }
1284 if (tensor_dots[i].start_ptr > end_ptr)
1285 {
1286 end_ptr = ccv_max(end_ptr, tensor_dots[i].end_ptr);
1287 ++zone;
1288 }
1289 tensor_dots[i].index = index;
1290 tensor_dots[i].zone = zone;
1291 }
1292 // We already have index and zone assigned, but the problem is that these are not very human-interpretable (because
1293 // they follow the pointers from low to high, not the tensor creation order). The following code renames both the index
1294 // and the zone so that they are much more understandable.
1295 const int index_count = index + 1;
1296 const int zone_count = zone + 1;
1297 int* remap = (int*)ccmalloc(sizeof(int) * (tensor_count + index_count + zone_count));
1298 int* rename_index = remap + tensor_count;
1299 int* rename_zone = rename_index + index_count;
1300 for (i = 0; i < tensor_count; i++)
1301 remap[tensor_dots[i].name] = i;
1302 for (i = 0; i < index_count; i++)
1303 rename_index[i] = -1;
1304 for (i = 0; i < zone_count; i++)
1305 rename_zone[i] = -1;
1306 index = 0;
1307 zone = 0;
1308 for (i = 0; i < tensor_count; i++)
1309 {
1310 ccv_nnc_tensor_dot_t* tensor_dot = tensor_dots + remap[i];
1311 if (rename_index[tensor_dot->index] == -1)
1312 rename_index[tensor_dot->index] = index++;
1313 if (rename_zone[tensor_dot->zone] == -1)
1314 rename_zone[tensor_dot->zone] = zone++;
1315 }
1316 ccv_nnc_tensor_dot_recovery_t recovery = {
1317 .dots = tensor_dots,
1318 .remap = remap,
1319 .rename_index = rename_index,
1320 .rename_zone = rename_zone,
1321 };
1322 return recovery;
1323}
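// A sketch (assumptions mine) of the first-appearance renaming used above: arbitrary
// ids are mapped to consecutive ids in the order they are first seen, so the final
// labels follow traversal order rather than pointer order. `rename_map` must have
// `map_size` entries covering every id in `ids`.
static void _rename_first_seen_sketch(const int* const ids, int* const renamed, const int count, int* const rename_map, const int map_size)
{
	int i, next = 0;
	for (i = 0; i < map_size; i++)
		rename_map[i] = -1;
	for (i = 0; i < count; i++)
	{
		if (rename_map[ids[i]] == -1)
			rename_map[ids[i]] = next++;
		renamed[i] = rename_map[ids[i]];
	}
}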
1324
1325static void _ccv_nnc_graph_tensor_dot_recovery_free(const ccv_nnc_tensor_dot_recovery_t recovery)
1326{
1327 ccfree(recovery.dots);
1328 ccfree(recovery.remap);
1329}
1330
1331static void _ccv_nnc_graph_dot_tensor_multiview_one(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int depth, int* tensor_index, FILE* out)
1332{
1333 const int count = mv->kind + mv->repeat;
1334 int i, j;
1335 fputs("|{", out);
1336 for (i = 0; i < count; i++)
1337 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i]))
1338 {
1339 fprintf(out, "{%d", i);
1340 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1341 fputc('*', out); // Denotes that we loop on this.
1342 _ccv_nnc_graph_dot_tensor_multiview_one((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)[i], recovery, depth, tensor_index, out);
1343 if (i == count - 1)
1344 fputc('}', out);
1345 else
1346 fputs("}|", out);
1347 } else {
1348 fprintf(out, "{%d", i);
1349 if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0))
1350 fputc('*', out); // Denotes that we loop on this.
1351 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1352 fprintf(out, "|zone%d", recovery.rename_zone[tensor_dot->zone]);
1353 for (j = 0; j < depth; j++)
1354 fputc('\'', out);
1355 uintptr_t aptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)[i]->data.u8;
1356 // For the last one, we don't extend to full ainc.
1357 size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type);
1358 // Print out the range as well.
1359 fprintf(out, "|{%#010x|%#010x}", (uint32_t)aptr, (uint32_t)(aptr + dim_size - 1));
1360 ++(*tensor_index);
1361 if (i == count - 1)
1362 fputc('}', out);
1363 else
1364 fputs("}|", out);
1365 }
1366 fputc('}', out);
1367}
1368
1369static void _ccv_nnc_graph_dot_tensor_multiview(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, int* tensor_index, FILE* out)
1370{
1371 // If it has an alias pointer, or it is the long form.
1372 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1373 fputc('{', out);
1374 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index];
1375 fprintf(out, "multiview%d", recovery.rename_index[tensor_dot->index]);
1376 int i;
1377 for (i = 0; i < depth; i++) // Print subscription to denote depth.
1378 fputc('\'', out);
1379 if (CCV_GET_TAPE_ALLOC(mv->type))
1380 fputs(" (t)", out);
1381 if (flags == CCV_NNC_LONG_DOT_GRAPH)
1382 {
1383 _ccv_nnc_graph_dot_tensor_multiview_one(mv, recovery, depth, tensor_index, out);
1384 const ccv_nnc_tensor_t* root = (ccv_nnc_tensor_t*)mv;
1385 while (CCV_IS_TENSOR_MULTIVIEW(root))
1386 root = CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)root)[0];
1387 fprintf(out, "|%d", root->info.dim[0]);
1388 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && root->info.dim[i]; i++)
1389 fprintf(out, "x%d", root->info.dim[i]);
1390 fputc('}', out);
1391 } else
1392 *tensor_index += _ccv_nnc_graph_dot_tensor_multiview_count(mv);
1393}
1394
1395static void _ccv_nnc_graph_dot_node(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, ccv_nnc_stream_context_t** const streams, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* const tensor_index)
1396{
1397 fprintf(out, "node%d [shape=record,label=\"", exec_index);
1398 _ccv_nnc_graph_dot_exec(exec_index, exec_info, streams, flags, out);
1399 int i;
1400 int k = *tensor_index;
1401 if (exec_info->input_size > 0)
1402 {
1403 fputs("|{Input", out);
1404 for (i = 0; i < exec_info->input_size; i++)
1405 if (exec_info->inputs[i])
1406 {
1407 fputc('|', out);
1408 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i]))
1409 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1410 else {
1411 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1412 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1413 ++k;
1414 }
1415 } else
1416 fputs("|-", out);
1417 fputc('}', out);
1418 }
1419 if (exec_info->output_size > 0)
1420 {
1421 fputs("|{Output", out);
1422 for (i = 0; i < exec_info->output_size; i++)
1423 if (exec_info->outputs[i])
1424 {
1425 fputc('|', out);
1426 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i]))
1427 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1428 else {
1429 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1430 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1431 ++k;
1432 }
1433 } else
1434 fputs("|-", out);
1435 fputc('}', out);
1436 }
1437 fputs("\"];\n", out);
1438 *tensor_index = k;
1439}
1440
1441static void _ccv_nnc_graph_dot_while_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const ccv_nnc_graph_t* const while_graph, const int flags, const int depth, FILE* out, int* tensor_index)
1442{
1443 int i;
1444 fprintf(out, "label=<<b>while%d </b>Command: ", exec_index);
1445 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1446 fputs(">;\n", out);
1447 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1448 int k = *tensor_index;
1449 if (exec_info->input_size > 0)
1450 {
1451 fputs("{Input|{", out);
1452 for (i = 0; i < exec_info->input_size; i++)
1453 {
1454 if (i > 0)
1455 fputc('|', out);
1456 if (exec_info->inputs[i])
1457 {
1458 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i]))
1459 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1460 else {
1461 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1462 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1463 ++k;
1464 }
1465 } else
1466 fputc('-', out);
1467 }
1468 fputs("}}", out);
1469 }
1470 if (exec_info->output_size > 0)
1471 {
1472 if (exec_info->input_size > 0)
1473 fputs("|", out);
1474 fputs("{Output|{", out);
1475 for (i = 0; i < exec_info->output_size; i++)
1476 {
1477 if (i > 0)
1478 fputc('|', out);
1479 if (exec_info->outputs[i])
1480 {
1481 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i]))
1482 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1483 else {
1484 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1485 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1486 ++k;
1487 }
1488 } else
1489 fputc('-', out);
1490 }
1491 fputs("}}", out);
1492 }
1493 fputs("}\"];\n", out);
1494 *tensor_index = k;
1495}
1496
1497static void _ccv_nnc_graph_dot_case_of_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* tensor_index)
1498{
1499 int i;
1500 fprintf(out, "label=<<b>caseof%d </b>Command: ", exec_index);
1501 fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out);
1502 fputs(">;\n", out);
1503 fprintf(out, "label%d [shape=record,label=\"{", exec_index);
1504 int k = *tensor_index;
1505 if (exec_info->input_size > 0)
1506 {
1507 fputs("{Input|{", out);
1508 for (i = 0; i < exec_info->input_size; i++)
1509 {
1510 if (i > 0)
1511 fputc('|', out);
1512 if (exec_info->inputs[i])
1513 {
1514 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i]))
1515 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out);
1516 else {
1517 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1518 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1519 ++k;
1520 }
1521 } else
1522 fputc('-', out);
1523 }
1524 fputs("}}", out);
1525 }
1526 if (exec_info->output_size > 0)
1527 {
1528 if (exec_info->input_size > 0)
1529 fputs("|", out);
1530 fputs("{Output|{", out);
1531 for (i = 0; i < exec_info->output_size; i++)
1532 {
1533 if (i > 0)
1534 fputc('|', out);
1535 if (exec_info->outputs[i])
1536 {
1537 if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i]))
1538 _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out);
1539 else {
1540 const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k];
1541 _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out);
1542 ++k;
1543 }
1544 } else
1545 fputc('-', out);
1546 }
1547 fputs("}}", out);
1548 }
1549 fputs("}\"];\n", out);
1550 *tensor_index = k;
1551}
1552
1553static void _ccv_nnc_graph_dot_sub_graphs(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_tensor_dot_recovery_t p_recovery, const ccv_array_t* const sub_graphs, const int flags, const int depth, FILE* out, int* tensor_index, int* exec_index)
1554{
1555 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
1556 {
1557 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1558 const ccv_nnc_graph_t* const while_graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[0] - 1);
1559 // Output this node info within this subgraph.
1560 _ccv_nnc_graph_dot_while_label(exec_info, *exec_index, p_recovery, while_graph, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1561 } else if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
1562 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index);
1563 _ccv_nnc_graph_dot_case_of_label(exec_info, *exec_index, p_recovery, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index);
1564 }
1565 ++(*exec_index);
1566 int p;
1567 for (p = 0; p < exec_info->graph_ref_size; p++)
1568 {
1569 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1570 {
1571 fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=\"\"\n", *exec_index, *exec_index);
1572 ++(*exec_index);
1573 }
1574 const ccv_nnc_graph_t* const graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[p] - 1);
1575 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1576 int i, j;
1577 int k = 0;
1578 int* node_id = (int*)ccmalloc(sizeof(int) * graph->exec_info->rnum);
1579 // Output styles.
1580 for (i = 0; i < graph->exec_info->rnum; i++)
1581 {
1582 node_id[i] = *exec_index;
1583 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1584 if (CCV_NNC_GRAPH_REF(exec_info)[0])
1585 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, depth + 1, out, &k, exec_index);
1586 else {
1587 _ccv_nnc_graph_dot_node(exec_info, *exec_index, graph->streams, recovery, flags, depth, out, &k);
1588 ++(*exec_index);
1589 }
1590 }
1591 // Output connections.
1592 for (i = 0; i < graph->exec_info->rnum; i++)
1593 {
1594 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1595 if (exec_info->outgoings)
1596 for (j = 0; j < exec_info->outgoings->rnum; j++)
1597 {
1598 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j);
1599 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx);
1600 // If both are sub-graphs, have both tail and head specified.
1601 if (CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1602 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1603 else if (CCV_NNC_GRAPH_REF(exec_info)[0] && !CCV_NNC_GRAPH_REF(outgoing_info)[0])
1604 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1605 else if (!CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1606 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1607 else
1608 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1609 }
1610 }
1611 fputs("}\n", out);
1612 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1613 ccfree(node_id);
1614 }
1615 // Extra subgraph cluster.
1616 if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1617 fputs("}\n", out);
1618}
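// ltail / lhead (emitted above and again in ccv_nnc_graph_dot below) are Graphviz
// attributes that clip an edge to a cluster's boundary; they only take effect because
// "compound=true" is written at the top of the digraph.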
1619
1620void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out)
1621{
1622 fputs("digraph G {\ncompound=true;\n", out);
1623 ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph);
1624 int i, j;
1625 int k = 0, c = 0;
1626 int* node_id = (int*)ccmalloc(sizeof(int) * graph->exec_info->rnum);
1627 // Output styles.
1628 for (i = 0; i < graph->exec_info->rnum; i++)
1629 {
1630 node_id[i] = c;
1631 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1632 if (CCV_NNC_GRAPH_REF(exec_info)[0])
1633 _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, 1, out, &k, &c);
1634 else {
1635 _ccv_nnc_graph_dot_node(exec_info, c, graph->streams, recovery, flags, 0, out, &k);
1636 ++c;
1637 }
1638 }
1639 // Output connections.
1640 for (i = 0; i < graph->exec_info->rnum; i++)
1641 {
1642 ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1643 if (exec_info->outgoings)
1644 for (j = 0; j < exec_info->outgoings->rnum; j++)
1645 {
1646 const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j);
1647 const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx);
1648 // If both are sub-graphs, have both tail and head specified.
1649 if (CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1650 fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1651 else if (CCV_NNC_GRAPH_REF(exec_info)[0] && !CCV_NNC_GRAPH_REF(outgoing_info)[0])
1652 fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1653 else if (!CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0])
1654 fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1655 else
1656 fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1657 }
1658 }
1659 fputs("}\n", out);
1660 _ccv_nnc_graph_tensor_dot_recovery_free(recovery);
1661 ccfree(node_id);
1662}
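// Typical usage (a sketch; the graph comes from elsewhere):
//   FILE* out = fopen("graph.dot", "w+");
//   ccv_nnc_graph_dot(graph, CCV_NNC_LONG_DOT_GRAPH, out);
//   fclose(out);
// and then render with Graphviz, e.g. `dot -Tpng graph.dot -o graph.png`.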
1663
1664void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
1665{
1666 // Exec the current node; for synchronous CPU execution, there is no stream unit.
1667#define visitor(node, idx, ...) \
1668 do { \
1669 node->cmd = ccv_nnc_cmd_autotune(node->cmd, max_workspace_size, node->hint, flags, node->inputs, node->input_size, node->outputs, node->output_size, 0); \
1670 } while (0)
1671 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, sources, source_size, destinations, destination_size, 0, visitor);
1672#undef visitor
1673}
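// A usage sketch (a hedged illustration; by symmetry with ccv_nnc_graph_sources /
// ccv_nnc_graph_source_size, matching destination accessors are assumed to exist):
//   ccv_nnc_graph_autotune(graph, max_workspace_size, 0 /* flags */,
//       ccv_nnc_graph_sources(graph), ccv_nnc_graph_source_size(graph),
//       ccv_nnc_graph_destinations(graph), ccv_nnc_graph_destination_size(graph));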
1674
1675void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph)
1676{
1677 int i, j;
1678 for (i = 0; i < graph->exec_info->rnum; i++)
1679 {
1680 ccv_nnc_graph_exec_info_t *info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i);
1681 if (info->_heap_graph_ref)
1682 ccfree(info->_heap_graph_ref);
1683 ccv_array_t* outgoings = info->outgoings;
1684 if (outgoings)
1685 ccv_array_free(outgoings);
1686 // We allocate inputs & outputs in a contiguous fashion, therefore we only need to free the inputs array.
1687 if (info->inputs)
1688 ccfree(info->inputs);
1689 if (info->input_flags)
1690 ccfree(info->input_flags);
1691 if (info->updates)
1692 ccfree(info->updates);
1693 if ((info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) && info->p_while.inputs)
1694 ccfree(info->p_while.inputs);
1695 if (info->schedule.stream_size > 1)
1696 ccfree(info->schedule._heap_streams);
1697 if (info->schedule.waits)
1698 ccfree(info->schedule.waits);
1699 }
1700 if (graph->tensor_wraps)
1701 {
1702 for (i = 0; i < graph->tensor_wraps->rnum; i++)
1703 {
1704 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, i);
1705 if (tensor_wrap_array)
1706 {
1707 for (j = 0; j < tensor_wrap_array->size; j++)
1708 _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[j]);
1709 ccfree(tensor_wrap_array);
1710 }
1711 }
1712 ccv_array_free(graph->tensor_wraps);
1713 }
1714 if (graph->tensor_wraps_refs)
1715 ccv_array_free(graph->tensor_wraps_refs);
1716 if (graph->breakpoints)
1717 ccfree(graph->breakpoints);
1718 if (graph->sources)
1719 ccv_array_free(graph->sources);
1720 if (graph->destinations)
1721 ccv_array_free(graph->destinations);
1722 if (graph->streams)
1723 {
1724 // If the graph has a parent graph, the default stream is allocated by the parent graph, so we need to skip it.
1725 if (!graph->p)
1726 ccv_nnc_stream_context_free(graph->streams[0]);
1727 for (i = 1; i < graph->stream_size; i++)
1728 ccv_nnc_stream_context_free(graph->streams[i]);
1729 ccfree(graph->streams);
1730 }
1731 if (graph->block_stream_tasks)
1732 ccfree(graph->block_stream_tasks);
1733 if (graph->signals)
1734 {
1735 for (i = 0; i < graph->signal_size; i++)
1736 ccv_nnc_stream_signal_free(graph->signals[i]);
1737 ccfree(graph->signals);
1738 }
1739 if (graph->extern_signal)
1740 ccv_nnc_stream_signal_free(graph->extern_signal);
1741 if (graph->waits)
1742 ccfree(graph->waits);
1743 if (graph->carry_overs)
1744 {
1745 for (i = 0; i < graph->carry_overs->rnum; i++)
1746 {
1747 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i);
1748 _ccv_nnc_graph_tensor_wrap_free(carry_over->from);
1749 _ccv_nnc_graph_tensor_wrap_free(carry_over->to);
1750 }
1751 ccv_array_free(graph->carry_overs);
1752 }
1753 if (graph->sub_graphs)
1754 {
1755 for (i = 0; i < graph->sub_graphs->rnum; i++)
1756 ccv_nnc_graph_free(*(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i));
1757 ccv_array_free(graph->sub_graphs);
1758 }
1759 ccv_array_free(graph->exec_info);
1760 ccfree(graph);
1761}