Bug Summary

File: nnc/ccv_nnc_graph_run.c
Warning: line 799, column 8
Array access (from variable 'inputs') results in a null pointer dereference
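
Note: the path reported below starts in _ccv_nnc_graph_exec_unwrap_io (events 21-23 in the listing): when node->tensor_wraps_ref is 0 the function returns without ever writing node->inputs, so the 'inputs' pointer that _ccv_nnc_graph_exec_run_task loads at line 393 can still be null when it is later subscripted; the flagged dereference itself (line 799) falls past the end of this excerpt. A minimal sketch of the flagged pattern, with hypothetical names (node_t, unwrap_io and run_task are illustrations, not the library's API):

	typedef struct {
		int tensor_wraps_ref;
		int input_size;
		void** inputs; /* may be null for a node that carries no wrapped tensors */
	} node_t;

	static void unwrap_io(node_t* const node)
	{
		if (!node->tensor_wraps_ref)
			return; /* early return: node->inputs is never written */
		/* ... otherwise node->inputs would be rewritten here ... */
	}

	static void run_task(node_t* const node)
	{
		unwrap_io(node);
		void** const inputs = node->inputs; /* can still be null here */
		int i;
		for (i = 0; i < node->input_size; i++)
			(void)inputs[i]; /* null pointer dereference when inputs == 0 */
	}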

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_graph_run.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/14.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/buildslave/public_html/analyze/2022-06-22-151334-490440-1 -x c ccv_nnc_graph_run.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_graph.h"
6#include "_ccv_nnc_stream.h"
7#ifdef HAVE_CUDA
8#include "gpu/ccv_nnc_compat.h"
9#endif
10
11// MARK - Level-2 API
12
13static void _ccv_nnc_unwrap_tensor_wrap(const ccv_nnc_graph_t* const graph, const int64_t count, const int64_t reverse_count, ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
14{
15 ccv_nnc_tensor_t* tensor = tensor_wrap->tensors[tensor_wrap->index];
16 while (CCV_IS_TENSOR_MULTIVIEW(tensor) &&
17 (((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graph ||
18 ((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graph->pair))
19 {
20 // If the anchor is from the pair, we use the reverse_count instead (we are looking it up).
21 const int i = (int)((((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graph) ? count : reverse_count);
22 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
23 const int off = mv->kind;
24 const int mod = mv->repeat;
25 tensor = CCV_NNC_MULTIVIEW_DATA(mv)[i >= off ? ((i - off) % mod) + off : i]; // Unwrap.
26 // If reached the root.
27 if (!CCV_IS_TENSOR_MULTIVIEW(tensor))
28 tensor_wrap->update_required = 1; // Need to update tensor updates.
29 ++tensor_wrap->index;
30 tensor_wrap->tensors[tensor_wrap->index] = tensor;
31 assert(tensor_wrap->index < tensor_wrap->count);
32 }
33}
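
Note: the unwrap at line 25 treats the multiview slots as `kind` fixed leading entries (off) followed by a ring buffer of `repeat` entries (mod). A standalone sketch of that index mapping, using the hypothetical helper unwrap_index (illustration only, not part of the library):

	static int unwrap_index(const int i, const int off, const int mod)
	{
		return i >= off ? ((i - off) % mod) + off : i;
	}

	/* With off = 1 and mod = 2, counters 0, 1, 2, 3, 4, 5 map to slots
	 * 0, 1, 2, 1, 2, 1: slot 0 stays fixed, slots 1 and 2 alternate. */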
34
35static void _ccv_nnc_graph_unwrap_sub_graph(const ccv_nnc_graph_t* const graph, const int64_t count, const int64_t reverse_count, const ccv_nnc_graph_t* const sub_graph)
36{
37 int i;
38 if (sub_graph->carry_overs)
39 for (i = 0; i < sub_graph->carry_overs->rnum; i++)
40 {
41 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(sub_graph->carry_overs, i);
42 _ccv_nnc_unwrap_tensor_wrap(graph, count, reverse_count, carry_over->from);
43 _ccv_nnc_unwrap_tensor_wrap(graph, count, reverse_count, carry_over->to);
44 }
45 if (sub_graph->sub_graphs)
46 for (i = 0; i < sub_graph->sub_graphs->rnum; i++)
47 _ccv_nnc_graph_unwrap_sub_graph(graph, count, reverse_count, *(ccv_nnc_graph_t**)ccv_array_get(sub_graph->sub_graphs, i));
48}
49
50static void _ccv_nnc_graph_unwrap(const ccv_nnc_graph_t* const graph, const int64_t count, const int64_t reverse_count)
51{
52 if (!graph->tensor_wraps_refs)
53 return;
54 int i, j;
55 for (i = 0; i < graph->tensor_wraps_refs->rnum; i++)
56 {
57 const ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (const ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(graph->tensor_wraps_refs, i);
58 const ccv_nnc_graph_t* const sub_graph = tensor_wraps_ref->graph;
59 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(sub_graph->tensor_wraps, tensor_wraps_ref->d);
60 if (tensor_wrap_array)
61 for (j = 0; j < tensor_wrap_array->size; j++)
62 {
63 ccv_nnc_graph_tensor_wrap_t* const tensor_wrap = tensor_wrap_array->tensor_wraps[j];
64 if (!tensor_wrap)
65 continue;
66 _ccv_nnc_unwrap_tensor_wrap(graph, count, reverse_count, tensor_wrap);
67 }
68 }
69 _ccv_nnc_graph_unwrap_sub_graph(graph, count, reverse_count, graph);
70}
71
72static void _ccv_nnc_graph_transit_move_to(const ccv_nnc_graph_t* const graph)
73{
74 int i;
75 if (graph->carry_overs)
76 for (i = 0; i < graph->carry_overs->rnum; i++)
77 {
78 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i);
79 ccv_nnc_tensor_t* it = (ccv_nnc_tensor_t*)(carry_over->to->tensors[carry_over->to->index]);
80 assert(!CCV_IS_TENSOR_MULTIVIEW(it));
81 it->data = carry_over->transit;
82 }
83}
84
85static void _ccv_nnc_graph_from_move_transit(const ccv_nnc_graph_t* const graph)
86{
87 int i;
88 if (graph->carry_overs)
89 for (i = 0; i < graph->carry_overs->rnum; i++)
90 {
91 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i);
92 ccv_nnc_tensor_t* it = (ccv_nnc_tensor_t*)(carry_over->from->tensors[carry_over->from->index]);
93 assert(!CCV_IS_TENSOR_MULTIVIEW(it));
94 carry_over->transit = it->data;
95 }
96}
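
Note: the two helpers above form a handoff pair across loop iterations: _ccv_nnc_graph_from_move_transit saves the 'from' tensor's data pointer into carry_over->transit after an iteration, and _ccv_nnc_graph_transit_move_to installs that saved pointer into the 'to' tensor before the next one. A reduced sketch with toy types (toy_tensor_t and friends are hypothetical stand-ins):

	typedef struct { void* data; } toy_tensor_t;
	typedef struct {
		toy_tensor_t* from;
		toy_tensor_t* to;
		void* transit;
	} toy_carry_over_t;

	static void from_move_transit(toy_carry_over_t* const c)
	{
		c->transit = c->from->data; /* remember what 'from' produced */
	}

	static void transit_move_to(const toy_carry_over_t* const c)
	{
		c->to->data = c->transit; /* hand it to 'to' for the next round */
	}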
97
98static void _ccv_nnc_rewrap_tensor_wrap(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
99{
100 while (tensor_wrap->index > 0 && CCV_IS_TENSOR_MULTIVIEW(tensor_wrap->tensors[tensor_wrap->index - 1]) &&
101 (((ccv_nnc_tensor_multiview_t*)tensor_wrap->tensors[tensor_wrap->index - 1])->anchor == (intptr_t)graph ||
102 ((ccv_nnc_tensor_multiview_t*)tensor_wrap->tensors[tensor_wrap->index - 1])->anchor == (intptr_t)graph->pair))
103 --tensor_wrap->index;
104}
105
106static void _ccv_nnc_graph_rewrap_sub_graph(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_t* const sub_graph)
107{
108 int i;
109 if (sub_graph->carry_overs)
110 for (i = 0; i < sub_graph->carry_overs->rnum; i++)
111 {
112 ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(sub_graph->carry_overs, i);
113 _ccv_nnc_rewrap_tensor_wrap(graph, carry_over->from);
114 _ccv_nnc_rewrap_tensor_wrap(graph, carry_over->to);
115 }
116 if (sub_graph->sub_graphs)
117 for (i = 0; i < sub_graph->sub_graphs->rnum; i++)
118 _ccv_nnc_graph_rewrap_sub_graph(graph, *(ccv_nnc_graph_t**)ccv_array_get(sub_graph->sub_graphs, i));
119}
120
121static void _ccv_nnc_graph_rewrap(const ccv_nnc_graph_t* const graph) // Call this method at the end to roll the wrap_ptr back
122{
123 if (!graph->tensor_wraps_refs)
124 return;
125 int i, j;
126 for (i = 0; i < graph->tensor_wraps_refs->rnum; i++)
127 {
128 const ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (const ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(graph->tensor_wraps_refs, i);
129 const ccv_nnc_graph_t* const sub_graph = tensor_wraps_ref->graph;
130 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(sub_graph->tensor_wraps, tensor_wraps_ref->d);
131 if (tensor_wrap_array)
132 for (j = 0; j < tensor_wrap_array->size; j++)
133 {
134 ccv_nnc_graph_tensor_wrap_t* const tensor_wrap = tensor_wrap_array->tensor_wraps[j];
135 if (!tensor_wrap)
136 continue;
137 _ccv_nnc_rewrap_tensor_wrap(graph, tensor_wrap);
138 }
139 }
140 _ccv_nnc_graph_rewrap_sub_graph(graph, graph);
141}
142
143static void _ccv_nnc_graph_exec_unwrap_io(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node)
144{
145 if (!node->tensor_wraps_ref)
    [21] Assuming field 'tensor_wraps_ref' is 0
    [22] Taking true branch
146 return;
    [23] Returning without writing to 'node->inputs'
147 int i;
148 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, node->tensor_wraps_ref - 1);
149 ccv_nnc_graph_tensor_wrap_t** const tensor_wraps = tensor_wrap_array->tensor_wraps;
150 for (i = 0; i < tensor_wrap_array->size; i++)
151 if (tensor_wraps[i])
152 {
153 assert(tensor_wraps[i]->index > 0);
154 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)(tensor_wraps[i]->tensors[tensor_wraps[i]->index - 1]);
155 assert(CCV_IS_TENSOR_MULTIVIEW(mv));
156 // Only now set the mv->it, because now this node is about to get executed.
157 mv->it = tensor_wraps[i]->tensors[tensor_wraps[i]->index];
158 assert(!CCV_IS_TENSOR_MULTIVIEW(mv->it));
159 }
160 for (i = 0; i < node->input_size; i++)
161 if (tensor_wraps[i])
162 node->inputs[i] = tensor_wraps[i]->tensors[tensor_wraps[i]->index];
163 const int d = node->input_size;
164 for (i = 0; i < node->output_size; i++)
165 if (tensor_wraps[d + i])
166 node->outputs[i] = tensor_wraps[d + i]->tensors[tensor_wraps[d + i]->index];
167}
168
169static void _ccv_nnc_graph_exec_unwrap_while_expr(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node)
170{
171 assert(node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE);
172 if (!node->p_while.tensor_wraps_ref)
173 return;
174 int i;
175 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, node->p_while.tensor_wraps_ref - 1);
176 ccv_nnc_graph_tensor_wrap_t** const tensor_wraps = tensor_wrap_array->tensor_wraps;
177 for (i = 0; i < tensor_wrap_array->size; i++)
178 if (tensor_wraps[i])
179 {
180 assert(tensor_wraps[i]->index > 0);
181 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)(tensor_wraps[i]->tensors[tensor_wraps[i]->index - 1]);
182 assert(CCV_IS_TENSOR_MULTIVIEW(mv));
183 // Only now set the mv->it, because now this node is about to get executed.
184 mv->it = tensor_wraps[i]->tensors[tensor_wraps[i]->index];
185 assert(!CCV_IS_TENSOR_MULTIVIEW(mv->it));
186 }
187 for (i = 0; i < node->p_while.input_size; i++)
188 if (tensor_wraps[i])
189 node->p_while.inputs[i] = tensor_wraps[i]->tensors[tensor_wraps[i]->index];
190}
191
192static void _ccv_nnc_graph_exec_unwrap_phi(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_info_t* const node, const int ref)
193{
194 int i;
195 // If the output tensor is a phi multi-view tensor, we update our selection to all the subscribers.
196 for (i = 0; i < node->output_size; i++)
197 if (CCV_IS_TENSOR_MULTIVIEW(node->outputs[i]) &&
198 ((ccv_nnc_tensor_multiview_t*)node->outputs[i])->anchor == CCV_NNC_MULTIVIEW_PHI)
199 {
200 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)node->outputs[i];
201 mv->it = CCV_NNC_MULTIVIEW_DATA(mv)[ref >= 0];
202 ccv_nnc_tensor_multiview_synchronize(mv);
203 }
204}
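
Note: since a C comparison evaluates to 0 or 1, the pick at line 201 selects CCV_NNC_MULTIVIEW_DATA(mv)[0] when ref < 0 and [1] otherwise, so a phi multi-view holds exactly two views. A toy illustration (names hypothetical):

	#include <assert.h>

	int main(void)
	{
		const char* const views[2] = { "initial", "branch-taken" };
		int ref = -1;
		assert(views[ref >= 0] == views[0]); /* no branch recorded yet */
		ref = 3;
		assert(views[ref >= 0] == views[1]); /* branch 3 was recorded */
		return 0;
	}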
205
206static void _ccv_nnc_graph_exec_begin_synchronize_multiviews(ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node)
207{
208 if (!node->tensor_wraps_ref)
209 return;
210 int i;
211 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, node->tensor_wraps_ref - 1);
212 ccv_nnc_graph_tensor_wrap_t** const tensor_wraps = tensor_wrap_array->tensor_wraps;
213 for (i = 0; i < tensor_wrap_array->size; i++)
214 if (tensor_wraps[i] && tensor_wraps[i]->update_required)
215 {
216 assert(tensor_wraps[i]->index > 0);
217 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)(tensor_wraps[i]->tensors[tensor_wraps[i]->index - 1]);
218 // Now update the final pointer.
219 ccv_nnc_tensor_multiview_synchronize(mv);
220 tensor_wraps[i]->update_required = 0; // Reset, no need to update.
221 }
222}
223
224void ccv_nnc_print_tensor_info(const ccv_nnc_tensor_t* const tensor)
225{
226 int i;
227 PRINT(CCV_CLI_INFO, " [%d", tensor->info.dim[0]);
228 for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && tensor->info.dim[i]; i++)
229 PRINT(CCV_CLI_INFO, "x%d", tensor->info.dim[i]);
230 PRINT(CCV_CLI_INFO, "]");
231 if (!CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE) || tensor->info.dim[0] <= 0)
232 return;
233 const int nd = ccv_nnc_tensor_nd(tensor->info.dim);
234 const int len = ccv_min(tensor->info.dim[nd - 1], 3);
235 if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
236 {
237#ifdef HAVE_CUDA
238 switch (tensor->info.datatype)
239 {
240 case CCV_16F: {
241 uint16_t data[len];
242 cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.f16, tensor->info.type, len * sizeof(uint16_t));
243 float fp32[len];
244 ccv_half_precision_to_float(data, fp32, len);
245 for (i = 0; i < len; i++)
246 PRINT(CCV_CLI_VERBOSE, " %f", fp32[i]);
247 break;
248 }
249 case CCV_32F: {
250 float data[len];
251 cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.f32, tensor->info.type, len * sizeof(float));
252 for (i = 0; i < len; i++)
253 PRINT(CCV_CLI_VERBOSE, " %f", data[i]);
254 break;
255 }
256 case CCV_64F: {
257 double data[len];
258 cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.f64, tensor->info.type, len * sizeof(double));
259 for (i = 0; i < len; i++)
260 PRINT(CCV_CLI_VERBOSE, " %f", data[i]);
261 break;
262 }
263 case CCV_32S: {
264 int data[len];
265 cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.i32, tensor->info.type, len * sizeof(int));
266 for (i = 0; i < len; i++)
267 PRINT(CCV_CLI_VERBOSE, " %d", data[i]);
268 break;
269 }
270 case CCV_64S: {
271 int64_t data[len];
272 cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.i64, tensor->info.type, len * sizeof(int64_t));
273 for (i = 0; i < len; i++)
274 PRINT(CCV_CLI_VERBOSE, " %lld", (long long)data[i]);
275 break;
276 }
277 case CCV_8U: {
278 uint8_t data[len];
279 cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, len * sizeof(uint8_t));
280 for (i = 0; i < len; i++)
281 PRINT(CCV_CLI_VERBOSE, " %d", (int)data[i]);
282 break;
283 }
284 }
285 if (ccv_nnc_tensor_count(tensor->info) > 3)
286 PRINT(CCV_CLI_VERBOSE, " ..");
287#endif
288 } else if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY) {
289 switch (tensor->info.datatype)
290 {
291 case CCV_16F: {
292 float fp32[len];
293 ccv_half_precision_to_float((uint16_t*)tensor->data.f16, fp32, len);
294 for (i = 0; i < len; i++)
295 PRINT(CCV_CLI_VERBOSE, " %f", fp32[i]);
296 break;
297 }
298 case CCV_32F:
299 for (i = 0; i < len; i++)
300 PRINT(CCV_CLI_VERBOSE, " %f", tensor->data.f32[i]);
301 break;
302 case CCV_64F:
303 for (i = 0; i < len; i++)
304 PRINT(CCV_CLI_VERBOSE, " %f", tensor->data.f64[i]);
305 break;
306 case CCV_32S:
307 for (i = 0; i < len; i++)
308 PRINT(CCV_CLI_VERBOSE, " %d", tensor->data.i32[i]);
309 break;
310 case CCV_64S:
311 for (i = 0; i < len; i++)
312 PRINT(CCV_CLI_VERBOSE, " %lld", (long long)tensor->data.i64[i]);
313 break;
314 case CCV_8U:
315 for (i = 0; i < len; i++)
316 PRINT(CCV_CLI_VERBOSE, " %d", (int)tensor->data.u8[i]);
317 break;
318 }
319 if (ccv_nnc_tensor_count(tensor->info) > 3)
320 PRINT(CCV_CLI_VERBOSE, " ..");
321 }
322}
323
324static co_decl(_ccv_nnc_graph_topsorted_run_coro, (ccv_nnc_graph_t* const graph, const int exec_idx, const ccv_nnc_graph_static_schedule_t* const schedule, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, const int flags));
325
326static co_decl_task(_ccv_nnc_graph_exec_cases_of_coro, (ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, const ccv_nnc_graph_exec_schedule_t* const schd, ccv_nnc_tensor_t* const* const inputs, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, int flags), private(
327 int ref;
328 ccv_nnc_graph_t* sub_graph;
329)) {
330 // Wait until this stream context is done.
331 co_stream_await(CO_P(stream_context));
332 if (CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_FORWARD)
333 {
334 CO_V(ref) = CO_P(exec)->case_of.offset + CO_P(exec)->case_of.expr(CO_P(inputs), CO_P(exec)->input_size, CO_P(exec)->case_of.data);
335 if (CO_P(tensor_tape))
336 ccv_nnc_tensor_tape_set_numbering(CO_P(tensor_tape), CO_P(graph), (ccv_nnc_graph_exec_t){
337 .d = CO_P(exec_idx),
338 .graph = CO_P(graph),
339 }, CO_V(ref));
340 } else {
341 assert(CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD);
342 assert(CO_P(tensor_tape));
343 CO_V(ref) = ccv_nnc_tensor_tape_numbering(CO_P(tensor_tape), CO_P(graph), (ccv_nnc_graph_exec_t){
344 .d = CO_P(exec_idx),
345 .graph = CO_P(graph),
346 });
347 }
348 if (CO_V(ref) >= 0)
349 {
350 assert(CO_V(ref) < CO_P(exec)->graph_ref_size);
351 CO_V(sub_graph) = *(ccv_nnc_graph_t**)ccv_array_get(CO_P(graph)->sub_graphs, CCV_NNC_GRAPH_REF(CO_P(exec))[CO_V(ref)] - 1);
352 assert(CO_P(schd)->stream_size == 1);
353 assert(CO_P(graph)->streams[SCHEDULE_STREAMS(*CO_P(schd))[0]] == CO_V(sub_graph)->streams[0]);
354 co_apply(_ccv_nnc_graph_topsorted_run_coro, (CO_V(sub_graph), CO_P(exec_idx), CO_V(sub_graph)->default_schedule, CO_P(exec), CO_P(tensor_tape), CO_P(graph)->streams[SCHEDULE_STREAMS(*CO_P(schd))[0]], CO_P(flags)));
355 }
356 _ccv_nnc_graph_exec_unwrap_phi(CO_P(graph), CO_P(exec), CO_V(ref));
357} co_end()
358
359typedef struct {
360 ccv_nnc_graph_t* graph;
361 const ccv_nnc_graph_exec_schedule_t* node;
362 ccv_nnc_stream_context_t* stream;
363} ccv_nnc_graph_neighbor_context_discovery_t;
364
365static ccv_nnc_stream_context_t* _ccv_nnc_graph_neighbor_context_discovery(const int device_id, void* const context)
366{
367 const ccv_nnc_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_graph_neighbor_context_discovery_t*)context;
368 if (CCV_STREAM_GET_DEVICE_ID(ccv_nnc_stream_context_type(discovery->stream)) == device_id)
369 return discovery->stream;
370 ccv_nnc_graph_t* const graph = discovery->graph;
371 const ccv_nnc_graph_exec_schedule_t* const node = discovery->node;
372 int i;
373 // First try to find in other streams of the same node.
374 for (i = 0; i < node->stream_size; i++)
375 {
376 ccv_nnc_stream_context_t* const stream = graph->streams[SCHEDULE_STREAMS(*node)[i]];
377 if (CCV_STREAM_GET_DEVICE_ID(ccv_nnc_stream_context_type(stream)) == device_id)
378 return stream;
379 }
380 // If cannot find, try to find in all the wait streams.
381 for (i = 0; i < node->wait_size; i++)
382 {
383 ccv_nnc_stream_context_t* stream_context = ccv_nnc_stream_signal_get_emitter(graph->signals[node->waits[i]]);
384 if (stream_context && CCV_STREAM_GET_DEVICE_ID(ccv_nnc_stream_context_type(stream_context)) == device_id)
385 return stream_context;
386 }
387 return 0;
388}
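
Note: the discovery above resolves a stream for a requested device id in three steps: the node's own stream, then the node's other scheduled streams, then the emitters of the signals the node waits on. A reduced sketch with a toy stream type (discover and toy_stream_t are hypothetical, with a plain device id standing in for CCV_STREAM_GET_DEVICE_ID):

	typedef struct { int device_id; } toy_stream_t;

	static toy_stream_t* discover(toy_stream_t* const self, toy_stream_t* const* const scheduled, const int scheduled_size, toy_stream_t* const* const wait_emitters, const int wait_size, const int device_id)
	{
		int i;
		if (self->device_id == device_id) /* 1. the node's own stream */
			return self;
		for (i = 0; i < scheduled_size; i++) /* 2. other streams of this node */
			if (scheduled[i]->device_id == device_id)
				return scheduled[i];
		for (i = 0; i < wait_size; i++) /* 3. emitters of awaited signals */
			if (wait_emitters[i] && wait_emitters[i]->device_id == device_id)
				return wait_emitters[i];
		return 0; /* no neighbor on that device */
	}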
389
390static co_routine_t* _ccv_nnc_graph_exec_run_task(ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node, const ccv_nnc_graph_exec_schedule_t* const schd, const int idx, ccv_nnc_tensor_tape_t* const tensor_tape, const int flags)
391{
392 _ccv_nnc_graph_exec_unwrap_io(graph, node);
393 ccv_nnc_tensor_t** inputs = node->inputs;
394 ccv_nnc_tensor_t** outputs = inputs ? inputs + node->input_size : 0;
395 if (tensor_tape)
396 ccv_nnc_tensor_tape_io(tensor_tape, graph, node->input_flags, inputs, node->input_size, node->output_flags, outputs, node->output_size);
397 /* Broadcast the updates to all subscribed references for input / output, even though at this
398 * time output is not written yet, propagating the pointer change is still valid. */
399 _ccv_nnc_graph_exec_begin_synchronize_multiviews(graph, node);
400 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)
401 {
402 if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
403 {
404 ccv_nnc_stream_context_t* const node_stream = graph->streams[SCHEDULE_STREAMS(*schd)[0]];
405 return co_new(_ccv_nnc_graph_exec_cases_of_coro, (graph, idx, node, schd, inputs, tensor_tape, node_stream, flags));
406 } else if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) {
407 ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[0] - 1);
408 assert(graph->streams[SCHEDULE_STREAMS(*schd)[0]] == sub_graph->streams[0]);
409 return co_new(_ccv_nnc_graph_topsorted_run_coro, (sub_graph, idx, sub_graph->default_schedule, node, tensor_tape, graph->streams[SCHEDULE_STREAMS(*schd)[0]], flags));
410 }
411 } else {
412 PRINT(CCV_CLI_INFO, "%s [%d]: [%d] -> [%d] (%d)\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size, SCHEDULE_STREAMS(*schd)[0]);
413 int i, j;
414 int flag = 0;
415 for (i = 0; i < schd->stream_size; i++)
416 {
417 ccv_nnc_stream_context_t* const stream = graph->streams[SCHEDULE_STREAMS(*schd)[i]];
418 for (j = 0; j < schd->wait_size; j++)
419 {
420 ccv_nnc_stream_context_wait_signal(stream, graph->signals[schd->waits[j]]);
421 if (!flag)
422 {
423 PRINT(CCV_CLI_INFO, "Wait: (%d, %d)", SCHEDULE_STREAMS(*schd)[i], schd->waits[j]);
424 flag = 1;
425 } else
426 PRINT(CCV_CLI_INFO, ", (%d, %d)", SCHEDULE_STREAMS(*schd)[i], schd->waits[j]);
427 }
428 }
429 if (flag)
430 PRINT(CCV_CLI_INFO, "\n");
431 for (i = 0; i < node->input_size; i++)
432 {
433 PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, inputs[i], (inputs[i] ? inputs[i]->data.u8 : 0), (inputs[i] ? CCV_TENSOR_GET_DEVICE_ID(inputs[i]->info.type) : -1));
434 if (inputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
435 ccv_nnc_print_tensor_info(inputs[i]);
436 PRINT(CCV_CLI_INFO, "\n");
437 }
438 ccv_nnc_stream_context_t* const node_stream = graph->streams[SCHEDULE_STREAMS(*schd)[0]];
439 ccv_nnc_graph_neighbor_context_discovery_t discovery_context = {
440 .graph = graph,
441 .node = schd,
442 .stream = node_stream
443 };
444 ccv_nnc_stream_context_set_neighbor_discovery(node_stream, _ccv_nnc_graph_neighbor_context_discovery, &discovery_context);
445 ccv_nnc_cmd_exec(node->cmd, node->hint, flags, inputs, node->input_size, outputs, node->output_size, node_stream);
446 for (i = 0; i < node->output_size; i++)
447 {
448 PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, outputs[i], (outputs[i] ? outputs[i]->data.u8 : 0), (outputs[i] ? CCV_TENSOR_GET_DEVICE_ID(outputs[i]->info.type) : -1));
449 if (outputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
450 ccv_nnc_print_tensor_info(outputs[i]);
451 PRINT(CCV_CLI_INFO, "\n");
452 }
453 flag = 0;
454 for (i = 0; i < schd->stream_size; i++)
455 if (SCHEDULE_SIGNALS(*schd)[i] >= 0)
456 {
457 ccv_nnc_stream_context_t* const stream = graph->streams[SCHEDULE_STREAMS(*schd)[i]];
458 ccv_nnc_stream_context_emit_signal(stream, graph->signals[SCHEDULE_SIGNALS(*schd)[i]]);
459 if (!flag)
460 {
461 PRINT(CCV_CLI_INFO, "Emit: (%d, %d)", SCHEDULE_STREAMS(*schd)[i], SCHEDULE_SIGNALS(*schd)[i]);
462 flag = 1;
463 } else
464 PRINT(CCV_CLI_INFO, ", (%d, %d)", SCHEDULE_STREAMS(*schd)[i], SCHEDULE_SIGNALS(*schd)[i]);
465 }
466 if (flag)
467 PRINT(CCV_CLI_INFO, "\n");
468 }
469 return 0;
470}
471
472static void _ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_schedule_t* const schd_info, ccv_nnc_graph_exec_info_t* const node, co_routine_t* const task)
473{
474 int i, j;
475 if (node->outgoings)
476 for (i = 0; i < node->outgoings->rnum; i++)
477 {
478 const int outgoing_idx = *(int*)ccv_array_get(node->outgoings, i);
479 const ccv_nnc_graph_exec_schedule_t* const outgoing_schd = schd_info + outgoing_idx;
480 // An outgoing stream can be blocked by multiple other tasks from other streams. But it is OK,
481 // because on next round of execution, that one will be marked as blocked again.
482 for (j = 0; j < outgoing_schd->stream_size; j++)
483 graph->block_stream_tasks[SCHEDULE_STREAMS(*outgoing_schd)[j]] = task;
484 }
485}
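
Note: the marking above is last-writer-wins: several tasks may block the same outgoing stream, and only the latest marker survives, which the comment at lines 480-481 explains is acceptable because the next execution round re-marks the stream. A compact sketch of the step (mark_blocked is a hypothetical helper):

	static void mark_blocked(void** const block_stream_tasks, const int* const streams, const int stream_size, void* const task)
	{
		int j;
		for (j = 0; j < stream_size; j++)
			block_stream_tasks[streams[j]] = task; /* overwrite any earlier marker */
	}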
486
487static co_decl_task(_ccv_nnc_graph_wait_any_sub_tasks, (ccv_nnc_graph_t* const graph, co_routine_t* const* const sub_tasks, const int sub_task_size, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int* const pending_nodes, const int pending_node_size), private(
488)) {
489 assert(CO_P(sub_task_size) > 0);
490 co_await_any(CO_P(sub_tasks), CO_P(sub_task_size));
491 // This is not good, these local variables need to be in the private section.
492 // I got away with it because there is no yield or resume or apply or any after await above.
493 int i, j, k;
494 for (i = 0; i < CO_P(sub_task_size); i++)
495 if (co_is_done(CO_P(sub_tasks)[i]))
496 {
497 for (j = 0; j < CO_P(pending_node_size); j++)
498 {
499 const ccv_nnc_graph_exec_schedule_t* const node = CO_P(schd_info) + CO_P(pending_nodes)[j];
500 for (k = 0; k < node->stream_size; k++)
501 if (CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*node)[k]] == CO_P(sub_tasks)[i])
502 CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*node)[k]] = 0;
503 }
504 co_free(CO_P(sub_tasks)[i]);
505 }
506} co_end()
507
508static co_decl_task(_ccv_nnc_graph_exec_run_loop, (ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int* const psort, const int start_index, const int exec_info_size, ccv_nnc_tensor_tape_t* const tensor_tape, const int flags), private(
509 int i, p, q;
510 int sub_task_size;
511 co_routine_t** sub_tasks;
512 int* pending_nodes[2];
513 int pending_node_size[2];
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; switch (_self_->line) { case 0:
514 int idx;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self
, void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s
{ struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t
* const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info
;const int* const psort;const int start_index;const int exec_info_size
;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params
; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct
_ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s
_co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks
; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void
) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s
); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const
_self_, void* const _privates_) { struct _private_s { struct
_ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q
; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes
[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; switch (_self_->line) { case 0:
515 ccv_nnc_graph_exec_info_t* node;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self
, void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s
{ struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t
* const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info
;const int* const psort;const int start_index;const int exec_info_size
;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params
; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct
_ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s
_co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks
; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void
) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s
); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const
_self_, void* const _privates_) { struct _private_s { struct
_ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q
; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes
[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; switch (_self_->line) { case 0:
516 const ccv_nnc_graph_exec_schedule_t* schd;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self
, void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s
{ struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t
* const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info
;const int* const psort;const int start_index;const int exec_info_size
;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params
; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct
_ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s
_co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks
; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void
) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s
); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const
_self_, void* const _privates_) { struct _private_s { struct
_ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q
; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes
[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; switch (_self_->line) { case 0:
517 co_routine_t* task;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self
, void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s
{ struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t
* const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info
;const int* const psort;const int start_index;const int exec_info_size
;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params
; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct
_ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s
_co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks
; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void
) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s
); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const
_self_, void* const _privates_) { struct _private_s { struct
_ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q
; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes
[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; switch (_self_->line) { case 0:
518))co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self
, void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s
{ struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t
* const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info
;const int* const psort;const int start_index;const int exec_info_size
;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params
; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct
_ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s
_co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks
; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void
) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s
); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const
_self_, void* const _privates_) { struct _private_s { struct
_ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q
; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes
[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t
* node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t
* task; }; switch (_self_->line) { case 0:
{
519 	CO_V(sub_task_size) = 0;
520 	CO_V(sub_tasks) = (co_routine_t**)ccv_nnc_graph_buffer(CO_P(graph), sizeof(co_routine_t*) * (CO_P(graph)->sub_graphs ? CO_P(graph)->sub_graphs->rnum : 0) + sizeof(int) * CO_P(exec_info_size) * 2);
521 	CO_V(pending_nodes)[0] = (int*)(CO_V(sub_tasks) + (CO_P(graph)->sub_graphs ? CO_P(graph)->sub_graphs->rnum : 0));
522 	CO_V(pending_nodes)[1] = CO_V(pending_nodes)[0] + CO_P(exec_info_size);
523 	CO_V(pending_node_size)[0] = 0;
524 	CO_V(pending_node_size)[1] = 0;
525 	for (CO_V(i) = CO_P(start_index); CO_V(i) < CO_P(exec_info_size); CO_V(i)++)
526 	{
527 		CO_V(idx) = CO_P(psort) ? CO_P(psort)[CO_V(i)] : CO_V(i);
528 		CO_V(node) = CO_P(exec_info) + CO_V(idx);
529 		CO_V(schd) = CO_P(schd_info) + CO_V(idx);
530 		// If the stream is blocked by another task, put this node on the pending list.
531 		int blocked = 0, j;
532 		for (j = 0; j < CO_V(schd)->stream_size; j++)
533 			if (CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))[j]])
534 			{
535 				CO_V(pending_nodes)[0][CO_V(pending_node_size)[0]++] = CO_V(idx);
536 				_ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph), CO_P(schd_info), CO_V(node), CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))[j]]);
537 				blocked = 1;
538 			}
539 		if (blocked)
540 			continue;
541 		CO_V(task) = _ccv_nnc_graph_exec_run_task(CO_P(graph), CO_V(node), CO_V(schd), CO_V(idx), CO_P(tensor_tape), CO_P(flags));
542 		if (CO_V(task))
543 		{
544 			co_resume(CO_V(task));
545 			if (!co_is_done(CO_V(task)))
546 			{
547 				CO_V(sub_tasks)[CO_V(sub_task_size)++] = CO_V(task);
548 				int j;
549 				for (j = 0; j < CO_V(schd)->stream_size; j++)
550 					CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))[j]] = CO_V(task);
551 				_ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph), CO_P(schd_info), CO_V(node), CO_V(task));
552 			} else
553 				co_free(CO_V(task));
554 		}
555 	}
556 	if (CO_V(sub_task_size))
557 		co_apply(_ccv_nnc_graph_wait_any_sub_tasks, (CO_P(graph), CO_V(sub_tasks), CO_V(sub_task_size), CO_P(schd_info), CO_V(pending_nodes)[0], CO_V(pending_node_size)[0]));
558 	CO_V(p) = 0;
559 	CO_V(q) = 1;
560 	while (CO_V(pending_node_size)[CO_V(p)] > 0)
561 	{
562 		CO_V(pending_node_size)[CO_V(q)] = 0;
563 		CO_V(sub_task_size) = 0;
564 		for (CO_V(i) = 0; CO_V(i) < CO_V(pending_node_size)[CO_V(p)]; CO_V(i)++)
565 		{
566 			CO_V(idx) = CO_V(pending_nodes)[CO_V(p)][CO_V(i)];
567 			CO_V(node) = CO_P(exec_info) + CO_V(idx);
568 			CO_V(schd) = CO_P(schd_info) + CO_V(idx);
569 			int blocked = 0, j;
570 			for (j = 0; j < CO_V(schd)->stream_size; j++)
571 				if (CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))[j]])
572 				{
573 					_ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph), CO_P(schd_info), CO_V(node), CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))[j]]);
574 					CO_V(pending_nodes)[CO_V(q)][CO_V(pending_node_size)[CO_V(q)]++] = CO_V(idx);
575 					blocked = 1;
576 				}
577 			if (blocked)
578 				continue;
579 			CO_V(task) = _ccv_nnc_graph_exec_run_task(CO_P(graph), CO_V(node), CO_V(schd), CO_V(idx), CO_P(tensor_tape), CO_P(flags));
580 			if (CO_V(task))
581 			{
582 				co_resume(CO_V(task));
583 				if (!co_is_done(CO_V(task)))
584 				{
585 					CO_V(sub_tasks)[CO_V(sub_task_size)++] = CO_V(task);
586 					for (j = 0; j < CO_V(schd)->stream_size; j++)
587 						CO_P(graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))[j]] = CO_V(task);
588 					_ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph), CO_P(schd_info), CO_V(node), CO_V(task));
589 				} else
590 					co_free(CO_V(task));
591 			}
592 		}
593 		int t;
594 		CCV_SWAP(CO_V(p), CO_V(q), t);
595 		if (CO_V(sub_task_size))
596 			co_apply(_ccv_nnc_graph_wait_any_sub_tasks, (CO_P(graph), CO_V(sub_tasks), CO_V(sub_task_size), CO_P(schd_info), CO_V(pending_nodes)[CO_V(p)], CO_V(pending_node_size)[CO_V(p)]));
597 	}
598 } co_end()
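Editor's note: the raw report expands the co_task/co_decl_task/co_end macros inline, which is where the repeated "switch (_self_->line) { case 0:" fragments and "return (co_state_t){ N, 0 }; case N:" resume points in the listing come from. These macros implement a protothread-style stackless coroutine: every suspension point records a resume label in the routine's state and returns, and re-entry jumps back through the switch, which is also why all persistent locals live in the _private_s struct rather than on the stack. Below is a minimal, self-contained sketch of that pattern; all toy_* names are hypothetical illustrations, not the ccv implementation.

#include <stdio.h>

/* Hypothetical protothread sketch: each yield stores the resume line in the
 * routine's state, and the switch jumps back to it on the next call. */
typedef struct { int line; int i; } toy_routine_t;

#define TOY_BEGIN(self) switch ((self)->line) { case 0:
#define TOY_YIELD(self) do { (self)->line = __LINE__; return 1; case __LINE__:; } while (0)
#define TOY_END() } return 0 /* fell off the end: the routine is done */

/* Returns 1 while suspended, 0 when finished; `i` persists across yields
 * because it lives in the state struct, mirroring _private_s above. */
static int toy_count_to(toy_routine_t* const self, const int n)
{
	TOY_BEGIN(self);
	for (self->i = 0; self->i < n; self->i++)
	{
		printf("step %d\n", self->i);
		TOY_YIELD(self); /* suspend; execution resumes right here */
	}
	TOY_END();
}

int main(void)
{
	toy_routine_t r = { 0 };
	while (toy_count_to(&r, 3)) /* drive the coroutine to completion */
		;
	return 0;
}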
599
600 co_task(_ccv_nnc_graph_topsorted_run_coro, (ccv_nnc_graph_t* const graph, const int exec_idx, const ccv_nnc_graph_static_schedule_t* const schedule, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, const int flags), private(
601 	ccv_nnc_graph_exec_info_t* exec_info;
602 	const ccv_nnc_graph_exec_schedule_t* schd_info;
603 	co_routine_t* previous_main;
604 	int stream_0;
605 	// while loop
606 	int64_t count, reverse_count;
607 	int graph_breakpoint_size;
608 	int i, j;
609 ))
{
610 assert(CO_P(graph)->stream_size > 0)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params
._co_params.graph)->stream_size > 0) ? 1 : 0), __extension__
({ if ((((struct _private_s*)(_privates_))->_co_params._co_params
.graph)->stream_size > 0) ; else __assert_fail ("CO_P(graph)->stream_size > 0"
, "ccv_nnc_graph_run.c", 610, __extension__ __PRETTY_FUNCTION__
); }))
;
611 int i;
612 // Assign the resource container pointer.
613 for (i = 0; i < CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->stream_size; i++)
614 CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->streams[i]->resource_container = CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params
.stream_context)
->_inline_container;
615 CO_V(exec_info)(((struct _private_s*)(_privates_))->exec_info) = (ccv_nnc_graph_exec_info_t*)ccv_array_get(CO_P(graph)->exec_info, 0)((void*)(((char*)(((((struct _private_s*)(_privates_))->_co_params
._co_params.graph)->exec_info)->data)) + (size_t)((((struct
_private_s*)(_privates_))->_co_params._co_params.graph)->
exec_info)->rsize * (size_t)(0)))
;
616 CO_V(schd_info)(((struct _private_s*)(_privates_))->schd_info) = CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->exec_info;
617 CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0) = CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->stream_0;
618 if (CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec_idx)
== -1)
619 {
620 if (CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params
.stream_context)
->main)
621 {
622 CO_V(previous_main)(((struct _private_s*)(_privates_))->previous_main) = CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params
.stream_context)
->main;
623 CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params
.stream_context)
->main = co_self()(_self_);
624 // Wait the previous task to be done. This makes sure that our graph run is serial on the same stream.
625 assert(!co_is_done(CO_V(previous_main)))((void) sizeof ((!co_is_done((((struct _private_s*)(_privates_
))->previous_main))) ? 1 : 0), __extension__ ({ if (!co_is_done
((((struct _private_s*)(_privates_))->previous_main))) ; else
__assert_fail ("!co_is_done(CO_V(previous_main))", "ccv_nnc_graph_run.c"
, 625, __extension__ __PRETTY_FUNCTION__); }))
;
626 co_await(CO_V(previous_main))do { if (!_co_await_any(_self_, &((((struct _private_s*)(
_privates_))->previous_main)), 1)) { return (co_state_t){ 626
, 0 }; } case 626: ; } while (0)
;
627 } else
628 CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params
.stream_context)
->main = co_self()(_self_);
629 PRINT(CCV_CLI_INFO, "Graph Stream %d Begin", CO_V(stream_0))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf
("Graph Stream %d Begin", (((struct _private_s*)(_privates_))
->stream_0)); fflush(stdout); } } while (0)
;
630 ccv_nnc_stream_signal_t* stream_0_signal;
631 if (CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params
.stream_context)
!= CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)])
632 {
633 // Make sure when we start work on streams[0], the current stream context is done.
634 stream_0_signal = ccv_nnc_stream_context_emit_signal_new(CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params
.stream_context)
);
635 ccv_nnc_stream_context_wait_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)], stream_0_signal);
636 } else if (CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->stream_1_size) {
637 ccv_nnc_stream_context_emit_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)], CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->begin);
638 stream_0_signal = CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->begin;
639 }
640 int i, flag = 0;
641 for (i = 0; i < CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->stream_1_size; i++)
642 {
643 ccv_nnc_stream_context_wait_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->streams[CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->stream_1s[i]], stream_0_signal);
644 if (!flag)
645 {
646 PRINT(CCV_CLI_INFO, ", Wait: %d", CO_P(schedule)->stream_1s[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf
(", Wait: %d", (((struct _private_s*)(_privates_))->_co_params
._co_params.schedule)->stream_1s[i]); fflush(stdout); } } while
(0)
;
647 flag = 1;
648 } else
649 PRINT(CCV_CLI_INFO, ", %d", CO_P(schedule)->stream_1s[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf
(", %d", (((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)->stream_1s[i]); fflush(stdout); } } while (0)
;
650 }
651 PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf
("\n"); fflush(stdout); } } while (0)
;
652 } else {
653 assert(CO_P(stream_context) == CO_P(graph)->streams[0])((void) sizeof (((((struct _private_s*)(_privates_))->_co_params
._co_params.stream_context) == (((struct _private_s*)(_privates_
))->_co_params._co_params.graph)->streams[0]) ? 1 : 0),
__extension__ ({ if ((((struct _private_s*)(_privates_))->
_co_params._co_params.stream_context) == (((struct _private_s
*)(_privates_))->_co_params._co_params.graph)->streams[
0]) ; else __assert_fail ("CO_P(stream_context) == CO_P(graph)->streams[0]"
, "ccv_nnc_graph_run.c", 653, __extension__ __PRETTY_FUNCTION__
); }))
;
654 }
655 if (CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
&& (CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
656 {
657 assert(CO_P(schedule) == CO_P(graph)->default_schedule)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params
._co_params.schedule) == (((struct _private_s*)(_privates_))->
_co_params._co_params.graph)->default_schedule) ? 1 : 0), __extension__
({ if ((((struct _private_s*)(_privates_))->_co_params._co_params
.schedule) == (((struct _private_s*)(_privates_))->_co_params
._co_params.graph)->default_schedule) ; else __assert_fail
("CO_P(schedule) == CO_P(graph)->default_schedule", "ccv_nnc_graph_run.c"
, 657, __extension__ __PRETTY_FUNCTION__); }))
;
658 assert(CO_P(exec)->p_while.expr)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params
._co_params.exec)->p_while.expr) ? 1 : 0), __extension__ (
{ if ((((struct _private_s*)(_privates_))->_co_params._co_params
.exec)->p_while.expr) ; else __assert_fail ("CO_P(exec)->p_while.expr"
, "ccv_nnc_graph_run.c", 658, __extension__ __PRETTY_FUNCTION__
); }))
;
659 CO_V(count)(((struct _private_s*)(_privates_))->count) = 0;
660 // This is a forward while loop. Backward while loop will just consult its pairing part.
661 if (CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
->cmd.cmd == CCV_NNC_GRAPH_FORWARD)
662 {
663 CO_V(graph_breakpoint_size)(((struct _private_s*)(_privates_))->graph_breakpoint_size
)
= CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->breakpoint_offset + CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->breakpoint_size;
664 for (;; ++CO_V(count)(((struct _private_s*)(_privates_))->count))
665 {
666 CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->while_count = CO_V(count)(((struct _private_s*)(_privates_))->count);
667 if (CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params
.tensor_tape)
)
668 ccv_nnc_tensor_tape_set_numbering(CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params
.tensor_tape)
, CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->p, (ccv_nnc_graph_exec_t){
669 .d = CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec_idx)
,
670 .graph = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->p,
671 }, CO_V(count)(((struct _private_s*)(_privates_))->count));
672 _ccv_nnc_graph_unwrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
, CO_V(count)(((struct _private_s*)(_privates_))->count), 0);
673 if (CO_V(count)(((struct _private_s*)(_privates_))->count) > 0)
674 _ccv_nnc_graph_transit_move_to(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
675 co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, 0, CO_V(graph_breakpoint_size), CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc
((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size
())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params
= { ._co_params = { (((struct _private_s*)(_privates_))->
_co_params._co_params.graph), (((struct _private_s*)(_privates_
))->exec_info), (((struct _private_s*)(_privates_))->schd_info
), 0, 0, (((struct _private_s*)(_privates_))->graph_breakpoint_size
), (((struct _private_s*)(_privates_))->_co_params._co_params
.tensor_tape), (((struct _private_s*)(_privates_))->_co_params
._co_params.flags) } }; task->fn = _ccv_nnc_graph_exec_run_loop
; task->line = 0; task->done = 0; task->root = 0; task
->other_size = 0; task->notify_any = 0; task->others
= 0; task->caller = 0; task->callee = 0; if (sizeof(params
) > 0) memcpy(task + 1, &params, sizeof(params)); } while
(0); task; }); _co_apply(_self_, _self_->callee); return (
co_state_t){ 675, 0 }; case 675: co_free(_self_->callee); _self_
->callee = 0; } while (0)
;
676 // Reached breakpoints, now check the breakpoint, if not met, break out.
677 // Wait until everything on the stream is executed.
678 for (CO_V(i)(((struct _private_s*)(_privates_))->i) = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->breakpoint_offset; CO_V(i)(((struct _private_s*)(_privates_))->i) < CO_V(graph_breakpoint_size)(((struct _private_s*)(_privates_))->graph_breakpoint_size
)
; CO_V(i)(((struct _private_s*)(_privates_))->i)++)
679 for (CO_V(j)(((struct _private_s*)(_privates_))->j) = 0; CO_V(j)(((struct _private_s*)(_privates_))->j) < CO_V(schd_info)(((struct _private_s*)(_privates_))->schd_info)[CO_V(i)(((struct _private_s*)(_privates_))->i)].stream_size; CO_V(j)(((struct _private_s*)(_privates_))->j)++)
680 co_stream_await(CO_P(graph)->streams[SCHEDULE_STREAMS(CO_V(schd_info)[CO_V(i)])[CO_V(j)]])do { if (!_co_stream_await(_self_, (((struct _private_s*)(_privates_
))->_co_params._co_params.graph)->streams[(((((struct _private_s
*)(_privates_))->schd_info)[(((struct _private_s*)(_privates_
))->i)]).stream_size <= 1 ? ((((struct _private_s*)(_privates_
))->schd_info)[(((struct _private_s*)(_privates_))->i)]
)._inline_streams : ((((struct _private_s*)(_privates_))->
schd_info)[(((struct _private_s*)(_privates_))->i)])._heap_streams
)[(((struct _private_s*)(_privates_))->j)]])) { return (co_state_t
){ 680, 0 }; } case 680: ; } while (0)
;
681 _ccv_nnc_graph_exec_unwrap_while_expr(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
, CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
);
682 if (!CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
->p_while.expr(CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
->p_while.inputs, CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
->p_while.input_size, CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec)
->p_while.data))
683 {
684 _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
685 // If we break from here, it is ok because all the streams are waited.
686 break;
687 }
688 co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, CO_V(graph_breakpoint_size), CO_P(graph)->exec_info->rnum, CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc
((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size
())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params
= { ._co_params = { (((struct _private_s*)(_privates_))->
_co_params._co_params.graph), (((struct _private_s*)(_privates_
))->exec_info), (((struct _private_s*)(_privates_))->schd_info
), 0, (((struct _private_s*)(_privates_))->graph_breakpoint_size
), (((struct _private_s*)(_privates_))->_co_params._co_params
.graph)->exec_info->rnum, (((struct _private_s*)(_privates_
))->_co_params._co_params.tensor_tape), (((struct _private_s
*)(_privates_))->_co_params._co_params.flags) } }; task->
fn = _ccv_nnc_graph_exec_run_loop; task->line = 0; task->
done = 0; task->root = 0; task->other_size = 0; task->
notify_any = 0; task->others = 0; task->caller = 0; task
->callee = 0; if (sizeof(params) > 0) memcpy(task + 1, &
params, sizeof(params)); } while (0); task; }); _co_apply(_self_
, _self_->callee); return (co_state_t){ 688, 0 }; case 688
: co_free(_self_->callee); _self_->callee = 0; } while (
0)
;
689 _ccv_nnc_graph_from_move_transit(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
690 _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
691 }
692 } else {
693 // For backward graph, no need to evaluate the while expr.
694 assert(CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params
._co_params.exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) ? 1 :
0), __extension__ ({ if ((((struct _private_s*)(_privates_))
->_co_params._co_params.exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD
) ; else __assert_fail ("CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD"
, "ccv_nnc_graph_run.c", 694, __extension__ __PRETTY_FUNCTION__
); }))
;
695 assert(CO_P(graph)->pair)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params
._co_params.graph)->pair) ? 1 : 0), __extension__ ({ if ((
((struct _private_s*)(_privates_))->_co_params._co_params.
graph)->pair) ; else __assert_fail ("CO_P(graph)->pair"
, "ccv_nnc_graph_run.c", 695, __extension__ __PRETTY_FUNCTION__
); }))
;
696 assert(CO_P(tensor_tape))((void) sizeof (((((struct _private_s*)(_privates_))->_co_params
._co_params.tensor_tape)) ? 1 : 0), __extension__ ({ if ((((struct
_private_s*)(_privates_))->_co_params._co_params.tensor_tape
)) ; else __assert_fail ("CO_P(tensor_tape)", "ccv_nnc_graph_run.c"
, 696, __extension__ __PRETTY_FUNCTION__); }))
;
697 CO_V(count)(((struct _private_s*)(_privates_))->count) = 0;
698 CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count) = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->while_count = ccv_nnc_tensor_tape_numbering(CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params
.tensor_tape)
, CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->p, (ccv_nnc_graph_exec_t){
699 .d = CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params
.exec_idx)
,
700 .graph = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->p,
701 });
702 _ccv_nnc_graph_unwrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
, CO_V(count)(((struct _private_s*)(_privates_))->count), CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count));
703 co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, CO_P(graph)->breakpoint_offset, CO_P(graph)->exec_info->rnum, CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc
((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size
())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params
= { ._co_params = { (((struct _private_s*)(_privates_))->
_co_params._co_params.graph), (((struct _private_s*)(_privates_
))->exec_info), (((struct _private_s*)(_privates_))->schd_info
), 0, (((struct _private_s*)(_privates_))->_co_params._co_params
.graph)->breakpoint_offset, (((struct _private_s*)(_privates_
))->_co_params._co_params.graph)->exec_info->rnum, (
((struct _private_s*)(_privates_))->_co_params._co_params.
tensor_tape), (((struct _private_s*)(_privates_))->_co_params
._co_params.flags) } }; task->fn = _ccv_nnc_graph_exec_run_loop
; task->line = 0; task->done = 0; task->root = 0; task
->other_size = 0; task->notify_any = 0; task->others
= 0; task->caller = 0; task->callee = 0; if (sizeof(params
) > 0) memcpy(task + 1, &params, sizeof(params)); } while
(0); task; }); _co_apply(_self_, _self_->callee); return (
co_state_t){ 703, 0 }; case 703: co_free(_self_->callee); _self_
->callee = 0; } while (0)
;
704 _ccv_nnc_graph_from_move_transit(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
705 _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
706 for (CO_V(count)(((struct _private_s*)(_privates_))->count) = 1; CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count) > 0; ++CO_V(count)(((struct _private_s*)(_privates_))->count))
707 {
708 CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
->while_count = --CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count);
709 _ccv_nnc_graph_unwrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
, CO_V(count)(((struct _private_s*)(_privates_))->count), CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count));
710 _ccv_nnc_graph_transit_move_to(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
711 co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, 0, CO_P(graph)->exec_info->rnum, CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc
((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size
())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params
= { ._co_params = { (((struct _private_s*)(_privates_))->
_co_params._co_params.graph), (((struct _private_s*)(_privates_
))->exec_info), (((struct _private_s*)(_privates_))->schd_info
), 0, 0, (((struct _private_s*)(_privates_))->_co_params._co_params
.graph)->exec_info->rnum, (((struct _private_s*)(_privates_
))->_co_params._co_params.tensor_tape), (((struct _private_s
*)(_privates_))->_co_params._co_params.flags) } }; task->
fn = _ccv_nnc_graph_exec_run_loop; task->line = 0; task->
done = 0; task->root = 0; task->other_size = 0; task->
notify_any = 0; task->others = 0; task->caller = 0; task
->callee = 0; if (sizeof(params) > 0) memcpy(task + 1, &
params, sizeof(params)); } while (0); task; }); _co_apply(_self_
, _self_->callee); return (co_state_t){ 711, 0 }; case 711
: co_free(_self_->callee); _self_->callee = 0; } while (
0)
;
712 _ccv_nnc_graph_from_move_transit(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
713 _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params
.graph)
);
714 }
715 }
716 assert(CO_V(stream_0) == 0)((void) sizeof (((((struct _private_s*)(_privates_))->stream_0
) == 0) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)
(_privates_))->stream_0) == 0) ; else __assert_fail ("CO_V(stream_0) == 0"
, "ccv_nnc_graph_run.c", 716, __extension__ __PRETTY_FUNCTION__
); }))
;
717 int i;
718 for (i = 0; i < CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params
.schedule)
->wait_size; i++)
719 ccv_nnc_stream_context_wait_signal(CO_P(graph)->streams[0], CO_P(graph)->signals[CO_P(schedule)->waits[i]]);
720 } else {
721 CO_P(graph)->while_count = 0;
722 co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), CO_P(schedule)->psort, 0, CO_P(schedule)->psort ? CO_P(schedule)->psort_size : CO_P(schedule)->exec_info_size, CO_P(tensor_tape), CO_P(flags)));
723 PRINT(CCV_CLI_INFO, "Graph Stream %d End", CO_V(stream_0));
724 int i, flag = 0;
725 for (i = 0; i < CO_P(schedule)->wait_size; i++)
726 {
727 ccv_nnc_stream_context_wait_signal(CO_P(graph)->streams[CO_V(stream_0)], CO_P(graph)->signals[CO_P(schedule)->waits[i]]);
728 if (!flag)
729 {
730 PRINT(CCV_CLI_INFO, ", Wait: %d", CO_P(schedule)->waits[i]);
731 flag = 1;
732 } else
733 PRINT(CCV_CLI_INFO, ", %d", CO_P(schedule)->waits[i]);
734 }
735 PRINT(CCV_CLI_INFO, "\n");
736 }
737 if (CO_P(stream_context) != CO_P(graph)->streams[CO_V(stream_0)])
738 {
739 assert(CO_P(exec_idx) == -1);
740 ccv_nnc_stream_context_emit_signal(CO_P(graph)->streams[CO_V(stream_0)], CO_P(schedule)->end);
741 ccv_nnc_stream_context_wait_signal(CO_P(stream_context), CO_P(schedule)->end);
742 }
743 // Reset main to 0 if it is currently me.
744 if (CO_P(exec_idx) == -1 && CO_P(stream_context)->main == co_self())
745 CO_P(stream_context)->main = 0;
746} co_end()
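The expansions collapsed above show that co_apply()/co_end() implement a switch-based stackless coroutine: each suspension point returns its source line as the resume state, and the driver re-enters the function through the matching case label. A minimal, self-contained sketch of that pattern — all names here are hypothetical stand-ins, not the library's actual macros:

#include <stdio.h>

typedef struct { int line; int done; } co_state_t; /* mirrors the report's co_state_t */

/* One coroutine with a single suspension point: it records the label to
 * resume at and the driver re-enters through the switch. */
static co_state_t two_step(int* resume)
{
	switch (*resume) {
		case 0:
			printf("before yield\n");
			*resume = 1;
			return (co_state_t){ 1, 0 }; /* suspend */
		case 1:
			printf("after yield\n");
			break;
	}
	return (co_state_t){ 1, 1 }; /* finished */
}

int main(void)
{
	int resume = 0;
	co_state_t s;
	do s = two_step(&resume); while (!s.done);
	return 0;
}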
747
748static int _ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
749
750static inline void _ccv_nnc_graph_exec_run(ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node, const int idx, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, const int flags)
751{
752 int i;
753 _ccv_nnc_graph_exec_unwrap_io(graph, node);
20: Calling '_ccv_nnc_graph_exec_unwrap_io'
24: Returning from '_ccv_nnc_graph_exec_unwrap_io'
754 ccv_nnc_tensor_t** inputs = node->inputs;
25: 'inputs' initialized here
755 ccv_nnc_tensor_t** outputs = inputs ? inputs + node->input_size : 0;
26: Assuming 'inputs' is null
27: '?' condition is false
756 if (tensor_tape)
28: Assuming 'tensor_tape' is null
29: Taking false branch
757 ccv_nnc_tensor_tape_io(tensor_tape, graph, node->input_flags, inputs, node->input_size, node->output_flags, outputs, node->output_size);
758 /* Broadcast the updates to all subscribed references for input / output; even though at this
759 * time the output is not written yet, propagating the pointer change is still valid. */
760 _ccv_nnc_graph_exec_begin_synchronize_multiviews(graph, node);
761 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)
30: Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_FORWARD
31: Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_BACKWARD
32: Taking false branch
762 {
763 assert(!stream_context); // This doesn't work properly with stream context.
764 if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
765 {
766 int ref;
767 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD)
768 {
769 ref = node->case_of.offset + node->case_of.expr(inputs, node->input_size, node->case_of.data);
770 if (tensor_tape)
771 ccv_nnc_tensor_tape_set_numbering(tensor_tape, graph, (ccv_nnc_graph_exec_t){
772 .d = idx,
773 .graph = graph,
774 }, ref);
775 } else {
776 assert(node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD);
777 assert(tensor_tape);
778 ref = ccv_nnc_tensor_tape_numbering(tensor_tape, graph, (ccv_nnc_graph_exec_t){
779 .d = idx,
780 .graph = graph,
781 });
782 }
783 if (ref >= 0)
784 {
785 assert(ref < node->graph_ref_size);
786 ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[ref] - 1);
787 _ccv_nnc_graph_run(sub_graph, idx, node, inputs, node->input_size, outputs, node->output_size, flags, 0, 0, 0, 0, tensor_tape, stream_context);
788 }
789 _ccv_nnc_graph_exec_unwrap_phi(graph, node, ref);
790 } else if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) {
791 ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[0] - 1);
792 _ccv_nnc_graph_run(sub_graph, idx, node, inputs, node->input_size, outputs, node->output_size, flags, 0, 0, 0, 0, tensor_tape, stream_context);
793 }
794 } else {
795 PRINT(CCV_CLI_INFO, "%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size);
33: Assuming the condition is false
34: Taking false branch
35: Loop condition is false. Exiting loop
796 for (i = 0; i < node->input_size; i++)
36: Assuming 'i' is < field 'input_size'
37: Loop condition is true. Entering loop body
797 {
798 PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, inputs[i], (inputs[i] ? inputs[i]->data.u8 : 0), (inputs[i] ? CCV_TENSOR_GET_DEVICE_ID(inputs[i]->info.type) : -1));
38: Assuming the condition is false
39: Taking false branch
799 if (inputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
40: Loop condition is false. Exiting loop
41: Array access (from variable 'inputs') results in a null pointer dereference
800 ccv_nnc_print_tensor_info(inputs[i]);
801 PRINT(CCV_CLI_INFO, "\n");
802 }
803 ccv_nnc_cmd_exec(node->cmd, node->hint, flags, inputs, node->input_size, outputs, node->output_size, stream_context);
804 for (i = 0; i < node->output_size; i++)
805 {
806 PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, outputs[i], (outputs[i] ? outputs[i]->data.u8 : 0), (outputs[i] ? CCV_TENSOR_GET_DEVICE_ID(outputs[i]->info.type) : -1));
807 if (outputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
808 ccv_nnc_print_tensor_info(outputs[i]);
809 PRINT(CCV_CLI_INFO, "\n");
810 }
811 }
812}
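The warning at line 799 fires because step 26 assumes node->inputs is null while step 36 assumes node->input_size is positive, so inputs[i] indexes through a null pointer. Whether that state is reachable in practice depends on graph-construction invariants the analyzer cannot see; a minimal, self-contained sketch of a guard that would make the invariant explicit (names here are hypothetical, not the analyzed source):

#include <stdio.h>

/* log_inputs stands in for the logging loop at lines 796-802: if the array
 * pointer can be null on some path, guard it before indexing, regardless of
 * the advertised element count. */
static void log_inputs(void* const* inputs, int input_size)
{
	if (!inputs) /* the guard the analyzer's path is missing */
		return;
	int i;
	for (i = 0; i < input_size; i++)
		printf("|-> %d. %p\n", i + 1, inputs[i]);
}

int main(void)
{
	log_inputs(NULL, 3); /* safe with the guard; a null dereference without it */
	return 0;
}

Equivalently, an assert(node->input_size == 0 || inputs) before the loop would both document the invariant and silence the checker.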
813
814static inline void _ccv_nnc_graph_topsorted_run(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, const int flags, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
815{
816 int i;
817 if (exec && (exec->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
16.1: 'exec' is null
818 {
819 assert(!stream_context); // This doesn't work properly with stream context.
820 assert(exec->p_while.expr);
821 int64_t count = 0;
822 // This is a forward while loop. Backward while loop will just consult its pairing part.
823 if (exec->cmd.cmd == CCV_NNC_GRAPH_FORWARD)
824 {
825 const int graph_breakpoint_size = graph->breakpoint_offset + graph->breakpoint_size;
826 for (;; ++count)
827 {
828 graph->while_count = count;
829 if (tensor_tape)
830 ccv_nnc_tensor_tape_set_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){
831 .d = exec_idx,
832 .graph = graph->p,
833 }, count);
834 _ccv_nnc_graph_unwrap(graph, count, 0);
835 if (count > 0)
836 _ccv_nnc_graph_transit_move_to(graph);
837 for (i = 0; i < graph_breakpoint_size; i++)
838 _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i), i, tensor_tape, stream_context, flags);
839 _ccv_nnc_graph_exec_unwrap_while_expr(graph, exec);
840 // Reached breakpoints, now check the breakpoint, if not met, break out.
841 if (!exec->p_while.expr(exec->p_while.inputs, exec->p_while.input_size, exec->p_while.data))
842 {
843 _ccv_nnc_graph_rewrap(graph);
844 break;
845 }
846 for (i = graph_breakpoint_size; i < graph->exec_info->rnum; i++)
847 _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i), i, tensor_tape, stream_context, flags);
848 _ccv_nnc_graph_from_move_transit(graph);
849 _ccv_nnc_graph_rewrap(graph);
850 }
851 } else {
852 // For backward graph, no need to evaluate the while expr.
853 assert(exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD);
854 assert(graph->pair);
855 assert(tensor_tape);
856 count = 0;
857 int64_t reverse_count = graph->while_count = ccv_nnc_tensor_tape_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){
858 .d = exec_idx,
859 .graph = graph->p,
860 });
861 _ccv_nnc_graph_unwrap(graph, count, reverse_count);
862 for (i = graph->breakpoint_offset; i < graph->exec_info->rnum; i++)
863 _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i), i, tensor_tape, stream_context, flags);
864 _ccv_nnc_graph_from_move_transit(graph);
865 _ccv_nnc_graph_rewrap(graph);
866 for (count = 1; reverse_count > 0; ++count)
867 {
868 graph->while_count = --reverse_count;
869 _ccv_nnc_graph_unwrap(graph, count, reverse_count);
870 _ccv_nnc_graph_transit_move_to(graph);
871 for (i = 0; i < graph->exec_info->rnum; i++)
872 _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i), i, tensor_tape, stream_context, flags);
873 _ccv_nnc_graph_from_move_transit(graph);
874 _ccv_nnc_graph_rewrap(graph);
875 }
876 }
877 } else {
878 graph->while_count = 0;
879 for (i = 0; i < graph->exec_info->rnum; i++)
17: Assuming 'i' is < field 'rnum'
18: Loop condition is true. Entering loop body
880 _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i), i, tensor_tape, stream_context, flags);
19: Calling '_ccv_nnc_graph_exec_run'
881 }
882}
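For reference, the forward branch of _ccv_nnc_graph_topsorted_run above follows a fixed per-iteration protocol: unwrap the multiview tensors for the current count, run the nodes up to the breakpoints, evaluate the while expression, and either rewrap and exit or run the remaining nodes, transit, and rewrap. A self-contained sketch of that control flow, with hypothetical stand-ins for the graph operations:

#include <stdint.h>
#include <stdio.h>

static int while_expr(int64_t count) { return count < 3; } /* stand-in predicate */

int main(void)
{
	int64_t count;
	for (count = 0;; ++count) {
		printf("unwrap(count=%lld); run prefix up to breakpoints\n", (long long)count);
		if (!while_expr(count)) { /* breakpoint check failed */
			printf("rewrap; exit loop\n");
			break;
		}
		printf("run remainder; transit; rewrap\n");
	}
	return 0;
}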
883
884static inline void _ccv_nnc_graph_run_slow_path(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
885{
886 int i, j;
887 const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0);
888 const int graph_source_size = source_size ? source_size : graph->sources->rnum;
889 const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0);
890 const int graph_destination_size = destination_size ? destination_size : graph->destinations->rnum;
891#define visitor(node, idx, ...) \
892 _ccv_nnc_graph_exec_run(graph, node, idx, tensor_tape, stream_context, flags)
893 if (exec && (exec->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
894 {
895 assert(!stream_context); // This doesn't work properly with stream context.
896 assert(exec->p_while.expr);
897 int64_t count = 0;
898 // This is a forward while loop. Backward while loop will just consult its pairing part.
899 if (exec->cmd.cmd == CCV_NNC_GRAPH_FORWARD)
900 {
901 ccv_array_t* follows = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), graph->breakpoint_size, 0);
902 for (i = 0; i < graph->breakpoint_size; i++)
903 {
904 const ccv_nnc_graph_exec_info_t* const exec_info = (const ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, graph->breakpoints->d);
905 if (exec_info->outgoings)
906 for (j = 0; j < exec_info->outgoings->rnum; j++)
907 {
908 const ccv_nnc_graph_exec_t exec = {
909 .d = *(int*)ccv_array_get(exec_info->outgoings, j),
910 .graph = graph,
911 };
912 ccv_array_push(follows, &exec);
913 }
914 }
915 for (;; ++count)
916 {
917 graph->while_count = count;
918 if (tensor_tape)
919 ccv_nnc_tensor_tape_set_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){
920 .d = exec_idx,
921 .graph = graph->p,
922 }, count);
923 _ccv_nnc_graph_unwrap(graph, count, 0);
924 if (count > 0)
925 _ccv_nnc_graph_transit_move_to(graph);
926 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph->breakpoints, graph->breakpoint_size, 0, visitor); /* inline expansion of CCV_NNC_GRAPH_VISIT (a Kahn-style topological traversal) omitted */
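The expansion elided above is a two-pass, Kahn-style traversal: it first counts incoming edges among the nodes reachable from the given sources, then repeatedly invokes visitor() on nodes whose remaining incoming count has dropped to zero. A self-contained sketch of that core idea on a hypothetical four-node diamond graph (the adjacency layout here is illustrative, not the library's):

#include <stdio.h>

enum { N = 4, E = 4 };
/* diamond: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3 */
static const int edge_from[E] = { 0, 0, 1, 2 };
static const int edge_to[E]   = { 1, 2, 3, 3 };

int main(void)
{
	int incoming[N] = { 0 }, queue[N], head = 0, tail = 0, e, v;
	for (e = 0; e < E; e++) /* first pass: count incoming edges */
		incoming[edge_to[e]]++;
	for (v = 0; v < N; v++) /* seed with the sources */
		if (incoming[v] == 0)
			queue[tail++] = v;
	while (head < tail) { /* second pass: visit in dependency order */
		v = queue[head++];
		printf("visitor(node %d)\n", v);
		for (e = 0; e < E; e++)
			if (edge_from[e] == v && --incoming[edge_to[e]] == 0)
				queue[tail++] = edge_to[e];
	}
	return 0;
}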
927 _ccv_nnc_graph_exec_unwrap_while_expr(graph, exec);
928 // Reached breakpoints, now check the breakpoint, if not met, break out.
929 if (!exec->p_while.expr(exec->p_while.inputs, exec->p_while.input_size, exec->p_while.data))
930 {
931 _ccv_nnc_graph_rewrap(graph);
932 break;
933 }
934 if (follows->rnum > 0)
935 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(follows, 0), follows->rnum, graph_destinations, graph_destination_size, 0, visitor); /* expansion omitted */
936 _ccv_nnc_graph_from_move_transit(graph);
937 _ccv_nnc_graph_rewrap(graph);
938 }
939 ccv_array_free(follows);
940 } else {
941 // For backward graph, no need to evaluate the while expr.
942 assert(exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD);
943 assert(graph->pair);
944 assert(tensor_tape);
945 count = 0;
946 int64_t reverse_count = graph->while_count = ccv_nnc_tensor_tape_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){
947 .d = exec_idx,
948 .graph = graph->p,
949 });
950 _ccv_nnc_graph_unwrap(graph, count, reverse_count);
951 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, graph_destinations, graph_destination_size, 1, visitor); /* expansion omitted */
952 _ccv_nnc_graph_from_move_transit(graph);
953 _ccv_nnc_graph_rewrap(graph);
954 for (count = 1; reverse_count > 0; ++count)
955 {
956 graph->while_count = --reverse_count;
957 _ccv_nnc_graph_unwrap(graph, count, reverse_count);
958 _ccv_nnc_graph_transit_move_to(graph);
959 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor); /* expansion omitted */
960 _ccv_nnc_graph_from_move_transit(graph);
961 _ccv_nnc_graph_rewrap(graph);
962 }
963 }
964 } else {
965 graph->while_count = 0;
966 CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor);
/* The macro expands to: */
do {
	typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t;
	int _i_, _j_;
	int _incoming_edges_ = 0;
	for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_++)
		_incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings->rnum : 0;
	const int _heap_mem_ = (graph->exec_info->rnum + _incoming_edges_ > 1024);
	ccv_nnc_incoming_t* _incomings_;
	if (_heap_mem_)
		_incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_));
	else
		_incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_));
	memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum));
	int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info->rnum), };
	int32_t* const _edges_ = _exists_[1] + (graph->exec_info->rnum);
	for (_i_ = 0; _i_ < (graph_source_size); _i_++) {
		((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c", 966, __extension__ __PRETTY_FUNCTION__); }));
		_exists_[0][_i_] = (graph_sources)[_i_].d;
	}
	int _exist_size_[2] = { (graph_source_size), 0, };
	int _p_ = 0, _q_ = 1;
	while (_exist_size_[_p_] > 0) {
		_exist_size_[_q_] = 0;
		for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
			const int32_t _idx_ = _exists_[_p_][_i_];
			if (_incomings_[_idx_].r == 1)
				continue;
			_incomings_[_idx_].r = 1;
			if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)
				for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) {
					const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_)));
					++_incomings_[d].c;
					_exists_[_q_][_exist_size_[_q_]] = d;
					++_exist_size_[_q_];
				}
		}
		((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_));
	}
	for (_i_ = 0; _i_ < (graph_source_size); _i_++) {
		((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c", 966, __extension__ __PRETTY_FUNCTION__); }));
		_exists_[0][_i_] = (graph_sources)[_i_].d;
	}
	_exist_size_[0] = (graph_source_size);
	_exist_size_[1] = 0;
	_p_ = 0, _q_ = 1;
	int _bump_ = 1;
	while (_exist_size_[_p_] > 0) {
		_exist_size_[_q_] = 0;
		for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
			const int32_t _idx_ = _exists_[_p_][_i_];
			if (_incomings_[_idx_].r == 2)
				continue;
			_incomings_[_idx_].r = 2;
			if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)
				for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) {
					const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_)));
					if (_incomings_[d].edges == 0) {
						_incomings_[d].edges = _bump_;
						_bump_ += _incomings_[d].c;
						_incomings_[d].c = 0;
					}
					_edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_;
					++_incomings_[d].c;
					_exists_[_q_][_exist_size_[_q_]] = d;
					++_exist_size_[_q_];
				}
		}
		((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_));
	}
	for (_i_ = 0; _i_ < (graph_destination_size); _i_++) {
		((void) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph", "ccv_nnc_graph_run.c", 966, __extension__ __PRETTY_FUNCTION__); }));
		_exists_[0][_i_] = (graph_destinations)[_i_].d;
	}
	_exist_size_[0] = (graph_destination_size);
	_exist_size_[1] = 0;
	_p_ = 0, _q_ = 1;
	while (_exist_size_[_p_] > 0) {
		_exist_size_[_q_] = 0;
		for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
			const int32_t _idx_ = _exists_[_p_][_i_];
			if (_incomings_[_idx_].r != 2)
				continue;
			_incomings_[_idx_].r = 3;
			if (_incomings_[_idx_].edges > 0)
				for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) {
					const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_];
					_exists_[_q_][_exist_size_[_q_]] = d;
					++_exist_size_[_q_];
				}
		}
		((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_));
	}
	for (_i_ = 0; _i_ < (graph_destination_size); _i_++) {
		((void) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph", "ccv_nnc_graph_run.c", 966, __extension__ __PRETTY_FUNCTION__); }));
		_incomings_[(graph_destinations)[_i_].d].d = 1;
	}
	for (_i_ = 0; _i_ < (graph_source_size); _i_++) {
		((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c", 966, __extension__ __PRETTY_FUNCTION__); }));
		_exists_[0][_i_] = (graph_sources)[_i_].d;
	}
	_p_ = 0;
	_q_ = 1;
	_exist_size_[0] = (graph_source_size);
	_exist_size_[1] = 0;
	int _d_ = 0;
	while (_exist_size_[_p_] > 0) {
		_exist_size_[_q_] = 0;
		for (_i_ = 0; _i_ < _exist_size_[_p_];) {
			const int32_t _idx_ = _exists_[_p_][_i_];
			visitor((((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0)))) + _idx_), (_idx_), (_incomings_[_idx_].d));
			if (_incomings_[_idx_].d) {
				++_d_;
				_incomings_[_idx_].r = 4;
			}
			if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings) {
				if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) {
					const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0)));
					--_incomings_[d].c;
					if (_incomings_[d].c == 0 && _incomings_[d].r == 3 && _d_ < (graph_destination_size)) {
						_exists_[_p_][_i_] = d;
						continue;
					}
				} else
					for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) {
						const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_)));
						--_incomings_[d].c;
						if (_incomings_[d].c == 0 && _incomings_[d].r == 3 && _d_ < (graph_destination_size)) {
							_exists_[_q_][_exist_size_[_q_]] = d;
							++_exist_size_[_q_];
						}
					}
			}
			++_i_;
		}
		((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_));
	}
	for (_i_ = 0; _i_ < (graph_destination_size); _i_++) {
		((void) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph", "ccv_nnc_graph_run.c", 966, __extension__ __PRETTY_FUNCTION__); }));
		if (_incomings_[(graph_destinations)[_i_].d].r == 4)
			continue;
		if (!(0)) {
			((void) sizeof ((_incomings_[(graph_destinations)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0", "ccv_nnc_graph_run.c", 966, __extension__ __PRETTY_FUNCTION__); }));
		} else if (_incomings_[(graph_destinations)[_i_].d].c > 0)
			continue;
		visitor((((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0)))) + (graph_destinations)[_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations)[_i_].d].d));
	}
	if (_heap_mem_)
		free(_incomings_);
} while (0);
967 }
968#undef visitor
969}
970
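For readers tracing the expanded CCV_NNC_GRAPH_VISIT blocks above, the heart of the macro is a breadth-first topological visit: count each node's incoming edges, seed a work queue with the sources, and visit a node only once every predecessor has been visited. The sketch below is a minimal stand-alone rendition of that idea, not the library's code; the node_t layout, the MAX_NODES bound, and the function names are all hypothetical, and the real macro's pruning against graph_destinations and its heap/alloca scratch handling are omitted.

#include <stdio.h>

#define MAX_NODES 16 /* hypothetical bound, for the sketch only */

typedef struct {
	int outgoing[MAX_NODES]; /* indices of successor nodes */
	int outgoing_size;
} node_t;

/* Visit every node reachable from the sources in topological order. */
static void visit_in_topological_order(const node_t* const nodes, const int node_size, const int* const sources, const int source_size, void (*visitor)(int))
{
	int incoming[MAX_NODES] = { 0 };
	int queue[MAX_NODES];
	int head = 0, tail = 0;
	int i, j;
	/* Pass 1: count incoming edges per node, as the macro's first BFS does. */
	for (i = 0; i < node_size; i++)
		for (j = 0; j < nodes[i].outgoing_size; j++)
			++incoming[nodes[i].outgoing[j]];
	/* Seed the work queue with the sources. */
	for (i = 0; i < source_size; i++)
		queue[tail++] = sources[i];
	/* Pass 2: a node is visited only after all of its predecessors. */
	while (head < tail)
	{
		const int idx = queue[head++];
		visitor(idx);
		for (j = 0; j < nodes[idx].outgoing_size; j++)
			if (--incoming[nodes[idx].outgoing[j]] == 0)
				queue[tail++] = nodes[idx].outgoing[j];
	}
}

static void print_visit(const int d)
{
	printf("visit %d\n", d);
}

int main(void)
{
	/* Diamond graph: 0 -> {1, 2} -> 3; prints 0, 1, 2, 3. */
	const node_t nodes[4] = { { { 1, 2 }, 2 }, { { 3 }, 1 }, { { 3 }, 1 }, { { 0 }, 0 } };
	const int sources[1] = { 0 };
	visit_in_topological_order(nodes, 4, sources, 1, print_visit);
	return 0;
}

The two-buffer _exists_ ping-pong in the real macro serves the same role as the single queue here; the extra passes exist to record reverse edges and restrict the visit to nodes that can actually reach a destination.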
971static int _ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
972{
973 assert((sources == 0 && source_size == 0) || (sources && source_size));
3. Assuming 'sources' is equal to null
4. Assuming 'source_size' is equal to 0
974 assert((destinations == 0 && destination_size == 0) || (destinations && destination_size));
5. Assuming 'destinations' is equal to null
6. Assuming 'destination_size' is equal to 0
975 const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0);
6.1. 'sources' is null
7. '?' condition is false
976 const int graph_source_size = source_size ? source_size : graph->sources->rnum;
7.1. 'source_size' is 0
8. '?' condition is false
977 const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0);
8.1. 'destinations' is null
9. '?' condition is false
978 const int graph_destination_size = destination_size ? destination_size : graph->destinations->rnum;
9.1. 'destination_size' is 0
10. '?' condition is false
979 int i;
980 for (i = 0; i < graph_source_size; i++)
11. Assuming 'i' is >= 'graph_source_size'
12. Loop condition is false. Execution continues on line 983
981 if (graph_sources[i].graph != graph)
982 return CCV_NNC_EXEC_INVALID;
983 for (i = 0; i < graph_destination_size; i++)
13. Assuming 'i' is >= 'graph_destination_size'
984 if (graph_destinations[i].graph != graph)
985 return CCV_NNC_EXEC_INVALID;
986 // When topsorted is true, there is no memory allocation when running the graph.
987 const int topsorted = (!sources && !destinations && graph->topsorted);
13.1. 'sources' is null
13.2. 'destinations' is null
988 if (topsorted)
14. Assuming 'topsorted' is not equal to 0
15. Taking true branch
989 _ccv_nnc_graph_topsorted_run(graph, exec_idx, exec, flags, tensor_tape, stream_context);
16. Calling '_ccv_nnc_graph_topsorted_run'
990 else
991 _ccv_nnc_graph_run_slow_path(graph, exec_idx, exec, inputs, input_size, outputs, output_size, flags, sources, source_size, destinations, destination_size, tensor_tape, stream_context);
992 return CCV_NNC_EXEC_SUCCESS;
993}
994
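The warning this report tracks (an array access from 'inputs' dereferencing a null pointer at line 799) reaches _ccv_nnc_graph_run with inputs == 0 and input_size == 0, as path notes 1-16 show. Below is a minimal sketch of a bounds-plus-null guard that would make the intended invariant explicit to the analyzer; the helper name checked_input is hypothetical, not part of the library, and assumes the ccv_nnc_tensor_t type from ccv_nnc.h.

#include <assert.h>

/* Hypothetical accessor: any access implies input_size > 0 and a non-null
 * inputs array, which is exactly the invariant the analyzer cannot see on
 * the path above where both are 0. */
static inline ccv_nnc_tensor_t* checked_input(ccv_nnc_tensor_t* const* const inputs, const int input_size, const int i)
{
	assert(i >= 0 && i < input_size); /* rules out any access when input_size == 0 */
	assert(inputs); /* documents that a null array can never be indexed */
	return inputs[i];
}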
995int ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
996{
997 if (stream_context && graph->topsorted && graph->stream_size > 0 && graph->default_schedule && source_size == 0 && destination_size == 0)
1. Assuming 'stream_context' is null
998 {
999 co_scheduler_t* const scheduler = ccv_nnc_stream_context_get_scheduler(stream_context);
1000 co_routine_t* const task = co_new(_ccv_nnc_graph_topsorted_run_coro, (graph, -1, graph->default_schedule, 0, tensor_tape, stream_context, flags));
/* co_new expands to: */
({
	co_routine_t* const task = malloc((sizeof(co_routine_t) + _ccv_nnc_graph_topsorted_run_coro_stack_size()));
	do {
		struct _ccv_nnc_graph_topsorted_run_coro_param_s params = { ._co_params = { graph, -1, graph->default_schedule, 0, tensor_tape, stream_context, flags } };
		task->fn = _ccv_nnc_graph_topsorted_run_coro;
		task->line = 0;
		task->done = 0;
		task->root = 0;
		task->other_size = 0;
		task->notify_any = 0;
		task->others = 0;
		task->caller = 0;
		task->callee = 0;
		if (sizeof(params) > 0)
			memcpy(task + 1, &params, sizeof(params));
	} while (0);
	task;
});
1001 co_schedule(scheduler, task);
1002 // I don't need to worry about freeing this task; it will free itself at the end.
1003 return CCV_NNC_EXEC_SUCCESS;
1004 } else
1005 return _ccv_nnc_graph_run(graph, -1, 0, 0, 0, 0, 0, flags, sources, source_size, destinations, destination_size, tensor_tape, 0 /* In this case, we don't support stream context yet. */);
2. Calling '_ccv_nnc_graph_run'
1006}
1007
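The co_new expansions above (lines 1000 and 1017) rely on a single allocation that places the coroutine's parameter block immediately after its header: params are memcpy'd to task + 1 and read back from there when the routine runs. Below is a minimal sketch of that layout trick under stand-in types; header_t and params_t are hypothetical simplifications, not the library's co_routine_t.

#include <stdlib.h>
#include <string.h>

typedef struct { int line, done; } header_t; /* stand-in for co_routine_t */
typedef struct { int a, b; } params_t; /* stand-in for the _co_params struct */

int main(void)
{
	/* One block holds the header followed by the parameter copy. */
	header_t* const task = malloc(sizeof(header_t) + sizeof(params_t));
	if (!task)
		return 1;
	const params_t params = { 1, 2 };
	memcpy(task + 1, &params, sizeof(params)); /* params live right after the header */
	const params_t* const in_frame = (const params_t*)(task + 1);
	const int ok = (in_frame->a == 1 && in_frame->b == 2);
	free(task);
	return ok ? 0 : 1;
}

One allocation instead of two keeps the parameters' lifetime tied to the coroutine frame, which is why the comment in the source can say the task "will free itself at the end".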
1008int ccv_nnc_graph_run_with_schedule(ccv_nnc_graph_t* const graph, const int flags, const ccv_nnc_graph_static_schedule_t* const _schedule, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const _stream_context)
1009{
1010 assert(graph->topsorted);
1011 assert(graph->stream_size > 0);
1012 const ccv_nnc_graph_static_schedule_t* const schedule = _schedule ? _schedule : graph->default_schedule;
1013 assert(schedule);
1014 assert(schedule->stream_0 < graph->stream_size);
1015 ccv_nnc_stream_context_t* const stream_context = _stream_context ? _stream_context : graph->streams[schedule->stream_0];
1016 co_scheduler_t* const scheduler = ccv_nnc_stream_context_get_scheduler(stream_context);
1017 co_routine_t* const task = co_new(_ccv_nnc_graph_topsorted_run_coro, (graph, -1, schedule, 0, tensor_tape, stream_context, flags));
/* co_new expands to: */
({
	co_routine_t* const task = malloc((sizeof(co_routine_t) + _ccv_nnc_graph_topsorted_run_coro_stack_size()));
	do {
		struct _ccv_nnc_graph_topsorted_run_coro_param_s params = { ._co_params = { graph, -1, schedule, 0, tensor_tape, stream_context, flags } };
		task->fn = _ccv_nnc_graph_topsorted_run_coro;
		task->line = 0;
		task->done = 0;
		task->root = 0;
		task->other_size = 0;
		task->notify_any = 0;
		task->others = 0;
		task->caller = 0;
		task->callee = 0;
		if (sizeof(params) > 0)
			memcpy(task + 1, &params, sizeof(params));
	} while (0);
	task;
});
1018 co_schedule(scheduler, task);
1019 // I don't need to worry about freeing this task; it will free itself at the end.
1020 if (!_stream_context) // If no stream context provided, this is a sync operation.
1021 ccv_nnc_stream_context_wait(stream_context);
1022 return CCV_NNC_EXEC_SUCCESS;
1023}
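Taken together, the two entry points differ mainly in synchronization: ccv_nnc_graph_run falls back to the synchronous run path when no stream context is given (the path the analyzer followed above), while ccv_nnc_graph_run_with_schedule always schedules a coroutine and only waits when _stream_context is null. A hedged usage sketch follows; the graph variable is assumed to be an already-topsorted ccv_nnc_graph_t with a default schedule.

/* Synchronous run, no explicit sources/destinations, no tape, no stream;
 * this is the call shape the analyzer followed into _ccv_nnc_graph_run. */
int rc = ccv_nnc_graph_run(graph, 0, 0, 0, 0, 0, 0, 0);

/* Run on the default schedule; with a null _stream_context the call waits
 * on stream_0 before returning (lines 1020-1021 above). */
rc = ccv_nnc_graph_run_with_schedule(graph, 0, 0, 0, 0);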