| File: | nnc/ccv_nnc_graph_run.c |
| Warning: | line 921, column 8 Array access (from variable 'inputs') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | #include "ccv_nnc.h" | ||||
| 2 | #include "ccv_nnc_easy.h" | ||||
| 3 | #include "ccv_nnc_internal.h" | ||||
| 4 | #include "ccv_internal.h" | ||||
| 5 | #include "_ccv_nnc_graph.h" | ||||
| 6 | #include "_ccv_nnc_stream.h" | ||||
| 7 | #ifdef HAVE_CUDA1 | ||||
| 8 | #include "gpu/ccv_nnc_compat.h" | ||||
| 9 | #elif defined(HAVE_MPS) | ||||
| 10 | #include "mps/ccv_nnc_mps.h" | ||||
| 11 | #endif | ||||
| 12 | |||||
| 13 | // MARK - Level-2 API | ||||
| 14 | |||||
| 15 | static void _ccv_nnc_unwrap_tensor_wrap(const ccv_nnc_graph_t* const graph, const int64_t count, const int64_t reverse_count, ccv_nnc_graph_tensor_wrap_t* const tensor_wrap) | ||||
| 16 | { | ||||
| 17 | ccv_nnc_tensor_t* tensor = tensor_wrap->tensors[tensor_wrap->index]; | ||||
| 18 | while (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW) && | ||||
| 19 | (((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graph || | ||||
| 20 | ((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graph->pair)) | ||||
| 21 | { | ||||
| 22 | // If the anchor is from the pair, we use the reverse_count instead (we are looking it up). | ||||
| 23 | const int i = (int)((((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graph) ? count : reverse_count); | ||||
| 24 | ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor; | ||||
| 25 | const int off = mv->kind; | ||||
| 26 | const int mod = mv->repeat; | ||||
| 27 | tensor = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data )[i >= off ? ((i - off) % mod) + off : i]; // Unwrap. | ||||
| 28 | // If reached the root. | ||||
| 29 | if (!CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW)) | ||||
| 30 | tensor_wrap->update_required = 1; // Need to update tensor updates. | ||||
| 31 | ++tensor_wrap->index; | ||||
| 32 | tensor_wrap->tensors[tensor_wrap->index] = tensor; | ||||
| 33 | assert(tensor_wrap->index < tensor_wrap->count)((void) sizeof ((tensor_wrap->index < tensor_wrap->count ) ? 1 : 0), __extension__ ({ if (tensor_wrap->index < tensor_wrap ->count) ; else __assert_fail ("tensor_wrap->index < tensor_wrap->count" , "ccv_nnc_graph_run.c", 33, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 34 | } | ||||
| 35 | } | ||||
| 36 | |||||
| 37 | static void _ccv_nnc_graph_unwrap_sub_graph(const ccv_nnc_graph_t* const graph, const int64_t count, const int64_t reverse_count, const ccv_nnc_graph_t* const sub_graph) | ||||
| 38 | { | ||||
| 39 | int i; | ||||
| 40 | if (sub_graph->carry_overs) | ||||
| 41 | for (i = 0; i < sub_graph->carry_overs->rnum; i++) | ||||
| 42 | { | ||||
| 43 | ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(sub_graph->carry_overs, i)((void*)(((char*)((sub_graph->carry_overs)->data)) + (size_t )(sub_graph->carry_overs)->rsize * (size_t)(i))); | ||||
| 44 | _ccv_nnc_unwrap_tensor_wrap(graph, count, reverse_count, carry_over->from); | ||||
| 45 | _ccv_nnc_unwrap_tensor_wrap(graph, count, reverse_count, carry_over->to); | ||||
| 46 | } | ||||
| 47 | if (sub_graph->sub_graphs) | ||||
| 48 | for (i = 0; i < sub_graph->sub_graphs->rnum; i++) | ||||
| 49 | _ccv_nnc_graph_unwrap_sub_graph(graph, count, reverse_count, *(ccv_nnc_graph_t**)ccv_array_get(sub_graph->sub_graphs, i)((void*)(((char*)((sub_graph->sub_graphs)->data)) + (size_t )(sub_graph->sub_graphs)->rsize * (size_t)(i)))); | ||||
| 50 | } | ||||
| 51 | |||||
| 52 | static void _ccv_nnc_graph_unwrap(const ccv_nnc_graph_t* const graph, const int64_t count, const int64_t reverse_count) | ||||
| 53 | { | ||||
| 54 | if (!graph->tensor_wraps_refs) | ||||
| 55 | return; | ||||
| 56 | int i, j; | ||||
| 57 | for (i = 0; i < graph->tensor_wraps_refs->rnum; i++) | ||||
| 58 | { | ||||
| 59 | const ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (const ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(graph->tensor_wraps_refs, i)((void*)(((char*)((graph->tensor_wraps_refs)->data)) + ( size_t)(graph->tensor_wraps_refs)->rsize * (size_t)(i)) ); | ||||
| 60 | const ccv_nnc_graph_t* const sub_graph = tensor_wraps_ref->graph; | ||||
| 61 | ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(sub_graph->tensor_wraps, tensor_wraps_ref->d)((void*)(((char*)((sub_graph->tensor_wraps)->data)) + ( size_t)(sub_graph->tensor_wraps)->rsize * (size_t)(tensor_wraps_ref ->d))); | ||||
| 62 | if (tensor_wrap_array) | ||||
| 63 | for (j = 0; j < tensor_wrap_array->size; j++) | ||||
| 64 | { | ||||
| 65 | ccv_nnc_graph_tensor_wrap_t* const tensor_wrap = tensor_wrap_array->tensor_wraps[j]; | ||||
| 66 | if (!tensor_wrap) | ||||
| 67 | continue; | ||||
| 68 | _ccv_nnc_unwrap_tensor_wrap(graph, count, reverse_count, tensor_wrap); | ||||
| 69 | } | ||||
| 70 | } | ||||
| 71 | _ccv_nnc_graph_unwrap_sub_graph(graph, count, reverse_count, graph); | ||||
| 72 | } | ||||
| 73 | |||||
| 74 | static void _ccv_nnc_graph_transit_move_to(const ccv_nnc_graph_t* const graph) | ||||
| 75 | { | ||||
| 76 | int i; | ||||
| 77 | if (graph->carry_overs) | ||||
| 78 | for (i = 0; i < graph->carry_overs->rnum; i++) | ||||
| 79 | { | ||||
| 80 | ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i)((void*)(((char*)((graph->carry_overs)->data)) + (size_t )(graph->carry_overs)->rsize * (size_t)(i))); | ||||
| 81 | ccv_nnc_tensor_t* it = (ccv_nnc_tensor_t*)(carry_over->to->tensors[carry_over->to->index]); | ||||
| 82 | assert(!CCV_IS_TENSOR_MULTIVIEW(it))((void) sizeof ((!((*(int*)(it)) & CCV_TENSOR_MULTIVIEW)) ? 1 : 0), __extension__ ({ if (!((*(int*)(it)) & CCV_TENSOR_MULTIVIEW )) ; else __assert_fail ("!CCV_IS_TENSOR_MULTIVIEW(it)", "ccv_nnc_graph_run.c" , 82, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 83 | it->data = carry_over->transit; | ||||
| 84 | } | ||||
| 85 | } | ||||
| 86 | |||||
| 87 | static void _ccv_nnc_graph_from_move_transit(const ccv_nnc_graph_t* const graph) | ||||
| 88 | { | ||||
| 89 | int i; | ||||
| 90 | if (graph->carry_overs) | ||||
| 91 | for (i = 0; i < graph->carry_overs->rnum; i++) | ||||
| 92 | { | ||||
| 93 | ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i)((void*)(((char*)((graph->carry_overs)->data)) + (size_t )(graph->carry_overs)->rsize * (size_t)(i))); | ||||
| 94 | ccv_nnc_tensor_t* it = (ccv_nnc_tensor_t*)(carry_over->from->tensors[carry_over->from->index]); | ||||
| 95 | assert(!CCV_IS_TENSOR_MULTIVIEW(it))((void) sizeof ((!((*(int*)(it)) & CCV_TENSOR_MULTIVIEW)) ? 1 : 0), __extension__ ({ if (!((*(int*)(it)) & CCV_TENSOR_MULTIVIEW )) ; else __assert_fail ("!CCV_IS_TENSOR_MULTIVIEW(it)", "ccv_nnc_graph_run.c" , 95, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 96 | carry_over->transit = it->data; | ||||
| 97 | } | ||||
| 98 | } | ||||
| 99 | |||||
| 100 | static void _ccv_nnc_rewrap_tensor_wrap(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_tensor_wrap_t* const tensor_wrap) | ||||
| 101 | { | ||||
| 102 | while (tensor_wrap->index > 0 && CCV_IS_TENSOR_MULTIVIEW(tensor_wrap->tensors[tensor_wrap->index - 1])((*(int*)(tensor_wrap->tensors[tensor_wrap->index - 1]) ) & CCV_TENSOR_MULTIVIEW) && | ||||
| 103 | (((ccv_nnc_tensor_multiview_t*)tensor_wrap->tensors[tensor_wrap->index - 1])->anchor == (intptr_t)graph || | ||||
| 104 | ((ccv_nnc_tensor_multiview_t*)tensor_wrap->tensors[tensor_wrap->index - 1])->anchor == (intptr_t)graph->pair)) | ||||
| 105 | --tensor_wrap->index; | ||||
| 106 | } | ||||
| 107 | |||||
| 108 | static void _ccv_nnc_graph_rewrap_sub_graph(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_t* const sub_graph) | ||||
| 109 | { | ||||
| 110 | int i; | ||||
| 111 | if (sub_graph->carry_overs) | ||||
| 112 | for (i = 0; i < sub_graph->carry_overs->rnum; i++) | ||||
| 113 | { | ||||
| 114 | ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(sub_graph->carry_overs, i)((void*)(((char*)((sub_graph->carry_overs)->data)) + (size_t )(sub_graph->carry_overs)->rsize * (size_t)(i))); | ||||
| 115 | _ccv_nnc_rewrap_tensor_wrap(graph, carry_over->from); | ||||
| 116 | _ccv_nnc_rewrap_tensor_wrap(graph, carry_over->to); | ||||
| 117 | } | ||||
| 118 | if (sub_graph->sub_graphs) | ||||
| 119 | for (i = 0; i < sub_graph->sub_graphs->rnum; i++) | ||||
| 120 | _ccv_nnc_graph_rewrap_sub_graph(graph, *(ccv_nnc_graph_t**)ccv_array_get(sub_graph->sub_graphs, i)((void*)(((char*)((sub_graph->sub_graphs)->data)) + (size_t )(sub_graph->sub_graphs)->rsize * (size_t)(i)))); | ||||
| 121 | } | ||||
| 122 | |||||
| 123 | static void _ccv_nnc_graph_rewrap(const ccv_nnc_graph_t* const graph) // Call this method at the end to roll the wrap_ptr back | ||||
| 124 | { | ||||
| 125 | if (!graph->tensor_wraps_refs) | ||||
| 126 | return; | ||||
| 127 | int i, j; | ||||
| 128 | for (i = 0; i < graph->tensor_wraps_refs->rnum; i++) | ||||
| 129 | { | ||||
| 130 | const ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (const ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(graph->tensor_wraps_refs, i)((void*)(((char*)((graph->tensor_wraps_refs)->data)) + ( size_t)(graph->tensor_wraps_refs)->rsize * (size_t)(i)) ); | ||||
| 131 | const ccv_nnc_graph_t* const sub_graph = tensor_wraps_ref->graph; | ||||
| 132 | ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(sub_graph->tensor_wraps, tensor_wraps_ref->d)((void*)(((char*)((sub_graph->tensor_wraps)->data)) + ( size_t)(sub_graph->tensor_wraps)->rsize * (size_t)(tensor_wraps_ref ->d))); | ||||
| 133 | if (tensor_wrap_array) | ||||
| 134 | for (j = 0; j < tensor_wrap_array->size; j++) | ||||
| 135 | { | ||||
| 136 | ccv_nnc_graph_tensor_wrap_t* const tensor_wrap = tensor_wrap_array->tensor_wraps[j]; | ||||
| 137 | if (!tensor_wrap) | ||||
| 138 | continue; | ||||
| 139 | _ccv_nnc_rewrap_tensor_wrap(graph, tensor_wrap); | ||||
| 140 | } | ||||
| 141 | } | ||||
| 142 | _ccv_nnc_graph_rewrap_sub_graph(graph, graph); | ||||
| 143 | } | ||||
| 144 | |||||
| 145 | static void _ccv_nnc_graph_exec_unwrap_io(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node) | ||||
| 146 | { | ||||
| 147 | if (!node->tensor_wraps_ref) | ||||
| 148 | return; | ||||
| 149 | int i; | ||||
| 150 | ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, node->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t )(graph->tensor_wraps)->rsize * (size_t)(node->tensor_wraps_ref - 1))); | ||||
| 151 | ccv_nnc_graph_tensor_wrap_t** const tensor_wraps = tensor_wrap_array->tensor_wraps; | ||||
| 152 | for (i = 0; i < tensor_wrap_array->size; i++) | ||||
| 153 | if (tensor_wraps[i]) | ||||
| 154 | { | ||||
| 155 | assert(tensor_wraps[i]->index > 0)((void) sizeof ((tensor_wraps[i]->index > 0) ? 1 : 0), __extension__ ({ if (tensor_wraps[i]->index > 0) ; else __assert_fail ("tensor_wraps[i]->index > 0", "ccv_nnc_graph_run.c", 155 , __extension__ __PRETTY_FUNCTION__); })); | ||||
| 156 | ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)(tensor_wraps[i]->tensors[tensor_wraps[i]->index - 1]); | ||||
| 157 | assert(CCV_IS_TENSOR_MULTIVIEW(mv))((void) sizeof ((((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW)) ? 1 : 0), __extension__ ({ if (((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW )) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(mv)", "ccv_nnc_graph_run.c" , 157, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 158 | // Only now set the mv->it, because now this node is about to get executed. | ||||
| 159 | mv->it = tensor_wraps[i]->tensors[tensor_wraps[i]->index]; | ||||
| 160 | assert(!CCV_IS_TENSOR_MULTIVIEW(mv->it))((void) sizeof ((!((*(int*)(mv->it)) & CCV_TENSOR_MULTIVIEW )) ? 1 : 0), __extension__ ({ if (!((*(int*)(mv->it)) & CCV_TENSOR_MULTIVIEW)) ; else __assert_fail ("!CCV_IS_TENSOR_MULTIVIEW(mv->it)" , "ccv_nnc_graph_run.c", 160, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 161 | } | ||||
| 162 | for (i = 0; i < node->input_size; i++) | ||||
| 163 | if (tensor_wraps[i]) | ||||
| 164 | node->inputs[i] = tensor_wraps[i]->tensors[tensor_wraps[i]->index]; | ||||
| 165 | const int d = node->input_size; | ||||
| 166 | for (i = 0; i < node->output_size; i++) | ||||
| 167 | if (tensor_wraps[d + i]) | ||||
| 168 | node->outputs[i] = tensor_wraps[d + i]->tensors[tensor_wraps[d + i]->index]; | ||||
| 169 | } | ||||
| 170 | |||||
| 171 | static void _ccv_nnc_graph_exec_unwrap_while_expr(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node) | ||||
| 172 | { | ||||
| 173 | assert(node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)((void) sizeof ((node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE ) ? 1 : 0), __extension__ ({ if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE ) ; else __assert_fail ("node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE" , "ccv_nnc_graph_run.c", 173, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 174 | if (!node->p_while.tensor_wraps_ref) | ||||
| 175 | return; | ||||
| 176 | int i; | ||||
| 177 | ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, node->p_while.tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t )(graph->tensor_wraps)->rsize * (size_t)(node->p_while .tensor_wraps_ref - 1))); | ||||
| 178 | ccv_nnc_graph_tensor_wrap_t** const tensor_wraps = tensor_wrap_array->tensor_wraps; | ||||
| 179 | for (i = 0; i < tensor_wrap_array->size; i++) | ||||
| 180 | if (tensor_wraps[i]) | ||||
| 181 | { | ||||
| 182 | assert(tensor_wraps[i]->index > 0)((void) sizeof ((tensor_wraps[i]->index > 0) ? 1 : 0), __extension__ ({ if (tensor_wraps[i]->index > 0) ; else __assert_fail ("tensor_wraps[i]->index > 0", "ccv_nnc_graph_run.c", 182 , __extension__ __PRETTY_FUNCTION__); })); | ||||
| 183 | ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)(tensor_wraps[i]->tensors[tensor_wraps[i]->index - 1]); | ||||
| 184 | assert(CCV_IS_TENSOR_MULTIVIEW(mv))((void) sizeof ((((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW)) ? 1 : 0), __extension__ ({ if (((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW )) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(mv)", "ccv_nnc_graph_run.c" , 184, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 185 | // Only now set the mv->it, because now this node is about to get executed. | ||||
| 186 | mv->it = tensor_wraps[i]->tensors[tensor_wraps[i]->index]; | ||||
| 187 | assert(!CCV_IS_TENSOR_MULTIVIEW(mv->it))((void) sizeof ((!((*(int*)(mv->it)) & CCV_TENSOR_MULTIVIEW )) ? 1 : 0), __extension__ ({ if (!((*(int*)(mv->it)) & CCV_TENSOR_MULTIVIEW)) ; else __assert_fail ("!CCV_IS_TENSOR_MULTIVIEW(mv->it)" , "ccv_nnc_graph_run.c", 187, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 188 | } | ||||
| 189 | for (i = 0; i < node->p_while.input_size; i++) | ||||
| 190 | if (tensor_wraps[i]) | ||||
| 191 | node->p_while.inputs[i] = tensor_wraps[i]->tensors[tensor_wraps[i]->index]; | ||||
| 192 | } | ||||
| 193 | |||||
| 194 | static void _ccv_nnc_graph_exec_unwrap_phi(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_info_t* const node, const int ref) | ||||
| 195 | { | ||||
| 196 | int i; | ||||
| 197 | // If the output tensor is a phi multi-view tensor, we update our selection to all the subscribers. | ||||
| 198 | for (i = 0; i < node->output_size; i++) | ||||
| 199 | if (CCV_IS_TENSOR_MULTIVIEW(node->outputs[i])((*(int*)(node->outputs[i])) & CCV_TENSOR_MULTIVIEW) && | ||||
| 200 | ((ccv_nnc_tensor_multiview_t*)node->outputs[i])->anchor == CCV_NNC_MULTIVIEW_PHI(intptr_t)0x1) | ||||
| 201 | { | ||||
| 202 | ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)node->outputs[i]; | ||||
| 203 | mv->it = CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data )[ref >= 0]; | ||||
| 204 | ccv_nnc_tensor_multiview_synchronize(mv); | ||||
| 205 | } | ||||
| 206 | } | ||||
| 207 | |||||
| 208 | static void _ccv_nnc_graph_exec_begin_synchronize_multiviews(ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node) | ||||
| 209 | { | ||||
| 210 | if (!node->tensor_wraps_ref) | ||||
| 211 | return; | ||||
| 212 | int i; | ||||
| 213 | ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, node->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t )(graph->tensor_wraps)->rsize * (size_t)(node->tensor_wraps_ref - 1))); | ||||
| 214 | ccv_nnc_graph_tensor_wrap_t** const tensor_wraps = tensor_wrap_array->tensor_wraps; | ||||
| 215 | for (i = 0; i < tensor_wrap_array->size; i++) | ||||
| 216 | if (tensor_wraps[i] && tensor_wraps[i]->update_required) | ||||
| 217 | { | ||||
| 218 | assert(tensor_wraps[i]->index > 0)((void) sizeof ((tensor_wraps[i]->index > 0) ? 1 : 0), __extension__ ({ if (tensor_wraps[i]->index > 0) ; else __assert_fail ("tensor_wraps[i]->index > 0", "ccv_nnc_graph_run.c", 218 , __extension__ __PRETTY_FUNCTION__); })); | ||||
| 219 | ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)(tensor_wraps[i]->tensors[tensor_wraps[i]->index - 1]); | ||||
| 220 | // Now update the final pointer. | ||||
| 221 | ccv_nnc_tensor_multiview_synchronize(mv); | ||||
| 222 | tensor_wraps[i]->update_required = 0; // Reset, no need to update. | ||||
| 223 | } | ||||
| 224 | } | ||||
| 225 | |||||
| 226 | void ccv_nnc_print_tensor_shape(const ccv_nnc_tensor_t* const tensor) | ||||
| 227 | { | ||||
| 228 | int i; | ||||
| 229 | PRINT(CCV_CLI_INFO, " [%d", tensor->info.dim[0])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (" [%d", tensor->info.dim[0]); fflush(stdout); } } while ( 0); | ||||
| 230 | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor->info.dim[i]; i++) | ||||
| 231 | PRINT(CCV_CLI_INFO, "x%d", tensor->info.dim[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("x%d", tensor->info.dim[i]); fflush(stdout); } } while (0 ); | ||||
| 232 | PRINT(CCV_CLI_INFO, "]")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("]"); fflush(stdout); } } while (0); | ||||
| 233 | } | ||||
| 234 | |||||
| 235 | void ccv_nnc_print_tensor_info(const ccv_nnc_tensor_t* const tensor) | ||||
| 236 | { | ||||
| 237 | int i; | ||||
| 238 | PRINT(CCV_CLI_INFO, " [%d", tensor->info.dim[0])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (" [%d", tensor->info.dim[0]); fflush(stdout); } } while ( 0); | ||||
| 239 | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor->info.dim[i]; i++) | ||||
| 240 | PRINT(CCV_CLI_INFO, "x%d", tensor->info.dim[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("x%d", tensor->info.dim[i]); fflush(stdout); } } while (0 ); | ||||
| 241 | PRINT(CCV_CLI_INFO, "]")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("]"); fflush(stdout); } } while (0); | ||||
| 242 | if (!CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)(CCV_CLI_VERBOSE & ccv_cli_get_output_levels()) || tensor->info.dim[0] <= 0) | ||||
| 243 | return; | ||||
| 244 | const int nd = ccv_nnc_tensor_nd(tensor->info.dim); | ||||
| 245 | const int len = ccv_min(tensor->info.dim[nd - 1], 3)({ typeof (tensor->info.dim[nd - 1]) _a = (tensor->info .dim[nd - 1]); typeof (3) _b = (3); (_a < _b) ? _a : _b; } ); | ||||
| 246 | if (CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) | ||||
| 247 | { | ||||
| 248 | #ifdef HAVE_CUDA1 | ||||
| 249 | switch (tensor->info.datatype) | ||||
| 250 | { | ||||
| 251 | case CCV_16F: { | ||||
| 252 | uint16_t data[len]; | ||||
| 253 | cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.f16, tensor->info.type, len * sizeof(uint16_t)); | ||||
| 254 | float fp32[len]; | ||||
| 255 | ccv_half_precision_to_float(data, fp32, len); | ||||
| 256 | for (i = 0; i < len; i++) | ||||
| 257 | PRINT(CCV_CLI_VERBOSE, " %f", fp32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", fp32[i]); fflush(stdout); } } while (0); | ||||
| 258 | break; | ||||
| 259 | } | ||||
| 260 | case CCV_16BF: { | ||||
| 261 | uint16_t data[len]; | ||||
| 262 | cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.f16, tensor->info.type, len * sizeof(uint16_t)); | ||||
| 263 | float fp32[len]; | ||||
| 264 | ccv_bfloat_to_float(data, fp32, len); | ||||
| 265 | for (i = 0; i < len; i++) | ||||
| 266 | PRINT(CCV_CLI_VERBOSE, " %f", fp32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", fp32[i]); fflush(stdout); } } while (0); | ||||
| 267 | break; | ||||
| 268 | } | ||||
| 269 | case CCV_32F: { | ||||
| 270 | float data[len]; | ||||
| 271 | cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.f32, tensor->info.type, len * sizeof(float)); | ||||
| 272 | for (i = 0; i < len; i++) | ||||
| 273 | PRINT(CCV_CLI_VERBOSE, " %f", data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", data[i]); fflush(stdout); } } while (0); | ||||
| 274 | break; | ||||
| 275 | } | ||||
| 276 | case CCV_64F: { | ||||
| 277 | double data[len]; | ||||
| 278 | cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.f64, tensor->info.type, len * sizeof(double)); | ||||
| 279 | for (i = 0; i < len; i++) | ||||
| 280 | PRINT(CCV_CLI_VERBOSE, " %f", data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", data[i]); fflush(stdout); } } while (0); | ||||
| 281 | break; | ||||
| 282 | } | ||||
| 283 | case CCV_32S: { | ||||
| 284 | int data[len]; | ||||
| 285 | cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.i32, tensor->info.type, len * sizeof(int)); | ||||
| 286 | for (i = 0; i < len; i++) | ||||
| 287 | PRINT(CCV_CLI_VERBOSE, " %d", data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %d", data[i]); fflush(stdout); } } while (0); | ||||
| 288 | break; | ||||
| 289 | } | ||||
| 290 | case CCV_64S: { | ||||
| 291 | int64_t data[len]; | ||||
| 292 | cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.i64, tensor->info.type, len * sizeof(int64_t)); | ||||
| 293 | for (i = 0; i < len; i++) | ||||
| 294 | PRINT(CCV_CLI_VERBOSE, " %lld", (long long)data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %lld", (long long)data[i]); fflush(stdout); } } while (0); | ||||
| 295 | break; | ||||
| 296 | } | ||||
| 297 | case CCV_8U: { | ||||
| 298 | uint8_t data[len]; | ||||
| 299 | cumemcpy(data, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, len * sizeof(uint8_t)); | ||||
| 300 | for (i = 0; i < len; i++) | ||||
| 301 | PRINT(CCV_CLI_VERBOSE, " %d", (int)data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %d", (int)data[i]); fflush(stdout); } } while (0); | ||||
| 302 | break; | ||||
| 303 | } | ||||
| 304 | } | ||||
| 305 | if (ccv_nnc_tensor_count(tensor->info) > 3) | ||||
| 306 | PRINT(CCV_CLI_VERBOSE, " ..")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" .."); fflush(stdout); } } while (0); | ||||
| 307 | #elif defined(HAVE_MPS) | ||||
| 308 | switch (tensor->info.datatype) | ||||
| 309 | { | ||||
| 310 | case CCV_16F: { | ||||
| 311 | uint16_t data[len]; | ||||
| 312 | mpmemcpy(data, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.f16, tensor->dataof, tensor->info.type, len * sizeof(uint16_t)); | ||||
| 313 | float fp32[len]; | ||||
| 314 | ccv_half_precision_to_float(data, fp32, len); | ||||
| 315 | for (i = 0; i < len; i++) | ||||
| 316 | PRINT(CCV_CLI_VERBOSE, " %f", fp32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", fp32[i]); fflush(stdout); } } while (0); | ||||
| 317 | break; | ||||
| 318 | } | ||||
| 319 | case CCV_16BF: { | ||||
| 320 | uint16_t data[len]; | ||||
| 321 | mpmemcpy(data, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.f16, tensor->dataof, tensor->info.type, len * sizeof(uint16_t)); | ||||
| 322 | float fp32[len]; | ||||
| 323 | ccv_bfloat_to_float(data, fp32, len); | ||||
| 324 | for (i = 0; i < len; i++) | ||||
| 325 | PRINT(CCV_CLI_VERBOSE, " %f", fp32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", fp32[i]); fflush(stdout); } } while (0); | ||||
| 326 | break; | ||||
| 327 | } | ||||
| 328 | case CCV_32F: { | ||||
| 329 | float data[len]; | ||||
| 330 | mpmemcpy(data, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.f32, tensor->dataof, tensor->info.type, len * sizeof(float)); | ||||
| 331 | for (i = 0; i < len; i++) | ||||
| 332 | PRINT(CCV_CLI_VERBOSE, " %f", data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", data[i]); fflush(stdout); } } while (0); | ||||
| 333 | break; | ||||
| 334 | } | ||||
| 335 | case CCV_64F: { | ||||
| 336 | double data[len]; | ||||
| 337 | mpmemcpy(data, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.f64, tensor->dataof, tensor->info.type, len * sizeof(double)); | ||||
| 338 | for (i = 0; i < len; i++) | ||||
| 339 | PRINT(CCV_CLI_VERBOSE, " %f", data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", data[i]); fflush(stdout); } } while (0); | ||||
| 340 | break; | ||||
| 341 | } | ||||
| 342 | case CCV_32S: { | ||||
| 343 | int data[len]; | ||||
| 344 | mpmemcpy(data, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.i32, tensor->dataof, tensor->info.type, len * sizeof(int)); | ||||
| 345 | for (i = 0; i < len; i++) | ||||
| 346 | PRINT(CCV_CLI_VERBOSE, " %d", data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %d", data[i]); fflush(stdout); } } while (0); | ||||
| 347 | break; | ||||
| 348 | } | ||||
| 349 | case CCV_64S: { | ||||
| 350 | int64_t data[len]; | ||||
| 351 | mpmemcpy(data, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.i64, tensor->dataof, tensor->info.type, len * sizeof(int64_t)); | ||||
| 352 | for (i = 0; i < len; i++) | ||||
| 353 | PRINT(CCV_CLI_VERBOSE, " %lld", (long long)data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %lld", (long long)data[i]); fflush(stdout); } } while (0); | ||||
| 354 | break; | ||||
| 355 | } | ||||
| 356 | case CCV_8U: { | ||||
| 357 | uint8_t data[len]; | ||||
| 358 | mpmemcpy(data, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, len * sizeof(uint8_t)); | ||||
| 359 | for (i = 0; i < len; i++) | ||||
| 360 | PRINT(CCV_CLI_VERBOSE, " %d", (int)data[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %d", (int)data[i]); fflush(stdout); } } while (0); | ||||
| 361 | break; | ||||
| 362 | } | ||||
| 363 | } | ||||
| 364 | if (ccv_nnc_tensor_count(tensor->info) > 3) | ||||
| 365 | PRINT(CCV_CLI_VERBOSE, " ..")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" .."); fflush(stdout); } } while (0); | ||||
| 366 | #endif | ||||
| 367 | } else if (CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3) == CCV_TENSOR_CPU_MEMORY) { | ||||
| 368 | switch (tensor->info.datatype) | ||||
| 369 | { | ||||
| 370 | case CCV_16F: { | ||||
| 371 | float fp32[len]; | ||||
| 372 | ccv_half_precision_to_float((uint16_t*)tensor->data.f16, fp32, len); | ||||
| 373 | for (i = 0; i < len; i++) | ||||
| 374 | PRINT(CCV_CLI_VERBOSE, " %f", fp32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", fp32[i]); fflush(stdout); } } while (0); | ||||
| 375 | break; | ||||
| 376 | } | ||||
| 377 | case CCV_16BF: { | ||||
| 378 | float fp32[len]; | ||||
| 379 | ccv_bfloat_to_float((uint16_t*)tensor->data.f16, fp32, len); | ||||
| 380 | for (i = 0; i < len; i++) | ||||
| 381 | PRINT(CCV_CLI_VERBOSE, " %f", fp32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", fp32[i]); fflush(stdout); } } while (0); | ||||
| 382 | break; | ||||
| 383 | } | ||||
| 384 | case CCV_32F: | ||||
| 385 | for (i = 0; i < len; i++) | ||||
| 386 | PRINT(CCV_CLI_VERBOSE, " %f", tensor->data.f32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", tensor->data.f32[i]); fflush(stdout); } } while (0); | ||||
| 387 | break; | ||||
| 388 | case CCV_64F: | ||||
| 389 | for (i = 0; i < len; i++) | ||||
| 390 | PRINT(CCV_CLI_VERBOSE, " %f", tensor->data.f64[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %f", tensor->data.f64[i]); fflush(stdout); } } while (0); | ||||
| 391 | break; | ||||
| 392 | case CCV_32S: | ||||
| 393 | for (i = 0; i < len; i++) | ||||
| 394 | PRINT(CCV_CLI_VERBOSE, " %d", tensor->data.i32[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %d", tensor->data.i32[i]); fflush(stdout); } } while (0); | ||||
| 395 | break; | ||||
| 396 | case CCV_64S: | ||||
| 397 | for (i = 0; i < len; i++) | ||||
| 398 | PRINT(CCV_CLI_VERBOSE, " %lld", (long long)tensor->data.i64[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %lld", (long long)tensor->data.i64[i]); fflush (stdout); } } while (0); | ||||
| 399 | break; | ||||
| 400 | case CCV_8U: | ||||
| 401 | for (i = 0; i < len; i++) | ||||
| 402 | PRINT(CCV_CLI_VERBOSE, " %d", (int)tensor->data.u8[i])do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" %d", (int)tensor->data.u8[i]); fflush(stdout); } } while (0); | ||||
| 403 | break; | ||||
| 404 | } | ||||
| 405 | if (ccv_nnc_tensor_count(tensor->info) > 3) | ||||
| 406 | PRINT(CCV_CLI_VERBOSE, " ..")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf(" .."); fflush(stdout); } } while (0); | ||||
| 407 | } | ||||
| 408 | } | ||||
| 409 | |||||
| 410 | static co_decl(_ccv_nnc_graph_topsorted_run_coro, (ccv_nnc_graph_t* const graph, const int exec_idx, const ccv_nnc_graph_static_schedule_t* const schedule, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, const int flags))co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t* const _self, void* const _privates_); struct _ccv_nnc_graph_topsorted_run_coro_param_s { struct { ccv_nnc_graph_t* const graph;const int exec_idx;const ccv_nnc_graph_static_schedule_t* const schedule;ccv_nnc_graph_exec_info_t * const exec;ccv_nnc_tensor_tape_t* const tensor_tape;ccv_nnc_stream_context_t * const stream_context;const int flags;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params; }; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size (void);; | ||||
| 411 | |||||
| 412 | static co_decl_task(_ccv_nnc_graph_exec_cases_of_coro, (ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, const ccv_nnc_graph_exec_schedule_t* const schd, ccv_nnc_tensor_t* const* const inputs, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, int flags), private(co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t* const _self, void* const _privates_); struct _ccv_nnc_graph_exec_cases_of_coro_param_s { struct { ccv_nnc_graph_t* const graph;const int exec_idx;ccv_nnc_graph_exec_info_t * const exec;const ccv_nnc_graph_exec_schedule_t* const schd; ccv_nnc_tensor_t* const* const inputs;ccv_nnc_tensor_tape_t* const tensor_tape;ccv_nnc_stream_context_t* const stream_context;int flags;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; } _co_params; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void); struct _ccv_nnc_graph_exec_cases_of_coro_private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref ; ccv_nnc_graph_t* sub_graph; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void) { return sizeof(struct _ccv_nnc_graph_exec_cases_of_coro_private_s ); } co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref; ccv_nnc_graph_t* sub_graph; }; switch (_self_->line ) { case 0: | ||||
| 413 | int ref;co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t* const _self, void* const _privates_); struct _ccv_nnc_graph_exec_cases_of_coro_param_s { struct { ccv_nnc_graph_t* const graph;const int exec_idx;ccv_nnc_graph_exec_info_t * const exec;const ccv_nnc_graph_exec_schedule_t* const schd; ccv_nnc_tensor_t* const* const inputs;ccv_nnc_tensor_tape_t* const tensor_tape;ccv_nnc_stream_context_t* const stream_context;int flags;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; } _co_params; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void); struct _ccv_nnc_graph_exec_cases_of_coro_private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref ; ccv_nnc_graph_t* sub_graph; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void) { return sizeof(struct _ccv_nnc_graph_exec_cases_of_coro_private_s ); } co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref; ccv_nnc_graph_t* sub_graph; }; switch (_self_->line ) { case 0: | ||||
| 414 | ccv_nnc_graph_t* sub_graph;co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t* const _self, void* const _privates_); struct _ccv_nnc_graph_exec_cases_of_coro_param_s { struct { ccv_nnc_graph_t* const graph;const int exec_idx;ccv_nnc_graph_exec_info_t * const exec;const ccv_nnc_graph_exec_schedule_t* const schd; ccv_nnc_tensor_t* const* const inputs;ccv_nnc_tensor_tape_t* const tensor_tape;ccv_nnc_stream_context_t* const stream_context;int flags;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; } _co_params; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void); struct _ccv_nnc_graph_exec_cases_of_coro_private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref ; ccv_nnc_graph_t* sub_graph; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void) { return sizeof(struct _ccv_nnc_graph_exec_cases_of_coro_private_s ); } co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref; ccv_nnc_graph_t* sub_graph; }; switch (_self_->line ) { case 0: | ||||
| 415 | ))co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t* const _self, void* const _privates_); struct _ccv_nnc_graph_exec_cases_of_coro_param_s { struct { ccv_nnc_graph_t* const graph;const int exec_idx;ccv_nnc_graph_exec_info_t * const exec;const ccv_nnc_graph_exec_schedule_t* const schd; ccv_nnc_tensor_t* const* const inputs;ccv_nnc_tensor_tape_t* const tensor_tape;ccv_nnc_stream_context_t* const stream_context;int flags;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; } _co_params; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void); struct _ccv_nnc_graph_exec_cases_of_coro_private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref ; ccv_nnc_graph_t* sub_graph; }; size_t _ccv_nnc_graph_exec_cases_of_coro_stack_size (void) { return sizeof(struct _ccv_nnc_graph_exec_cases_of_coro_private_s ); } co_state_t _ccv_nnc_graph_exec_cases_of_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_cases_of_coro_param_s _co_params; int ref; ccv_nnc_graph_t* sub_graph; }; switch (_self_->line ) { case 0: { | ||||
| 416 | // Wait until this stream context is done. | ||||
| 417 | co_stream_await(CO_P(stream_context))do { if (!_co_stream_await(_self_, (((struct _private_s*)(_privates_ ))->_co_params._co_params.stream_context))) { return (co_state_t ){ 417, 0 }; } case 417: ; } while (0); | ||||
| 418 | if (CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->cmd.cmd == CCV_NNC_GRAPH_FORWARD) | ||||
| 419 | { | ||||
| 420 | CO_V(ref)(((struct _private_s*)(_privates_))->ref) = CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->case_of.offset + CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->case_of.expr(CO_P(inputs)(((struct _private_s*)(_privates_))->_co_params._co_params .inputs), CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->input_size, CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->case_of.data); | ||||
| 421 | if (CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape)) | ||||
| 422 | ccv_nnc_tensor_tape_set_numbering(CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape), CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), (ccv_nnc_graph_exec_t){ | ||||
| 423 | .d = CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx), | ||||
| 424 | .graph = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), | ||||
| 425 | }, CO_V(ref)(((struct _private_s*)(_privates_))->ref)); | ||||
| 426 | } else { | ||||
| 427 | assert(CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_)) ->_co_params._co_params.exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ) ; else __assert_fail ("CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD" , "ccv_nnc_graph_run.c", 427, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 428 | assert(CO_P(tensor_tape))((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.tensor_tape)) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_))->_co_params._co_params.tensor_tape )) ; else __assert_fail ("CO_P(tensor_tape)", "ccv_nnc_graph_run.c" , 428, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 429 | CO_V(ref)(((struct _private_s*)(_privates_))->ref) = ccv_nnc_tensor_tape_numbering(CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape), CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), (ccv_nnc_graph_exec_t){ | ||||
| 430 | .d = CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx), | ||||
| 431 | .graph = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), | ||||
| 432 | }); | ||||
| 433 | } | ||||
| 434 | if (CO_V(ref)(((struct _private_s*)(_privates_))->ref) >= 0) | ||||
| 435 | { | ||||
| 436 | assert(CO_V(ref) < CO_P(exec)->graph_ref_size)((void) sizeof (((((struct _private_s*)(_privates_))->ref) < (((struct _private_s*)(_privates_))->_co_params._co_params .exec)->graph_ref_size) ? 1 : 0), __extension__ ({ if (((( struct _private_s*)(_privates_))->ref) < (((struct _private_s *)(_privates_))->_co_params._co_params.exec)->graph_ref_size ) ; else __assert_fail ("CO_V(ref) < CO_P(exec)->graph_ref_size" , "ccv_nnc_graph_run.c", 436, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 437 | CO_V(sub_graph)(((struct _private_s*)(_privates_))->sub_graph) = *(ccv_nnc_graph_t**)ccv_array_get(CO_P(graph)->sub_graphs, CCV_NNC_GRAPH_REF(CO_P(exec))[CO_V(ref)] - 1)((void*)(((char*)(((((struct _private_s*)(_privates_))->_co_params ._co_params.graph)->sub_graphs)->data)) + (size_t)((((struct _private_s*)(_privates_))->_co_params._co_params.graph)-> sub_graphs)->rsize * (size_t)((((((struct _private_s*)(_privates_ ))->_co_params._co_params.exec))->_heap_graph_ref ? ((( (struct _private_s*)(_privates_))->_co_params._co_params.exec ))->_heap_graph_ref : ((((struct _private_s*)(_privates_)) ->_co_params._co_params.exec))->_inline_graph_ref)[(((struct _private_s*)(_privates_))->ref)] - 1))); | ||||
| 438 | assert(CO_P(schd)->stream_size == 1)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.schd)->stream_size == 1) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_))->_co_params._co_params .schd)->stream_size == 1) ; else __assert_fail ("CO_P(schd)->stream_size == 1" , "ccv_nnc_graph_run.c", 438, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 439 | assert(CO_P(graph)->streams[SCHEDULE_STREAMS(*CO_P(schd))[0]] == CO_V(sub_graph)->streams[0])((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.graph)->streams[((*(((struct _private_s*)(_privates_ ))->_co_params._co_params.schd)).stream_size <= 1 ? (*( ((struct _private_s*)(_privates_))->_co_params._co_params. schd))._inline_streams : (*(((struct _private_s*)(_privates_) )->_co_params._co_params.schd))._heap_streams)[0]] == (((struct _private_s*)(_privates_))->sub_graph)->streams[0]) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_ ))->_co_params._co_params.graph)->streams[((*(((struct _private_s *)(_privates_))->_co_params._co_params.schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_))->_co_params._co_params .schd))._inline_streams : (*(((struct _private_s*)(_privates_ ))->_co_params._co_params.schd))._heap_streams)[0]] == ((( struct _private_s*)(_privates_))->sub_graph)->streams[0 ]) ; else __assert_fail ("CO_P(graph)->streams[SCHEDULE_STREAMS(*CO_P(schd))[0]] == CO_V(sub_graph)->streams[0]" , "ccv_nnc_graph_run.c", 439, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 440 | co_apply(_ccv_nnc_graph_topsorted_run_coro, (CO_V(sub_graph), CO_P(exec_idx), CO_V(sub_graph)->default_schedule, CO_P(exec), CO_P(tensor_tape), CO_P(graph)->streams[SCHEDULE_STREAMS(*CO_P(schd))[0]], CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_topsorted_run_coro_stack_size ())); do { struct _ccv_nnc_graph_topsorted_run_coro_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> sub_graph), (((struct _private_s*)(_privates_))->_co_params ._co_params.exec_idx), (((struct _private_s*)(_privates_))-> sub_graph)->default_schedule, (((struct _private_s*)(_privates_ ))->_co_params._co_params.exec), (((struct _private_s*)(_privates_ ))->_co_params._co_params.tensor_tape), (((struct _private_s *)(_privates_))->_co_params._co_params.graph)->streams[ ((*(((struct _private_s*)(_privates_))->_co_params._co_params .schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_ ))->_co_params._co_params.schd))._inline_streams : (*(((struct _private_s*)(_privates_))->_co_params._co_params.schd))._heap_streams )[0]], (((struct _private_s*)(_privates_))->_co_params._co_params .flags) } }; task->fn = _ccv_nnc_graph_topsorted_run_coro; task->line = 0; task->done = 0; task->root = 0; task ->other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params ) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); _co_apply(_self_, _self_->callee); return ( co_state_t){ 440, 0 }; case 440: co_free(_self_->callee); _self_ ->callee = 0; } while (0); | ||||
| 441 | } | ||||
| 442 | _ccv_nnc_graph_exec_unwrap_phi(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec), CO_V(ref)(((struct _private_s*)(_privates_))->ref)); | ||||
| 443 | } co_end()default: return (co_state_t){ 443, 1 }; } } | ||||
| 444 | |||||
| 445 | typedef struct { | ||||
| 446 | ccv_nnc_graph_t* graph; | ||||
| 447 | const ccv_nnc_graph_exec_schedule_t* node; | ||||
| 448 | ccv_nnc_stream_context_t* stream; | ||||
| 449 | } ccv_nnc_graph_neighbor_context_discovery_t; | ||||
| 450 | |||||
| 451 | static ccv_nnc_stream_context_t* _ccv_nnc_graph_neighbor_context_discovery(const int device_id, void* const context) | ||||
| 452 | { | ||||
| 453 | const ccv_nnc_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_graph_neighbor_context_discovery_t*)context; | ||||
| 454 | if (CCV_STREAM_GET_DEVICE_ID(ccv_nnc_stream_context_type(discovery->stream))(((ccv_nnc_stream_context_type(discovery->stream)) & 0xfff00 ) >> 8) == device_id) | ||||
| 455 | return discovery->stream; | ||||
| 456 | ccv_nnc_graph_t* const graph = discovery->graph; | ||||
| 457 | const ccv_nnc_graph_exec_schedule_t* const node = discovery->node; | ||||
| 458 | int i; | ||||
| 459 | // First try to find in other streams of the same node. | ||||
| 460 | for (i = 0; i < node->stream_size; i++) | ||||
| 461 | { | ||||
| 462 | ccv_nnc_stream_context_t* const stream = graph->streams[SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node )._heap_streams)[i]]; | ||||
| 463 | if (CCV_STREAM_GET_DEVICE_ID(ccv_nnc_stream_context_type(stream))(((ccv_nnc_stream_context_type(stream)) & 0xfff00) >> 8) == device_id) | ||||
| 464 | return stream; | ||||
| 465 | } | ||||
| 466 | // If cannot find, try to find in all the wait streams. | ||||
| 467 | for (i = 0; i < node->wait_size; i++) | ||||
| 468 | { | ||||
| 469 | ccv_nnc_stream_context_t* stream_context = ccv_nnc_stream_signal_get_emitter(graph->signals[node->waits[i]]); | ||||
| 470 | if (stream_context && CCV_STREAM_GET_DEVICE_ID(ccv_nnc_stream_context_type(stream_context))(((ccv_nnc_stream_context_type(stream_context)) & 0xfff00 ) >> 8) == device_id) | ||||
| 471 | return stream_context; | ||||
| 472 | } | ||||
| 473 | return 0; | ||||
| 474 | } | ||||
| 475 | |||||
| 476 | static co_routine_t* _ccv_nnc_graph_exec_run_task(ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node, const ccv_nnc_graph_exec_schedule_t* const schd, const int idx, ccv_nnc_tensor_tape_t* const tensor_tape, const int flags) | ||||
| 477 | { | ||||
| 478 | _ccv_nnc_graph_exec_unwrap_io(graph, node); | ||||
| 479 | ccv_nnc_tensor_t** inputs = node->inputs; | ||||
| 480 | ccv_nnc_tensor_t** outputs = inputs ? inputs + node->input_size : 0; | ||||
| 481 | if (tensor_tape) | ||||
| 482 | ccv_nnc_tensor_tape_io(tensor_tape, graph, node->input_flags, inputs, node->input_size, node->output_flags, outputs, node->output_size); | ||||
| 483 | /* Broadcast the updates to all subscribed references for input / output, even though at th | ||||
| 484 | * time output is not written yet, propagate pointer change is still valid. */ | ||||
| 485 | _ccv_nnc_graph_exec_begin_synchronize_multiviews(graph, node); | ||||
| 486 | if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) | ||||
| 487 | { | ||||
| 488 | if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) | ||||
| 489 | { | ||||
| 490 | ccv_nnc_stream_context_t* const node_stream = graph->streams[SCHEDULE_STREAMS(*schd)((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd )._heap_streams)[0]]; | ||||
| 491 | return co_new(_ccv_nnc_graph_exec_cases_of_coro, (graph, idx, node, schd, inputs, tensor_tape, node_stream, flags))({ co_routine_t* const task = malloc((sizeof(co_routine_t) + _ccv_nnc_graph_exec_cases_of_coro_stack_size ())); do { struct _ccv_nnc_graph_exec_cases_of_coro_param_s params = { ._co_params = { graph, idx, node, schd, inputs, tensor_tape , node_stream, flags } }; task->fn = _ccv_nnc_graph_exec_cases_of_coro ; task->line = 0; task->done = 0; task->root = 0; task ->other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params ) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); | ||||
| 492 | } else if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) { | ||||
| 493 | ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[0] - 1)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t )(graph->sub_graphs)->rsize * (size_t)(((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node)->_inline_graph_ref) [0] - 1))); | ||||
| 494 | assert(graph->streams[SCHEDULE_STREAMS(*schd)[0]] == sub_graph->streams[0])((void) sizeof ((graph->streams[((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd)._heap_streams)[0]] == sub_graph ->streams[0]) ? 1 : 0), __extension__ ({ if (graph->streams [((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd )._heap_streams)[0]] == sub_graph->streams[0]) ; else __assert_fail ("graph->streams[SCHEDULE_STREAMS(*schd)[0]] == sub_graph->streams[0]" , "ccv_nnc_graph_run.c", 494, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 495 | return co_new(_ccv_nnc_graph_topsorted_run_coro, (sub_graph, idx, sub_graph->default_schedule, node, tensor_tape, graph->streams[SCHEDULE_STREAMS(*schd)[0]], flags))({ co_routine_t* const task = malloc((sizeof(co_routine_t) + _ccv_nnc_graph_topsorted_run_coro_stack_size ())); do { struct _ccv_nnc_graph_topsorted_run_coro_param_s params = { ._co_params = { sub_graph, idx, sub_graph->default_schedule , node, tensor_tape, graph->streams[((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd)._heap_streams)[0]], flags } }; task->fn = _ccv_nnc_graph_topsorted_run_coro; task-> line = 0; task->done = 0; task->root = 0; task->other_size = 0; task->notify_any = 0; task->others = 0; task-> caller = 0; task->callee = 0; if (sizeof(params) > 0) memcpy (task + 1, ¶ms, sizeof(params)); } while (0); task; } ); | ||||
| 496 | } | ||||
| 497 | } else { | ||||
| 498 | PRINT(CCV_CLI_INFO, "%s [%d]: [%d] -> [%d] (%d)\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size, SCHEDULE_STREAMS(*schd)[0])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s [%d]: [%d] -> [%d] (%d)\n", ccv_nnc_cmd_name(node-> cmd.cmd), idx, node->input_size, node->output_size, ((* schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd) ._heap_streams)[0]); fflush(stdout); } } while (0); | ||||
| 499 | int i, j; | ||||
| 500 | int flag = 0; | ||||
| 501 | for (i = 0; i < schd->stream_size; i++) | ||||
| 502 | { | ||||
| 503 | ccv_nnc_stream_context_t* const stream = graph->streams[SCHEDULE_STREAMS(*schd)((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd )._heap_streams)[i]]; | ||||
| 504 | for (j = 0; j < schd->wait_size; j++) | ||||
| 505 | { | ||||
| 506 | ccv_nnc_stream_context_wait_signal(stream, graph->signals[schd->waits[j]]); | ||||
| 507 | if (!flag) | ||||
| 508 | { | ||||
| 509 | PRINT(CCV_CLI_INFO, "Wait: (%d, %d)", SCHEDULE_STREAMS(*schd)[i], schd->waits[j])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("Wait: (%d, %d)", ((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd)._heap_streams)[i], schd->waits[j]); fflush(stdout ); } } while (0); | ||||
| 510 | flag = 1; | ||||
| 511 | } else | ||||
| 512 | PRINT(CCV_CLI_INFO, ", (%d, %d)", SCHEDULE_STREAMS(*schd)[i], schd->waits[j])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (", (%d, %d)", ((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd)._heap_streams)[i], schd->waits[j]); fflush(stdout ); } } while (0); | ||||
| 513 | } | ||||
| 514 | } | ||||
| 515 | if (flag) | ||||
| 516 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 517 | for (i = 0; i < node->input_size; i++) | ||||
| 518 | { | ||||
| 519 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, inputs[i], (inputs[i] ? inputs[i]->data.u8 : 0), (inputs[i] ? CCV_TENSOR_GET_DEVICE_ID(inputs[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", i + 1, inputs[i], (inputs[i] ? inputs [i]->data.u8 : 0), (inputs[i] ? (((inputs[i]->info.type ) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | ||||
| 520 | if (inputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
| 521 | ccv_nnc_print_tensor_info(inputs[i]); | ||||
| 522 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 523 | } | ||||
| 524 | for (i = 0; i < node->output_size; i++) | ||||
| 525 | { | ||||
| 526 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, outputs[i], (outputs[i] ? outputs[i]->data.u8 : 0), (outputs[i] ? CCV_TENSOR_GET_DEVICE_ID(outputs[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", i + 1, outputs[i], (outputs[i] ? outputs [i]->data.u8 : 0), (outputs[i] ? (((outputs[i]->info.type ) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | ||||
| 527 | if (outputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
| 528 | ccv_nnc_print_tensor_shape(outputs[i]); | ||||
| 529 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 530 | } | ||||
| 531 | ccv_nnc_stream_context_t* const node_stream = graph->streams[SCHEDULE_STREAMS(*schd)((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd )._heap_streams)[0]]; | ||||
| 532 | ccv_nnc_graph_neighbor_context_discovery_t discovery_context = { | ||||
| 533 | .graph = graph, | ||||
| 534 | .node = schd, | ||||
| 535 | .stream = node_stream | ||||
| 536 | }; | ||||
| 537 | ccv_nnc_stream_context_set_neighbor_discovery(node_stream, _ccv_nnc_graph_neighbor_context_discovery, &discovery_context); | ||||
| 538 | const int status = ccv_nnc_cmd_exec(node->cmd, node->hint, flags, inputs, node->input_size, outputs, node->output_size, node_stream); | ||||
| 539 | if (status != 0) | ||||
| 540 | PRINT(CCV_CLI_INFO, "Invalid Status: %d\n", status)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("Invalid Status: %d\n", status); fflush(stdout); } } while ( 0); | ||||
| 541 | if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)(CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) | ||||
| 542 | { | ||||
| 543 | for (i = 0; i < node->output_size; i++) | ||||
| 544 | { | ||||
| 545 | PRINT(CCV_CLI_VERBOSE, "POST: |<- %d. %p (%p:%d)", i + 1, outputs[i], (outputs[i] ? outputs[i]->data.u8 : 0), (outputs[i] ? CCV_TENSOR_GET_DEVICE_ID(outputs[i]->info.type) : -1))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf("POST: |<- %d. %p (%p:%d)", i + 1, outputs[i], ( outputs[i] ? outputs[i]->data.u8 : 0), (outputs[i] ? (((outputs [i]->info.type) & 0xfff00) >> 8) : -1)); fflush( stdout); } } while (0); | ||||
| 546 | if (outputs[i]) | ||||
| 547 | ccv_nnc_print_tensor_info(outputs[i]); | ||||
| 548 | PRINT(CCV_CLI_VERBOSE, "\n")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf("\n"); fflush(stdout); } } while (0); | ||||
| 549 | } | ||||
| 550 | } | ||||
| 551 | flag = 0; | ||||
| 552 | for (i = 0; i < schd->stream_size; i++) | ||||
| 553 | if (SCHEDULE_SIGNALS(*schd)((*schd).stream_size <= 1 ? (*schd)._inline_signals : (*schd )._heap_signals)[i] >= 0) | ||||
| 554 | { | ||||
| 555 | ccv_nnc_stream_context_t* const stream = graph->streams[SCHEDULE_STREAMS(*schd)((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd )._heap_streams)[i]]; | ||||
| 556 | ccv_nnc_stream_context_emit_signal(stream, graph->signals[SCHEDULE_SIGNALS(*schd)((*schd).stream_size <= 1 ? (*schd)._inline_signals : (*schd )._heap_signals)[i]]); | ||||
| 557 | if (!flag) | ||||
| 558 | { | ||||
| 559 | PRINT(CCV_CLI_INFO, "Emit: (%d, %d)", SCHEDULE_STREAMS(*schd)[i], SCHEDULE_SIGNALS(*schd)[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("Emit: (%d, %d)", ((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd)._heap_streams)[i], ((*schd).stream_size <= 1 ? ( *schd)._inline_signals : (*schd)._heap_signals)[i]); fflush(stdout ); } } while (0); | ||||
| 560 | flag = 1; | ||||
| 561 | } else | ||||
| 562 | PRINT(CCV_CLI_INFO, ", (%d, %d)", SCHEDULE_STREAMS(*schd)[i], SCHEDULE_SIGNALS(*schd)[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (", (%d, %d)", ((*schd).stream_size <= 1 ? (*schd)._inline_streams : (*schd)._heap_streams)[i], ((*schd).stream_size <= 1 ? ( *schd)._inline_signals : (*schd)._heap_signals)[i]); fflush(stdout ); } } while (0); | ||||
| 563 | } | ||||
| 564 | if (flag) | ||||
| 565 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 566 | } | ||||
| 567 | return 0; | ||||
| 568 | } | ||||
| 569 | |||||
| 570 | static void _ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_schedule_t* const schd_info, ccv_nnc_graph_exec_info_t* const node, co_routine_t* const task) | ||||
| 571 | { | ||||
| 572 | int i, j; | ||||
| 573 | if (node->outgoings) | ||||
| 574 | for (i = 0; i < node->outgoings->rnum; i++) | ||||
| 575 | { | ||||
| 576 | const int outgoing_idx = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)( node->outgoings)->rsize * (size_t)(i))); | ||||
| 577 | const ccv_nnc_graph_exec_schedule_t* const outgoing_schd = schd_info + outgoing_idx; | ||||
| 578 | // An outgoing stream can be blocked by multiple other tasks from other streams. But it is OK, | ||||
| 579 | // because on next round of execution, that one will be marked as blocked again. | ||||
| 580 | for (j = 0; j < outgoing_schd->stream_size; j++) | ||||
| 581 | graph->block_stream_tasks[SCHEDULE_STREAMS(*outgoing_schd)((*outgoing_schd).stream_size <= 1 ? (*outgoing_schd)._inline_streams : (*outgoing_schd)._heap_streams)[j]] = task; | ||||
| 582 | } | ||||
| 583 | } | ||||
| 584 | |||||
| 585 | static co_decl_task(_ccv_nnc_graph_wait_any_sub_tasks, (ccv_nnc_graph_t* const graph, co_routine_t* const* const sub_tasks, const int sub_task_size, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int* const pending_nodes, const int pending_node_size), private(co_state_t _ccv_nnc_graph_wait_any_sub_tasks(co_routine_t* const _self, void* const _privates_); struct _ccv_nnc_graph_wait_any_sub_tasks_param_s { struct { ccv_nnc_graph_t* const graph;co_routine_t* const* const sub_tasks;const int sub_task_size;const ccv_nnc_graph_exec_schedule_t * const schd_info;const int* const pending_nodes;const int pending_node_size ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params; }; size_t _ccv_nnc_graph_wait_any_sub_tasks_stack_size (void); struct _ccv_nnc_graph_wait_any_sub_tasks_private_s { struct _ccv_nnc_graph_wait_any_sub_tasks_param_s _co_params; }; size_t _ccv_nnc_graph_wait_any_sub_tasks_stack_size(void) { return sizeof (struct _ccv_nnc_graph_wait_any_sub_tasks_private_s); } co_state_t _ccv_nnc_graph_wait_any_sub_tasks(co_routine_t* const _self_ , void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_wait_any_sub_tasks_param_s _co_params; }; switch (_self_->line) { case 0: | ||||
| 586 | ))co_state_t _ccv_nnc_graph_wait_any_sub_tasks(co_routine_t* const _self, void* const _privates_); struct _ccv_nnc_graph_wait_any_sub_tasks_param_s { struct { ccv_nnc_graph_t* const graph;co_routine_t* const* const sub_tasks;const int sub_task_size;const ccv_nnc_graph_exec_schedule_t * const schd_info;const int* const pending_nodes;const int pending_node_size ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params; }; size_t _ccv_nnc_graph_wait_any_sub_tasks_stack_size (void); struct _ccv_nnc_graph_wait_any_sub_tasks_private_s { struct _ccv_nnc_graph_wait_any_sub_tasks_param_s _co_params; }; size_t _ccv_nnc_graph_wait_any_sub_tasks_stack_size(void) { return sizeof (struct _ccv_nnc_graph_wait_any_sub_tasks_private_s); } co_state_t _ccv_nnc_graph_wait_any_sub_tasks(co_routine_t* const _self_ , void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_wait_any_sub_tasks_param_s _co_params; }; switch (_self_->line) { case 0: { | ||||
| 587 | assert(CO_P(sub_task_size) > 0)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.sub_task_size) > 0) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_))->_co_params._co_params .sub_task_size) > 0) ; else __assert_fail ("CO_P(sub_task_size) > 0" , "ccv_nnc_graph_run.c", 587, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 588 | co_await_any(CO_P(sub_tasks), CO_P(sub_task_size))do { if (!_co_await_any(_self_, (((struct _private_s*)(_privates_ ))->_co_params._co_params.sub_tasks), (((struct _private_s *)(_privates_))->_co_params._co_params.sub_task_size))) { return (co_state_t){ 588, 0 }; } case 588: ; } while (0); | ||||
| 589 | // This is not good, these local variables need to be in the private section. | ||||
| 590 | // I got away with it because there is no yield or resume or apply or any after await above. | ||||
| 591 | int i, j, k; | ||||
| 592 | for (i = 0; i < CO_P(sub_task_size)(((struct _private_s*)(_privates_))->_co_params._co_params .sub_task_size); i++) | ||||
| 593 | if (co_is_done(CO_P(sub_tasks)(((struct _private_s*)(_privates_))->_co_params._co_params .sub_tasks)[i])) | ||||
| 594 | { | ||||
| 595 | for (j = 0; j < CO_P(pending_node_size)(((struct _private_s*)(_privates_))->_co_params._co_params .pending_node_size); j++) | ||||
| 596 | { | ||||
| 597 | const ccv_nnc_graph_exec_schedule_t* const node = CO_P(schd_info)(((struct _private_s*)(_privates_))->_co_params._co_params .schd_info) + CO_P(pending_nodes)(((struct _private_s*)(_privates_))->_co_params._co_params .pending_nodes)[j]; | ||||
| 598 | for (k = 0; k < node->stream_size; k++) | ||||
| 599 | if (CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node )._heap_streams)[k]] == CO_P(sub_tasks)(((struct _private_s*)(_privates_))->_co_params._co_params .sub_tasks)[i]) | ||||
| 600 | CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*node)((*node).stream_size <= 1 ? (*node)._inline_streams : (*node )._heap_streams)[k]] = 0; | ||||
| 601 | } | ||||
| 602 | co_free(CO_P(sub_tasks)(((struct _private_s*)(_privates_))->_co_params._co_params .sub_tasks)[i]); | ||||
| 603 | } | ||||
| 604 | } co_end()default: return (co_state_t){ 604, 1 }; } } | ||||
| 605 | |||||
| 606 | static co_decl_task(_ccv_nnc_graph_exec_run_loop, (ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int* const psort, const int start_index, const int exec_info_size, ccv_nnc_tensor_tape_t* const tensor_tape, const int flags), private(co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 607 | int i, p, q;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 608 | int sub_task_size;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 609 | co_routine_t** sub_tasks;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 610 | int* pending_nodes[2];co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 611 | int pending_node_size[2];co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 612 | int idx;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 613 | ccv_nnc_graph_exec_info_t* node;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 614 | const ccv_nnc_graph_exec_schedule_t* schd;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 615 | co_routine_t* task;co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: | ||||
| 616 | ))co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self , void* const _privates_); struct _ccv_nnc_graph_exec_run_loop_param_s { struct { ccv_nnc_graph_t* const graph;ccv_nnc_graph_exec_info_t * const exec_info;const ccv_nnc_graph_exec_schedule_t* const schd_info ;const int* const psort;const int start_index;const int exec_info_size ;ccv_nnc_tensor_tape_t* const tensor_tape;const int flags;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; } _co_params ; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void); struct _ccv_nnc_graph_exec_run_loop_private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q; int sub_task_size; co_routine_t** sub_tasks ; int* pending_nodes[2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; size_t _ccv_nnc_graph_exec_run_loop_stack_size(void ) { return sizeof(struct _ccv_nnc_graph_exec_run_loop_private_s ); } co_state_t _ccv_nnc_graph_exec_run_loop(co_routine_t* const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_exec_run_loop_param_s _co_params; int i, p, q ; int sub_task_size; co_routine_t** sub_tasks; int* pending_nodes [2]; int pending_node_size[2]; int idx; ccv_nnc_graph_exec_info_t * node; const ccv_nnc_graph_exec_schedule_t* schd; co_routine_t * task; }; switch (_self_->line) { case 0: { | ||||
| 617 | CO_V(sub_task_size)(((struct _private_s*)(_privates_))->sub_task_size) = 0; | ||||
| 618 | CO_V(sub_tasks)(((struct _private_s*)(_privates_))->sub_tasks) = (co_routine_t**)ccv_nnc_graph_buffer(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), sizeof(co_routine_t*) * (CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->sub_graphs ? CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->sub_graphs->rnum : 0) + sizeof(int) * CO_P(exec_info_size)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_info_size) * 2); | ||||
| 619 | CO_V(pending_nodes)(((struct _private_s*)(_privates_))->pending_nodes)[0] = (int*)(CO_V(sub_tasks)(((struct _private_s*)(_privates_))->sub_tasks) + (CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->sub_graphs ? CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->sub_graphs->rnum : 0)); | ||||
| 620 | CO_V(pending_nodes)(((struct _private_s*)(_privates_))->pending_nodes)[1] = CO_V(pending_nodes)(((struct _private_s*)(_privates_))->pending_nodes)[0] + CO_P(exec_info_size)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_info_size); | ||||
| 621 | CO_V(pending_node_size)(((struct _private_s*)(_privates_))->pending_node_size)[0] = 0; | ||||
| 622 | CO_V(pending_node_size)(((struct _private_s*)(_privates_))->pending_node_size)[1] = 0; | ||||
| 623 | for (CO_V(i)(((struct _private_s*)(_privates_))->i) = CO_P(start_index)(((struct _private_s*)(_privates_))->_co_params._co_params .start_index); CO_V(i)(((struct _private_s*)(_privates_))->i) < CO_P(exec_info_size)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_info_size); CO_V(i)(((struct _private_s*)(_privates_))->i)++) | ||||
| 624 | { | ||||
| 625 | if (__atomic_load_n(&CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->run_state, __ATOMIC_ACQUIRE2) == CCV_NNC_GRAPH_STATE_CANCEL) | ||||
| 626 | break; | ||||
| 627 | CO_V(idx)(((struct _private_s*)(_privates_))->idx) = CO_P(psort)(((struct _private_s*)(_privates_))->_co_params._co_params .psort) ? CO_P(psort)(((struct _private_s*)(_privates_))->_co_params._co_params .psort)[CO_V(i)(((struct _private_s*)(_privates_))->i)] : CO_V(i)(((struct _private_s*)(_privates_))->i); | ||||
| 628 | CO_V(node)(((struct _private_s*)(_privates_))->node) = CO_P(exec_info)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_info) + CO_V(idx)(((struct _private_s*)(_privates_))->idx); | ||||
| 629 | CO_V(schd)(((struct _private_s*)(_privates_))->schd) = CO_P(schd_info)(((struct _private_s*)(_privates_))->_co_params._co_params .schd_info) + CO_V(idx)(((struct _private_s*)(_privates_))->idx); | ||||
| 630 | // If stream is blocked by but not blocked by current executing task. | ||||
| 631 | int blocked = 0, j; | ||||
| 632 | for (j = 0; j < CO_V(schd)(((struct _private_s*)(_privates_))->schd)->stream_size; j++) | ||||
| 633 | if (CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))((*(((struct _private_s*)(_privates_))->schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_))->schd))._inline_streams : (*(((struct _private_s*)(_privates_))->schd))._heap_streams )[j]]) | ||||
| 634 | { | ||||
| 635 | CO_V(pending_nodes)(((struct _private_s*)(_privates_))->pending_nodes)[0][CO_V(pending_node_size)(((struct _private_s*)(_privates_))->pending_node_size)[0]++] = CO_V(idx)(((struct _private_s*)(_privates_))->idx); | ||||
| 636 | _ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_P(schd_info)(((struct _private_s*)(_privates_))->_co_params._co_params .schd_info), CO_V(node)(((struct _private_s*)(_privates_))->node), CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))((*(((struct _private_s*)(_privates_))->schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_))->schd))._inline_streams : (*(((struct _private_s*)(_privates_))->schd))._heap_streams )[j]]); | ||||
| 637 | blocked = 1; | ||||
| 638 | } | ||||
| 639 | if (blocked) | ||||
| 640 | continue; | ||||
| 641 | CO_V(task)(((struct _private_s*)(_privates_))->task) = _ccv_nnc_graph_exec_run_task(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_V(node)(((struct _private_s*)(_privates_))->node), CO_V(schd)(((struct _private_s*)(_privates_))->schd), CO_V(idx)(((struct _private_s*)(_privates_))->idx), CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape), CO_P(flags)(((struct _private_s*)(_privates_))->_co_params._co_params .flags)); | ||||
| 642 | if (CO_V(task)(((struct _private_s*)(_privates_))->task)) | ||||
| 643 | { | ||||
| 644 | co_resume(CO_V(task))do { _co_resume(_self_, (((struct _private_s*)(_privates_))-> task)); return (co_state_t){ 644, 0 }; case 644: _self_->callee = 0; } while (0); | ||||
| 645 | if (!co_is_done(CO_V(task)(((struct _private_s*)(_privates_))->task))) | ||||
| 646 | { | ||||
| 647 | CO_V(sub_tasks)(((struct _private_s*)(_privates_))->sub_tasks)[CO_V(sub_task_size)(((struct _private_s*)(_privates_))->sub_task_size)++] = CO_V(task)(((struct _private_s*)(_privates_))->task); | ||||
| 648 | int j; | ||||
| 649 | for (j = 0; j < CO_V(schd)(((struct _private_s*)(_privates_))->schd)->stream_size; j++) | ||||
| 650 | CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))((*(((struct _private_s*)(_privates_))->schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_))->schd))._inline_streams : (*(((struct _private_s*)(_privates_))->schd))._heap_streams )[j]] = CO_V(task)(((struct _private_s*)(_privates_))->task); | ||||
| 651 | _ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_P(schd_info)(((struct _private_s*)(_privates_))->_co_params._co_params .schd_info), CO_V(node)(((struct _private_s*)(_privates_))->node), CO_V(task)(((struct _private_s*)(_privates_))->task)); | ||||
| 652 | } else | ||||
| 653 | co_free(CO_V(task)(((struct _private_s*)(_privates_))->task)); | ||||
| 654 | } | ||||
| 655 | } | ||||
| 656 | if (CO_V(sub_task_size)(((struct _private_s*)(_privates_))->sub_task_size)) | ||||
| 657 | co_apply(_ccv_nnc_graph_wait_any_sub_tasks, (CO_P(graph), CO_V(sub_tasks), CO_V(sub_task_size), CO_P(schd_info), CO_V(pending_nodes)[0], CO_V(pending_node_size)[0]))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_wait_any_sub_tasks_stack_size ())); do { struct _ccv_nnc_graph_wait_any_sub_tasks_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> _co_params._co_params.graph), (((struct _private_s*)(_privates_ ))->sub_tasks), (((struct _private_s*)(_privates_))->sub_task_size ), (((struct _private_s*)(_privates_))->_co_params._co_params .schd_info), (((struct _private_s*)(_privates_))->pending_nodes )[0], (((struct _private_s*)(_privates_))->pending_node_size )[0] } }; task->fn = _ccv_nnc_graph_wait_any_sub_tasks; task ->line = 0; task->done = 0; task->root = 0; task-> other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); _co_apply(_self_, _self_->callee); return ( co_state_t){ 657, 0 }; case 657: co_free(_self_->callee); _self_ ->callee = 0; } while (0); | ||||
| 658 | if (__atomic_load_n(&CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->run_state, __ATOMIC_ACQUIRE2) == CCV_NNC_GRAPH_STATE_CANCEL) | ||||
| 659 | co_return()do { return (co_state_t){ 659, 1 }; } while (0); | ||||
| 660 | CO_V(p)(((struct _private_s*)(_privates_))->p) = 0; | ||||
| 661 | CO_V(q)(((struct _private_s*)(_privates_))->q) = 1; | ||||
| 662 | while (CO_V(pending_node_size)(((struct _private_s*)(_privates_))->pending_node_size)[CO_V(p)(((struct _private_s*)(_privates_))->p)] > 0) | ||||
| 663 | { | ||||
| 664 | CO_V(pending_node_size)(((struct _private_s*)(_privates_))->pending_node_size)[CO_V(q)(((struct _private_s*)(_privates_))->q)] = 0; | ||||
| 665 | CO_V(sub_task_size)(((struct _private_s*)(_privates_))->sub_task_size) = 0; | ||||
| 666 | for (CO_V(i)(((struct _private_s*)(_privates_))->i) = 0; CO_V(i)(((struct _private_s*)(_privates_))->i) < CO_V(pending_node_size)(((struct _private_s*)(_privates_))->pending_node_size)[CO_V(p)(((struct _private_s*)(_privates_))->p)]; CO_V(i)(((struct _private_s*)(_privates_))->i)++) | ||||
| 667 | { | ||||
| 668 | CO_V(idx)(((struct _private_s*)(_privates_))->idx) = CO_V(pending_nodes)(((struct _private_s*)(_privates_))->pending_nodes)[CO_V(p)(((struct _private_s*)(_privates_))->p)][CO_V(i)(((struct _private_s*)(_privates_))->i)]; | ||||
| 669 | CO_V(node)(((struct _private_s*)(_privates_))->node) = CO_P(exec_info)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_info) + CO_V(idx)(((struct _private_s*)(_privates_))->idx); | ||||
| 670 | CO_V(schd)(((struct _private_s*)(_privates_))->schd) = CO_P(schd_info)(((struct _private_s*)(_privates_))->_co_params._co_params .schd_info) + CO_V(idx)(((struct _private_s*)(_privates_))->idx); | ||||
| 671 | int blocked = 0, j; | ||||
| 672 | for (j = 0; j < CO_V(schd)(((struct _private_s*)(_privates_))->schd)->stream_size; j++) | ||||
| 673 | if (CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))((*(((struct _private_s*)(_privates_))->schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_))->schd))._inline_streams : (*(((struct _private_s*)(_privates_))->schd))._heap_streams )[j]]) | ||||
| 674 | { | ||||
| 675 | _ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_P(schd_info)(((struct _private_s*)(_privates_))->_co_params._co_params .schd_info), CO_V(node)(((struct _private_s*)(_privates_))->node), CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))((*(((struct _private_s*)(_privates_))->schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_))->schd))._inline_streams : (*(((struct _private_s*)(_privates_))->schd))._heap_streams )[j]]); | ||||
| 676 | CO_V(pending_nodes)(((struct _private_s*)(_privates_))->pending_nodes)[CO_V(q)(((struct _private_s*)(_privates_))->q)][CO_V(pending_node_size)(((struct _private_s*)(_privates_))->pending_node_size)[CO_V(q)(((struct _private_s*)(_privates_))->q)]++] = CO_V(idx)(((struct _private_s*)(_privates_))->idx); | ||||
| 677 | blocked = 1; | ||||
| 678 | } | ||||
| 679 | if (blocked) | ||||
| 680 | continue; | ||||
| 681 | CO_V(task)(((struct _private_s*)(_privates_))->task) = _ccv_nnc_graph_exec_run_task(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_V(node)(((struct _private_s*)(_privates_))->node), CO_V(schd)(((struct _private_s*)(_privates_))->schd), CO_V(idx)(((struct _private_s*)(_privates_))->idx), CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape), CO_P(flags)(((struct _private_s*)(_privates_))->_co_params._co_params .flags)); | ||||
| 682 | if (CO_V(task)(((struct _private_s*)(_privates_))->task)) | ||||
| 683 | { | ||||
| 684 | co_resume(CO_V(task))do { _co_resume(_self_, (((struct _private_s*)(_privates_))-> task)); return (co_state_t){ 684, 0 }; case 684: _self_->callee = 0; } while (0); | ||||
| 685 | if (!co_is_done(CO_V(task)(((struct _private_s*)(_privates_))->task))) | ||||
| 686 | { | ||||
| 687 | CO_V(sub_tasks)(((struct _private_s*)(_privates_))->sub_tasks)[CO_V(sub_task_size)(((struct _private_s*)(_privates_))->sub_task_size)++] = CO_V(task)(((struct _private_s*)(_privates_))->task); | ||||
| 688 | for (j = 0; j < CO_V(schd)(((struct _private_s*)(_privates_))->schd)->stream_size; j++) | ||||
| 689 | CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->block_stream_tasks[SCHEDULE_STREAMS(*CO_V(schd))((*(((struct _private_s*)(_privates_))->schd)).stream_size <= 1 ? (*(((struct _private_s*)(_privates_))->schd))._inline_streams : (*(((struct _private_s*)(_privates_))->schd))._heap_streams )[j]] = CO_V(task)(((struct _private_s*)(_privates_))->task); | ||||
| 690 | _ccv_nnc_graph_mark_outgoing_streams_blocked_by_task(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_P(schd_info)(((struct _private_s*)(_privates_))->_co_params._co_params .schd_info), CO_V(node)(((struct _private_s*)(_privates_))->node), CO_V(task)(((struct _private_s*)(_privates_))->task)); | ||||
| 691 | } else | ||||
| 692 | co_free(CO_V(task)(((struct _private_s*)(_privates_))->task)); | ||||
| 693 | } | ||||
| 694 | } | ||||
| 695 | int t; | ||||
| 696 | CCV_SWAP(CO_V(p), CO_V(q), t)((t) = ((((struct _private_s*)(_privates_))->p)), ((((struct _private_s*)(_privates_))->p)) = ((((struct _private_s*)( _privates_))->q)), ((((struct _private_s*)(_privates_))-> q)) = (t)); | ||||
| 697 | if (CO_V(sub_task_size)(((struct _private_s*)(_privates_))->sub_task_size)) | ||||
| 698 | co_apply(_ccv_nnc_graph_wait_any_sub_tasks, (CO_P(graph), CO_V(sub_tasks), CO_V(sub_task_size), CO_P(schd_info), CO_V(pending_nodes)[CO_V(p)], CO_V(pending_node_size)[CO_V(p)]))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_wait_any_sub_tasks_stack_size ())); do { struct _ccv_nnc_graph_wait_any_sub_tasks_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> _co_params._co_params.graph), (((struct _private_s*)(_privates_ ))->sub_tasks), (((struct _private_s*)(_privates_))->sub_task_size ), (((struct _private_s*)(_privates_))->_co_params._co_params .schd_info), (((struct _private_s*)(_privates_))->pending_nodes )[(((struct _private_s*)(_privates_))->p)], (((struct _private_s *)(_privates_))->pending_node_size)[(((struct _private_s*) (_privates_))->p)] } }; task->fn = _ccv_nnc_graph_wait_any_sub_tasks ; task->line = 0; task->done = 0; task->root = 0; task ->other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params ) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); _co_apply(_self_, _self_->callee); return ( co_state_t){ 698, 0 }; case 698: co_free(_self_->callee); _self_ ->callee = 0; } while (0); | ||||
| 699 | } | ||||
| 700 | } co_end()default: return (co_state_t){ 700, 1 }; } } | ||||
| 701 | |||||
| 702 | co_task(_ccv_nnc_graph_topsorted_run_coro, (ccv_nnc_graph_t* const graph, const int exec_idx, const ccv_nnc_graph_static_schedule_t* const schedule, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, const int flags), private(struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 703 | ccv_nnc_graph_exec_info_t* exec_info;struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 704 | const ccv_nnc_graph_exec_schedule_t* schd_info;struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 705 | co_routine_t* previous_main;struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 706 | int stream_0;struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 707 | // while loopstruct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 708 | int64_t count, reverse_count;struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 709 | int graph_breakpoint_size;struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 710 | int i, j;struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: | ||||
| 711 | ))struct _ccv_nnc_graph_topsorted_run_coro_private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; size_t _ccv_nnc_graph_topsorted_run_coro_stack_size(void) { return sizeof(struct _ccv_nnc_graph_topsorted_run_coro_private_s ); } co_state_t _ccv_nnc_graph_topsorted_run_coro(co_routine_t * const _self_, void* const _privates_) { struct _private_s { struct _ccv_nnc_graph_topsorted_run_coro_param_s _co_params; ccv_nnc_graph_exec_info_t* exec_info; const ccv_nnc_graph_exec_schedule_t * schd_info; co_routine_t* previous_main; int stream_0; int64_t count, reverse_count; int graph_breakpoint_size; int i, j; } ; switch (_self_->line) { case 0: { | ||||
| 712 | assert(CO_P(graph)->stream_size > 0)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.graph)->stream_size > 0) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_))->_co_params._co_params .graph)->stream_size > 0) ; else __assert_fail ("CO_P(graph)->stream_size > 0" , "ccv_nnc_graph_run.c", 712, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 713 | int i; | ||||
| 714 | // Assign the resource container pointer. | ||||
| 715 | for (i = 0; i < CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->stream_size; i++) | ||||
| 716 | CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[i]->resource_container = CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->_inline_container; | ||||
| 717 | CO_V(exec_info)(((struct _private_s*)(_privates_))->exec_info) = (ccv_nnc_graph_exec_info_t*)ccv_array_get(CO_P(graph)->exec_info, 0)((void*)(((char*)(((((struct _private_s*)(_privates_))->_co_params ._co_params.graph)->exec_info)->data)) + (size_t)((((struct _private_s*)(_privates_))->_co_params._co_params.graph)-> exec_info)->rsize * (size_t)(0))); | ||||
| 718 | CO_V(schd_info)(((struct _private_s*)(_privates_))->schd_info) = CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->exec_info; | ||||
| 719 | CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0) = CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->stream_0; | ||||
| 720 | if (CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx) == -1) | ||||
| 721 | { | ||||
| 722 | if (CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main) | ||||
| 723 | { | ||||
| 724 | CO_V(previous_main)(((struct _private_s*)(_privates_))->previous_main) = CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main; | ||||
| 725 | CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main = co_self()(_self_); | ||||
| 726 | // Wait the previous task to be done. This makes sure that our graph run is serial on the same stream. | ||||
| 727 | assert(!co_is_done(CO_V(previous_main)))((void) sizeof ((!co_is_done((((struct _private_s*)(_privates_ ))->previous_main))) ? 1 : 0), __extension__ ({ if (!co_is_done ((((struct _private_s*)(_privates_))->previous_main))) ; else __assert_fail ("!co_is_done(CO_V(previous_main))", "ccv_nnc_graph_run.c" , 727, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 728 | co_await(CO_V(previous_main))do { if (!_co_await_any(_self_, &((((struct _private_s*)( _privates_))->previous_main)), 1)) { return (co_state_t){ 728 , 0 }; } case 728: ; } while (0); | ||||
| 729 | } else | ||||
| 730 | CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main = co_self()(_self_); | ||||
| 731 | PRINT(CCV_CLI_INFO, "Graph Stream %d Begin", CO_V(stream_0))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("Graph Stream %d Begin", (((struct _private_s*)(_privates_)) ->stream_0)); fflush(stdout); } } while (0); | ||||
| 732 | ccv_nnc_stream_signal_t* stream_0_signal; | ||||
| 733 | if (CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context) != CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)]) | ||||
| 734 | { | ||||
| 735 | // Make sure when we start work on streams[0], the current stream context is done. | ||||
| 736 | stream_0_signal = ccv_nnc_stream_context_emit_signal_new(CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)); | ||||
| 737 | ccv_nnc_stream_context_wait_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)], stream_0_signal); | ||||
| 738 | } else if (CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->stream_1_size) { | ||||
| 739 | ccv_nnc_stream_context_emit_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)], CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->begin); | ||||
| 740 | stream_0_signal = CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->begin; | ||||
| 741 | } | ||||
| 742 | int i, flag = 0; | ||||
| 743 | for (i = 0; i < CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->stream_1_size; i++) | ||||
| 744 | { | ||||
| 745 | ccv_nnc_stream_context_wait_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->stream_1s[i]], stream_0_signal); | ||||
| 746 | if (!flag) | ||||
| 747 | { | ||||
| 748 | PRINT(CCV_CLI_INFO, ", Wait: %d", CO_P(schedule)->stream_1s[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (", Wait: %d", (((struct _private_s*)(_privates_))->_co_params ._co_params.schedule)->stream_1s[i]); fflush(stdout); } } while (0); | ||||
| 749 | flag = 1; | ||||
| 750 | } else | ||||
| 751 | PRINT(CCV_CLI_INFO, ", %d", CO_P(schedule)->stream_1s[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (", %d", (((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->stream_1s[i]); fflush(stdout); } } while (0); | ||||
| 752 | } | ||||
| 753 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 754 | } else { | ||||
| 755 | assert(CO_P(stream_context) == CO_P(graph)->streams[0])((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.stream_context) == (((struct _private_s*)(_privates_ ))->_co_params._co_params.graph)->streams[0]) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_))-> _co_params._co_params.stream_context) == (((struct _private_s *)(_privates_))->_co_params._co_params.graph)->streams[ 0]) ; else __assert_fail ("CO_P(stream_context) == CO_P(graph)->streams[0]" , "ccv_nnc_graph_run.c", 755, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 756 | } | ||||
| 757 | if (CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec) && (CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)) | ||||
| 758 | { | ||||
| 759 | assert(CO_P(schedule) == CO_P(graph)->default_schedule)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.schedule) == (((struct _private_s*)(_privates_))-> _co_params._co_params.graph)->default_schedule) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_))->_co_params._co_params .schedule) == (((struct _private_s*)(_privates_))->_co_params ._co_params.graph)->default_schedule) ; else __assert_fail ("CO_P(schedule) == CO_P(graph)->default_schedule", "ccv_nnc_graph_run.c" , 759, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 760 | assert(CO_P(exec)->p_while.expr)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.exec)->p_while.expr) ? 1 : 0), __extension__ ( { if ((((struct _private_s*)(_privates_))->_co_params._co_params .exec)->p_while.expr) ; else __assert_fail ("CO_P(exec)->p_while.expr" , "ccv_nnc_graph_run.c", 760, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 761 | CO_V(count)(((struct _private_s*)(_privates_))->count) = 0; | ||||
| 762 | // This is a forward while loop. Backward while loop will just consult its pairing part. | ||||
| 763 | if (CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->cmd.cmd == CCV_NNC_GRAPH_FORWARD) | ||||
| 764 | { | ||||
| 765 | CO_V(graph_breakpoint_size)(((struct _private_s*)(_privates_))->graph_breakpoint_size ) = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->breakpoint_offset + CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->breakpoint_size; | ||||
| 766 | for (;; ++CO_V(count)(((struct _private_s*)(_privates_))->count)) | ||||
| 767 | { | ||||
| 768 | CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->while_count = CO_V(count)(((struct _private_s*)(_privates_))->count); | ||||
| 769 | if (CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape)) | ||||
| 770 | ccv_nnc_tensor_tape_set_numbering(CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape), CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->p, (ccv_nnc_graph_exec_t){ | ||||
| 771 | .d = CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx), | ||||
| 772 | .graph = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->p, | ||||
| 773 | }, CO_V(count)(((struct _private_s*)(_privates_))->count)); | ||||
| 774 | _ccv_nnc_graph_unwrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_V(count)(((struct _private_s*)(_privates_))->count), 0); | ||||
| 775 | if (CO_V(count)(((struct _private_s*)(_privates_))->count) > 0) | ||||
| 776 | _ccv_nnc_graph_transit_move_to(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 777 | co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, 0, CO_V(graph_breakpoint_size), CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size ())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> _co_params._co_params.graph), (((struct _private_s*)(_privates_ ))->exec_info), (((struct _private_s*)(_privates_))->schd_info ), 0, 0, (((struct _private_s*)(_privates_))->graph_breakpoint_size ), (((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape), (((struct _private_s*)(_privates_))->_co_params ._co_params.flags) } }; task->fn = _ccv_nnc_graph_exec_run_loop ; task->line = 0; task->done = 0; task->root = 0; task ->other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params ) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); _co_apply(_self_, _self_->callee); return ( co_state_t){ 777, 0 }; case 777: co_free(_self_->callee); _self_ ->callee = 0; } while (0); | ||||
| 778 | if (__atomic_load_n(&CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->run_state, __ATOMIC_ACQUIRE2) == CCV_NNC_GRAPH_STATE_CANCEL) | ||||
| 779 | break; | ||||
| 780 | // Reached breakpoints, now check the breakpoint, if not met, break out. | ||||
| 781 | // Wait until everything on the stream is executed. | ||||
| 782 | for (CO_V(i)(((struct _private_s*)(_privates_))->i) = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->breakpoint_offset; CO_V(i)(((struct _private_s*)(_privates_))->i) < CO_V(graph_breakpoint_size)(((struct _private_s*)(_privates_))->graph_breakpoint_size ); CO_V(i)(((struct _private_s*)(_privates_))->i)++) | ||||
| 783 | for (CO_V(j)(((struct _private_s*)(_privates_))->j) = 0; CO_V(j)(((struct _private_s*)(_privates_))->j) < CO_V(schd_info)(((struct _private_s*)(_privates_))->schd_info)[CO_V(i)(((struct _private_s*)(_privates_))->i)].stream_size; CO_V(j)(((struct _private_s*)(_privates_))->j)++) | ||||
| 784 | co_stream_await(CO_P(graph)->streams[SCHEDULE_STREAMS(CO_V(schd_info)[CO_V(i)])[CO_V(j)]])do { if (!_co_stream_await(_self_, (((struct _private_s*)(_privates_ ))->_co_params._co_params.graph)->streams[(((((struct _private_s *)(_privates_))->schd_info)[(((struct _private_s*)(_privates_ ))->i)]).stream_size <= 1 ? ((((struct _private_s*)(_privates_ ))->schd_info)[(((struct _private_s*)(_privates_))->i)] )._inline_streams : ((((struct _private_s*)(_privates_))-> schd_info)[(((struct _private_s*)(_privates_))->i)])._heap_streams )[(((struct _private_s*)(_privates_))->j)]])) { return (co_state_t ){ 784, 0 }; } case 784: ; } while (0); | ||||
| 785 | _ccv_nnc_graph_exec_unwrap_while_expr(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)); | ||||
| 786 | if (!CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->p_while.expr(CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->p_while.inputs, CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->p_while.input_size, CO_P(exec)(((struct _private_s*)(_privates_))->_co_params._co_params .exec)->p_while.data)) | ||||
| 787 | { | ||||
| 788 | _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 789 | // If we break from here, it is ok because all the streams are waited. | ||||
| 790 | break; | ||||
| 791 | } | ||||
| 792 | co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, CO_V(graph_breakpoint_size), CO_P(graph)->exec_info->rnum, CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size ())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> _co_params._co_params.graph), (((struct _private_s*)(_privates_ ))->exec_info), (((struct _private_s*)(_privates_))->schd_info ), 0, (((struct _private_s*)(_privates_))->graph_breakpoint_size ), (((struct _private_s*)(_privates_))->_co_params._co_params .graph)->exec_info->rnum, (((struct _private_s*)(_privates_ ))->_co_params._co_params.tensor_tape), (((struct _private_s *)(_privates_))->_co_params._co_params.flags) } }; task-> fn = _ccv_nnc_graph_exec_run_loop; task->line = 0; task-> done = 0; task->root = 0; task->other_size = 0; task-> notify_any = 0; task->others = 0; task->caller = 0; task ->callee = 0; if (sizeof(params) > 0) memcpy(task + 1, & params, sizeof(params)); } while (0); task; }); _co_apply(_self_ , _self_->callee); return (co_state_t){ 792, 0 }; case 792 : co_free(_self_->callee); _self_->callee = 0; } while ( 0); | ||||
| 793 | // If it is cancelled here, we don't need to breakout yet, we can breakout on earlier place. The most important thing is to avoid stream wait if there is a cancel. | ||||
| 794 | _ccv_nnc_graph_from_move_transit(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 795 | _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 796 | } | ||||
| 797 | } else { | ||||
| 798 | // For backward graph, no need to evaluate the while expr. | ||||
| 799 | assert(CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_)) ->_co_params._co_params.exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ) ; else __assert_fail ("CO_P(exec)->cmd.cmd == CCV_NNC_GRAPH_BACKWARD" , "ccv_nnc_graph_run.c", 799, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 800 | assert(CO_P(graph)->pair)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.graph)->pair) ? 1 : 0), __extension__ ({ if (( ((struct _private_s*)(_privates_))->_co_params._co_params. graph)->pair) ; else __assert_fail ("CO_P(graph)->pair" , "ccv_nnc_graph_run.c", 800, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 801 | assert(CO_P(tensor_tape))((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.tensor_tape)) ? 1 : 0), __extension__ ({ if ((((struct _private_s*)(_privates_))->_co_params._co_params.tensor_tape )) ; else __assert_fail ("CO_P(tensor_tape)", "ccv_nnc_graph_run.c" , 801, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 802 | CO_V(count)(((struct _private_s*)(_privates_))->count) = 0; | ||||
| 803 | CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count) = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->while_count = ccv_nnc_tensor_tape_numbering(CO_P(tensor_tape)(((struct _private_s*)(_privates_))->_co_params._co_params .tensor_tape), CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->p, (ccv_nnc_graph_exec_t){ | ||||
| 804 | .d = CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx), | ||||
| 805 | .graph = CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->p, | ||||
| 806 | }); | ||||
| 807 | _ccv_nnc_graph_unwrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_V(count)(((struct _private_s*)(_privates_))->count), CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count)); | ||||
| 808 | co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, CO_P(graph)->breakpoint_offset, CO_P(graph)->exec_info->rnum, CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size ())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> _co_params._co_params.graph), (((struct _private_s*)(_privates_ ))->exec_info), (((struct _private_s*)(_privates_))->schd_info ), 0, (((struct _private_s*)(_privates_))->_co_params._co_params .graph)->breakpoint_offset, (((struct _private_s*)(_privates_ ))->_co_params._co_params.graph)->exec_info->rnum, ( ((struct _private_s*)(_privates_))->_co_params._co_params. tensor_tape), (((struct _private_s*)(_privates_))->_co_params ._co_params.flags) } }; task->fn = _ccv_nnc_graph_exec_run_loop ; task->line = 0; task->done = 0; task->root = 0; task ->other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params ) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); _co_apply(_self_, _self_->callee); return ( co_state_t){ 808, 0 }; case 808: co_free(_self_->callee); _self_ ->callee = 0; } while (0); | ||||
| 809 | // If it is cancelled here, we don't need to breakout yet, we can breakout later. | ||||
| 810 | _ccv_nnc_graph_from_move_transit(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 811 | _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 812 | for (CO_V(count)(((struct _private_s*)(_privates_))->count) = 1; CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count) > 0; ++CO_V(count)(((struct _private_s*)(_privates_))->count)) | ||||
| 813 | { | ||||
| 814 | CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->while_count = --CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count); | ||||
| 815 | _ccv_nnc_graph_unwrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph), CO_V(count)(((struct _private_s*)(_privates_))->count), CO_V(reverse_count)(((struct _private_s*)(_privates_))->reverse_count)); | ||||
| 816 | _ccv_nnc_graph_transit_move_to(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 817 | co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), 0, 0, CO_P(graph)->exec_info->rnum, CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size ())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> _co_params._co_params.graph), (((struct _private_s*)(_privates_ ))->exec_info), (((struct _private_s*)(_privates_))->schd_info ), 0, 0, (((struct _private_s*)(_privates_))->_co_params._co_params .graph)->exec_info->rnum, (((struct _private_s*)(_privates_ ))->_co_params._co_params.tensor_tape), (((struct _private_s *)(_privates_))->_co_params._co_params.flags) } }; task-> fn = _ccv_nnc_graph_exec_run_loop; task->line = 0; task-> done = 0; task->root = 0; task->other_size = 0; task-> notify_any = 0; task->others = 0; task->caller = 0; task ->callee = 0; if (sizeof(params) > 0) memcpy(task + 1, & params, sizeof(params)); } while (0); task; }); _co_apply(_self_ , _self_->callee); return (co_state_t){ 817, 0 }; case 817 : co_free(_self_->callee); _self_->callee = 0; } while ( 0); | ||||
| 818 | if (__atomic_load_n(&CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->run_state, __ATOMIC_ACQUIRE2) == CCV_NNC_GRAPH_STATE_CANCEL) | ||||
| 819 | break; | ||||
| 820 | _ccv_nnc_graph_from_move_transit(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 821 | _ccv_nnc_graph_rewrap(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)); | ||||
| 822 | } | ||||
| 823 | } | ||||
| 824 | if (__atomic_load_n(&CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->run_state, __ATOMIC_ACQUIRE2) == CCV_NNC_GRAPH_STATE_CANCEL) | ||||
| 825 | { | ||||
| 826 | // The most important thing is to reset main and then return, we don't need to wait for any streaming event. | ||||
| 827 | if (CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx) == -1 && CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main == co_self()(_self_)) | ||||
| 828 | CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main = 0; | ||||
| 829 | co_return()do { return (co_state_t){ 829, 1 }; } while (0); | ||||
| 830 | } | ||||
| 831 | assert(CO_V(stream_0) == 0)((void) sizeof (((((struct _private_s*)(_privates_))->stream_0 ) == 0) ? 1 : 0), __extension__ ({ if ((((struct _private_s*) (_privates_))->stream_0) == 0) ; else __assert_fail ("CO_V(stream_0) == 0" , "ccv_nnc_graph_run.c", 831, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 832 | int i; | ||||
| 833 | for (i = 0; i < CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->wait_size; i++) | ||||
| 834 | ccv_nnc_stream_context_wait_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[0], CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->signals[CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->waits[i]]); | ||||
| 835 | } else { | ||||
| 836 | CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->while_count = 0; | ||||
| 837 | co_apply(_ccv_nnc_graph_exec_run_loop, (CO_P(graph), CO_V(exec_info), CO_V(schd_info), CO_P(schedule)->psort, 0, CO_P(schedule)->psort ? CO_P(schedule)->psort_size : CO_P(schedule)->exec_info_size, CO_P(tensor_tape), CO_P(flags)))do { _self_->callee = ({ co_routine_t* const task = malloc ((sizeof(co_routine_t) + _ccv_nnc_graph_exec_run_loop_stack_size ())); do { struct _ccv_nnc_graph_exec_run_loop_param_s params = { ._co_params = { (((struct _private_s*)(_privates_))-> _co_params._co_params.graph), (((struct _private_s*)(_privates_ ))->exec_info), (((struct _private_s*)(_privates_))->schd_info ), (((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->psort, 0, (((struct _private_s*)(_privates_))-> _co_params._co_params.schedule)->psort ? (((struct _private_s *)(_privates_))->_co_params._co_params.schedule)->psort_size : (((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->exec_info_size, (((struct _private_s*)(_privates_ ))->_co_params._co_params.tensor_tape), (((struct _private_s *)(_privates_))->_co_params._co_params.flags) } }; task-> fn = _ccv_nnc_graph_exec_run_loop; task->line = 0; task-> done = 0; task->root = 0; task->other_size = 0; task-> notify_any = 0; task->others = 0; task->caller = 0; task ->callee = 0; if (sizeof(params) > 0) memcpy(task + 1, & params, sizeof(params)); } while (0); task; }); _co_apply(_self_ , _self_->callee); return (co_state_t){ 837, 0 }; case 837 : co_free(_self_->callee); _self_->callee = 0; } while ( 0); | ||||
| 838 | if (__atomic_load_n(&CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->run_state, __ATOMIC_ACQUIRE2) == CCV_NNC_GRAPH_STATE_CANCEL) | ||||
| 839 | { | ||||
| 840 | // The most important thing is to reset main and then return, we don't need to wait for any streaming event. | ||||
| 841 | if (CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx) == -1 && CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main == co_self()(_self_)) | ||||
| 842 | CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main = 0; | ||||
| 843 | co_return()do { return (co_state_t){ 843, 1 }; } while (0); | ||||
| 844 | } | ||||
| 845 | PRINT(CCV_CLI_INFO, "Graph Stream %d End", CO_V(stream_0))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("Graph Stream %d End", (((struct _private_s*)(_privates_))-> stream_0)); fflush(stdout); } } while (0); | ||||
| 846 | int i, flag = 0; | ||||
| 847 | for (i = 0; i < CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->wait_size; i++) | ||||
| 848 | { | ||||
| 849 | ccv_nnc_stream_context_wait_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)], CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->signals[CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->waits[i]]); | ||||
| 850 | if (!flag) | ||||
| 851 | { | ||||
| 852 | PRINT(CCV_CLI_INFO, ", Wait: %d", CO_P(schedule)->waits[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (", Wait: %d", (((struct _private_s*)(_privates_))->_co_params ._co_params.schedule)->waits[i]); fflush(stdout); } } while (0); | ||||
| 853 | flag = 1; | ||||
| 854 | } else | ||||
| 855 | PRINT(CCV_CLI_INFO, ", %d", CO_P(schedule)->waits[i])do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf (", %d", (((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->waits[i]); fflush(stdout); } } while (0); | ||||
| 856 | } | ||||
| 857 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 858 | } | ||||
| 859 | if (CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context) != CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)]) | ||||
| 860 | { | ||||
| 861 | assert(CO_P(exec_idx) == -1)((void) sizeof (((((struct _private_s*)(_privates_))->_co_params ._co_params.exec_idx) == -1) ? 1 : 0), __extension__ ({ if (( ((struct _private_s*)(_privates_))->_co_params._co_params. exec_idx) == -1) ; else __assert_fail ("CO_P(exec_idx) == -1" , "ccv_nnc_graph_run.c", 861, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 862 | ccv_nnc_stream_context_emit_signal(CO_P(graph)(((struct _private_s*)(_privates_))->_co_params._co_params .graph)->streams[CO_V(stream_0)(((struct _private_s*)(_privates_))->stream_0)], CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->end); | ||||
| 863 | ccv_nnc_stream_context_wait_signal(CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context), CO_P(schedule)(((struct _private_s*)(_privates_))->_co_params._co_params .schedule)->end); | ||||
| 864 | } | ||||
| 865 | // Reset main to 0 if it is current me. | ||||
| 866 | if (CO_P(exec_idx)(((struct _private_s*)(_privates_))->_co_params._co_params .exec_idx) == -1 && CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main == co_self()(_self_)) | ||||
| 867 | CO_P(stream_context)(((struct _private_s*)(_privates_))->_co_params._co_params .stream_context)->main = 0; | ||||
| 868 | } co_end()default: return (co_state_t){ 868, 1 }; } } | ||||
| 869 | |||||
| 870 | static int _ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context); | ||||
| 871 | |||||
| 872 | static inline void _ccv_nnc_graph_exec_run(ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_info_t* const node, const int idx, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, const int flags) | ||||
| 873 | { | ||||
| 874 | int i; | ||||
| 875 | _ccv_nnc_graph_exec_unwrap_io(graph, node); | ||||
| 876 | ccv_nnc_tensor_t** inputs = node->inputs; | ||||
| 877 | ccv_nnc_tensor_t** outputs = inputs ? inputs + node->input_size : 0; | ||||
| 878 | if (tensor_tape) | ||||
| 879 | ccv_nnc_tensor_tape_io(tensor_tape, graph, node->input_flags, inputs, node->input_size, node->output_flags, outputs, node->output_size); | ||||
| 880 | /* Broadcast the updates to all subscribed references for input / output, even though at th | ||||
| 881 | * time output is not written yet, propagate pointer change is still valid. */ | ||||
| 882 | _ccv_nnc_graph_exec_begin_synchronize_multiviews(graph, node); | ||||
| 883 | if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) | ||||
| 884 | { | ||||
| 885 | assert(!stream_context)((void) sizeof ((!stream_context) ? 1 : 0), __extension__ ({ if (!stream_context) ; else __assert_fail ("!stream_context", "ccv_nnc_graph_run.c" , 885, __extension__ __PRETTY_FUNCTION__); })); // This doesn't work properly with stream context. | ||||
| 886 | if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) | ||||
| 887 | { | ||||
| 888 | int ref; | ||||
| 889 | if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD) | ||||
| 890 | { | ||||
| 891 | ref = node->case_of.offset + node->case_of.expr(inputs, node->input_size, node->case_of.data); | ||||
| 892 | if (tensor_tape) | ||||
| 893 | ccv_nnc_tensor_tape_set_numbering(tensor_tape, graph, (ccv_nnc_graph_exec_t){ | ||||
| 894 | .d = idx, | ||||
| 895 | .graph = graph, | ||||
| 896 | }, ref); | ||||
| 897 | } else { | ||||
| 898 | assert(node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)((void) sizeof ((node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) ? 1 : 0), __extension__ ({ if (node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ) ; else __assert_fail ("node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD" , "ccv_nnc_graph_run.c", 898, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 899 | assert(tensor_tape)((void) sizeof ((tensor_tape) ? 1 : 0), __extension__ ({ if ( tensor_tape) ; else __assert_fail ("tensor_tape", "ccv_nnc_graph_run.c" , 899, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 900 | ref = ccv_nnc_tensor_tape_numbering(tensor_tape, graph, (ccv_nnc_graph_exec_t){ | ||||
| 901 | .d = idx, | ||||
| 902 | .graph = graph, | ||||
| 903 | }); | ||||
| 904 | } | ||||
| 905 | if (ref >= 0) | ||||
| 906 | { | ||||
| 907 | assert(ref < node->graph_ref_size)((void) sizeof ((ref < node->graph_ref_size) ? 1 : 0), __extension__ ({ if (ref < node->graph_ref_size) ; else __assert_fail ("ref < node->graph_ref_size", "ccv_nnc_graph_run.c", 907 , __extension__ __PRETTY_FUNCTION__); })); | ||||
| 908 | ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[ref] - 1)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t )(graph->sub_graphs)->rsize * (size_t)(((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node)->_inline_graph_ref) [ref] - 1))); | ||||
| 909 | _ccv_nnc_graph_run(sub_graph, idx, node, inputs, node->input_size, outputs, node->output_size, flags, 0, 0, 0, 0, tensor_tape, stream_context); | ||||
| 910 | } | ||||
| 911 | _ccv_nnc_graph_exec_unwrap_phi(graph, node, ref); | ||||
| 912 | } else if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) { | ||||
| 913 | ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[0] - 1)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t )(graph->sub_graphs)->rsize * (size_t)(((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node)->_inline_graph_ref) [0] - 1))); | ||||
| 914 | _ccv_nnc_graph_run(sub_graph, idx, node, inputs, node->input_size, outputs, node->output_size, flags, 0, 0, 0, 0, tensor_tape, stream_context); | ||||
| 915 | } | ||||
| 916 | } else { | ||||
| 917 | PRINT(CCV_CLI_INFO, "%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd. cmd), idx, node->input_size, node->output_size); fflush (stdout); } } while (0); | ||||
| 918 | for (i = 0; i < node->input_size; i++) | ||||
| 919 | { | ||||
| 920 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, inputs[i], (inputs[i] ? inputs[i]->data.u8 : 0), (inputs[i] ? CCV_TENSOR_GET_DEVICE_ID(inputs[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", i + 1, inputs[i], (inputs[i] ? inputs [i]->data.u8 : 0), (inputs[i] ? (((inputs[i]->info.type ) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | ||||
| 921 | if (inputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
| |||||
| 922 | ccv_nnc_print_tensor_info(inputs[i]); | ||||
| 923 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 924 | } | ||||
| 925 | ccv_nnc_cmd_exec(node->cmd, node->hint, flags, inputs, node->input_size, outputs, node->output_size, stream_context); | ||||
| 926 | for (i = 0; i < node->output_size; i++) | ||||
| 927 | { | ||||
| 928 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, outputs[i], (outputs[i] ? outputs[i]->data.u8 : 0), (outputs[i] ? CCV_TENSOR_GET_DEVICE_ID(outputs[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", i + 1, outputs[i], (outputs[i] ? outputs [i]->data.u8 : 0), (outputs[i] ? (((outputs[i]->info.type ) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | ||||
| 929 | if (outputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | ||||
| 930 | ccv_nnc_print_tensor_info(outputs[i]); | ||||
| 931 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | ||||
| 932 | } | ||||
| 933 | } | ||||
| 934 | } | ||||
| 935 | |||||
| 936 | static inline void _ccv_nnc_graph_topsorted_run(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, const int flags, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | ||||
| 937 | { | ||||
| 938 | int i; | ||||
| 939 | if (exec
| ||||
| 940 | { | ||||
| 941 | assert(!stream_context)((void) sizeof ((!stream_context) ? 1 : 0), __extension__ ({ if (!stream_context) ; else __assert_fail ("!stream_context", "ccv_nnc_graph_run.c" , 941, __extension__ __PRETTY_FUNCTION__); })); // This doesn't work properly with stream context. | ||||
| 942 | assert(exec->p_while.expr)((void) sizeof ((exec->p_while.expr) ? 1 : 0), __extension__ ({ if (exec->p_while.expr) ; else __assert_fail ("exec->p_while.expr" , "ccv_nnc_graph_run.c", 942, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 943 | int64_t count = 0; | ||||
| 944 | // This is a forward while loop. Backward while loop will just consult its pairing part. | ||||
| 945 | if (exec->cmd.cmd == CCV_NNC_GRAPH_FORWARD) | ||||
| 946 | { | ||||
| 947 | const int graph_breakpoint_size = graph->breakpoint_offset + graph->breakpoint_size; | ||||
| 948 | for (;; ++count) | ||||
| 949 | { | ||||
| 950 | graph->while_count = count; | ||||
| 951 | if (tensor_tape) | ||||
| 952 | ccv_nnc_tensor_tape_set_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){ | ||||
| 953 | .d = exec_idx, | ||||
| 954 | .graph = graph->p, | ||||
| 955 | }, count); | ||||
| 956 | _ccv_nnc_graph_unwrap(graph, count, 0); | ||||
| 957 | if (count > 0) | ||||
| 958 | _ccv_nnc_graph_transit_move_to(graph); | ||||
| 959 | for (i = 0; i < graph_breakpoint_size; i++) | ||||
| 960 | _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(i))), i, tensor_tape, stream_context, flags); | ||||
| 961 | _ccv_nnc_graph_exec_unwrap_while_expr(graph, exec); | ||||
| 962 | // Reached breakpoints, now check the breakpoint, if not met, break out. | ||||
| 963 | if (!exec->p_while.expr(exec->p_while.inputs, exec->p_while.input_size, exec->p_while.data)) | ||||
| 964 | { | ||||
| 965 | _ccv_nnc_graph_rewrap(graph); | ||||
| 966 | break; | ||||
| 967 | } | ||||
| 968 | for (i = graph_breakpoint_size; i < graph->exec_info->rnum; i++) | ||||
| 969 | _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(i))), i, tensor_tape, stream_context, flags); | ||||
| 970 | _ccv_nnc_graph_from_move_transit(graph); | ||||
| 971 | _ccv_nnc_graph_rewrap(graph); | ||||
| 972 | } | ||||
| 973 | } else { | ||||
| 974 | // For backward graph, no need to evaluate the while expr. | ||||
| 975 | assert(exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)((void) sizeof ((exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) ? 1 : 0), __extension__ ({ if (exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ) ; else __assert_fail ("exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD" , "ccv_nnc_graph_run.c", 975, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 976 | assert(graph->pair)((void) sizeof ((graph->pair) ? 1 : 0), __extension__ ({ if (graph->pair) ; else __assert_fail ("graph->pair", "ccv_nnc_graph_run.c" , 976, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 977 | assert(tensor_tape)((void) sizeof ((tensor_tape) ? 1 : 0), __extension__ ({ if ( tensor_tape) ; else __assert_fail ("tensor_tape", "ccv_nnc_graph_run.c" , 977, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 978 | count = 0; | ||||
| 979 | int64_t reverse_count = graph->while_count = ccv_nnc_tensor_tape_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){ | ||||
| 980 | .d = exec_idx, | ||||
| 981 | .graph = graph->p, | ||||
| 982 | }); | ||||
| 983 | _ccv_nnc_graph_unwrap(graph, count, reverse_count); | ||||
| 984 | for (i = graph->breakpoint_offset; i < graph->exec_info->rnum; i++) | ||||
| 985 | _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(i))), i, tensor_tape, stream_context, flags); | ||||
| 986 | _ccv_nnc_graph_from_move_transit(graph); | ||||
| 987 | _ccv_nnc_graph_rewrap(graph); | ||||
| 988 | for (count = 1; reverse_count > 0; ++count) | ||||
| 989 | { | ||||
| 990 | graph->while_count = --reverse_count; | ||||
| 991 | _ccv_nnc_graph_unwrap(graph, count, reverse_count); | ||||
| 992 | _ccv_nnc_graph_transit_move_to(graph); | ||||
| 993 | for (i = 0; i < graph->exec_info->rnum; i++) | ||||
| 994 | _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(i))), i, tensor_tape, stream_context, flags); | ||||
| 995 | _ccv_nnc_graph_from_move_transit(graph); | ||||
| 996 | _ccv_nnc_graph_rewrap(graph); | ||||
| 997 | } | ||||
| 998 | } | ||||
| 999 | } else { | ||||
| 1000 | graph->while_count = 0; | ||||
| 1001 | for (i = 0; i < graph->exec_info->rnum; i++) | ||||
| 1002 | _ccv_nnc_graph_exec_run(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i)((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(i))), i, tensor_tape, stream_context, flags); | ||||
| 1003 | } | ||||
| 1004 | } | ||||
| 1005 | |||||
| 1006 | static inline void _ccv_nnc_graph_run_slow_path(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | ||||
| 1007 | { | ||||
| 1008 | int i, j; | ||||
| 1009 | const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph ->sources)->rsize * (size_t)(0))); | ||||
| 1010 | const int graph_source_size = source_size ? source_size : graph->sources->rnum; | ||||
| 1011 | const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0)((void*)(((char*)((graph->destinations)->data)) + (size_t )(graph->destinations)->rsize * (size_t)(0))); | ||||
| 1012 | const int graph_destination_size = destination_size ? destination_size : graph->destinations->rnum; | ||||
| 1013 | #define visitor(node, idx, ...) \ | ||||
| 1014 | _ccv_nnc_graph_exec_run(graph, node, idx, tensor_tape, stream_context, flags) | ||||
| 1015 | if (exec && (exec->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)) | ||||
| 1016 | { | ||||
| 1017 | assert(!stream_context)((void) sizeof ((!stream_context) ? 1 : 0), __extension__ ({ if (!stream_context) ; else __assert_fail ("!stream_context", "ccv_nnc_graph_run.c" , 1017, __extension__ __PRETTY_FUNCTION__); })); // This doesn't work properly with stream context. | ||||
| 1018 | assert(exec->p_while.expr)((void) sizeof ((exec->p_while.expr) ? 1 : 0), __extension__ ({ if (exec->p_while.expr) ; else __assert_fail ("exec->p_while.expr" , "ccv_nnc_graph_run.c", 1018, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 1019 | int64_t count = 0; | ||||
| 1020 | // This is a forward while loop. Backward while loop will just consult its pairing part. | ||||
| 1021 | if (exec->cmd.cmd == CCV_NNC_GRAPH_FORWARD) | ||||
| 1022 | { | ||||
| 1023 | ccv_array_t* follows = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), graph->breakpoint_size, 0); | ||||
| 1024 | for (i = 0; i < graph->breakpoint_size; i++) | ||||
| 1025 | { | ||||
| 1026 | const ccv_nnc_graph_exec_info_t* const exec_info = (const ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, graph->breakpoints->d)((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(graph->breakpoints ->d))); | ||||
| 1027 | if (exec_info->outgoings) | ||||
| 1028 | for (j = 0; j < exec_info->outgoings->rnum; j++) | ||||
| 1029 | { | ||||
| 1030 | const ccv_nnc_graph_exec_t exec = { | ||||
| 1031 | .d = *(int*)ccv_array_get(exec_info->outgoings, j)((void*)(((char*)((exec_info->outgoings)->data)) + (size_t )(exec_info->outgoings)->rsize * (size_t)(j))), | ||||
| 1032 | .graph = graph, | ||||
| 1033 | }; | ||||
| 1034 | ccv_array_push(follows, &exec); | ||||
| 1035 | } | ||||
| 1036 | } | ||||
| 1037 | for (;; ++count) | ||||
| 1038 | { | ||||
| 1039 | graph->while_count = count; | ||||
| 1040 | if (tensor_tape) | ||||
| 1041 | ccv_nnc_tensor_tape_set_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){ | ||||
| 1042 | .d = exec_idx, | ||||
| 1043 | .graph = graph->p, | ||||
| 1044 | }, count); | ||||
| 1045 | _ccv_nnc_graph_unwrap(graph, count, 0); | ||||
| 1046 | if (count > 0) | ||||
| 1047 | _ccv_nnc_graph_transit_move_to(graph); | ||||
| 1048 | CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph->breakpoints, graph->breakpoint_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_ ++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((graph->exec_info-> rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_ ; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof (ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof (int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph ->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t* )(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info ->rnum); for (_i_ = 0; _i_ < (graph_source_size); _i_++ ) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph ) ; else __assert_fail ("(graph_sources)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_sources)[_i_].d].r = 1; _exists_[0 ][_i_] = (graph_sources)[_i_].d; } int _exist_size_[2] = { (graph_source_size ), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0 ) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < (( ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int *)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data )) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)( (graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t )(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue ; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_source_size); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c" , 1048, __extension__ __PRETTY_FUNCTION__); })); _incomings_[ (graph_sources)[_i_].d].r = 3; _exists_[0][_i_] = (graph_sources )[_i_].d; } _exist_size_[0] = (graph_source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char *)((graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)( (void*)(((char*)((graph->exec_info)->data)) + (size_t)( graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size); _i_++) { ((void ) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph->breakpoints)[_i_].d].r = 5; _exists_ [0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_[0] = (graph->breakpoint_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0 ; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0 ), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info ->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size); _i_++) { ((void ) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph->breakpoints)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (graph_source_size); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c" , 1048, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][ _i_] = (graph_sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_ [0] = (graph_source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_ [_p_][_i_]; visitor((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0)))) + _idx_), (_idx_), (_incomings_ [_idx_].d)); if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_ ].r = 7; } if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*) ((graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings) { if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ->rnum == 1) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph ->breakpoint_size)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void *)(((char*)((graph->exec_info)->data)) + (size_t)(graph ->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings-> rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph ->breakpoint_size)) { ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size); _i_ ++) { ((void) sizeof (((graph->breakpoints)[_i_].graph == graph ) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_] .graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1048, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(graph->breakpoints)[_i_].d].r == 7 ) continue; if (!(0)) { ((void) sizeof ((_incomings_[(graph-> breakpoints)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_ [(graph->breakpoints)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph->breakpoints)[_i_].d].c == 0", "ccv_nnc_graph_run.c" , 1048, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_ [(graph->breakpoints)[_i_].d].c > 0) continue; visitor( (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0)))) + (graph->breakpoints)[_i_].d), ((graph->breakpoints )[_i_].d), (_incomings_[(graph->breakpoints)[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while (0);; | ||||
| 1049 | _ccv_nnc_graph_exec_unwrap_while_expr(graph, exec); | ||||
| 1050 | // Reached breakpoints, now check the breakpoint, if not met, break out. | ||||
| 1051 | if (!exec->p_while.expr(exec->p_while.inputs, exec->p_while.input_size, exec->p_while.data)) | ||||
| 1052 | { | ||||
| 1053 | _ccv_nnc_graph_rewrap(graph); | ||||
| 1054 | break; | ||||
| 1055 | } | ||||
| 1056 | if (follows->rnum > 0) | ||||
| 1057 | CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(follows, 0), follows->rnum, graph_destinations, graph_destination_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_ ++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((graph->exec_info-> rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_ ; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof (ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof (int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph ->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t* )(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info ->rnum); for (_i_ = 0; _i_ < (follows->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*) ((follows)->data)) + (size_t)(follows)->rsize * (size_t )(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (( (ccv_nnc_graph_exec_t*)((void*)(((char*)((follows)->data)) + (size_t)(follows)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((follows)->data)) + (size_t)(follows)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*) ((follows)->data)) + (size_t)(follows)->rsize * (size_t )(0))))[_i_].d].r = 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t *)((void*)(((char*)((follows)->data)) + (size_t)(follows)-> rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = { (follows ->rnum), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_ ] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < (( ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int *)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data )) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)( (graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t )(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue ; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (follows->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((follows)->data )) + (size_t)(follows)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t *)((void*)(((char*)((follows)->data)) + (size_t)(follows)-> rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((follows)->data)) + (size_t)(follows)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*) ((follows)->data)) + (size_t)(follows)->rsize * (size_t )(0))))[_i_].d].r = 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t *)((void*)(((char*)((follows)->data)) + (size_t)(follows)-> rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (follows-> rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue ; _incomings_[_idx_].r = 4; if (((ccv_nnc_graph_exec_info_t*) ((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].r = 5; _exists_ [0][_i_] = (graph_destinations)[_i_].d; } _exist_size_[0] = ( graph_destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1 ; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0 ), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info ->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (follows->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((follows)->data )) + (size_t)(follows)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t *)((void*)(((char*)((follows)->data)) + (size_t)(follows)-> rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((follows)->data)) + (size_t)(follows)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(( (char*)((follows)->data)) + (size_t)(follows)->rsize * ( size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = ( follows->rnum); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_ ]; visitor((((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0)))) + _idx_), (_idx_), (_incomings_[_idx_] .d)); if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings) { if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ->rnum == 1) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info ->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_ ++) { ((void) sizeof (((graph_destinations)[_i_].graph == graph ) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(graph_destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(graph_destinations )[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[ (graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0" , "ccv_nnc_graph_run.c", 1057, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(graph_destinations)[_i_].d].c > 0) continue; visitor((((ccv_nnc_graph_exec_info_t*)((void *)(((char*)((graph->exec_info)->data)) + (size_t)(graph ->exec_info)->rsize * (size_t)(0)))) + (graph_destinations )[_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations )[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while ( 0);; | ||||
| 1058 | _ccv_nnc_graph_from_move_transit(graph); | ||||
| 1059 | _ccv_nnc_graph_rewrap(graph); | ||||
| 1060 | } | ||||
| 1061 | ccv_array_free(follows); | ||||
| 1062 | } else { | ||||
| 1063 | // For backward graph, no need to evaluate the while expr. | ||||
| 1064 | assert(exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD)((void) sizeof ((exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) ? 1 : 0), __extension__ ({ if (exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ) ; else __assert_fail ("exec->cmd.cmd == CCV_NNC_GRAPH_BACKWARD" , "ccv_nnc_graph_run.c", 1064, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 1065 | assert(graph->pair)((void) sizeof ((graph->pair) ? 1 : 0), __extension__ ({ if (graph->pair) ; else __assert_fail ("graph->pair", "ccv_nnc_graph_run.c" , 1065, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 1066 | assert(tensor_tape)((void) sizeof ((tensor_tape) ? 1 : 0), __extension__ ({ if ( tensor_tape) ; else __assert_fail ("tensor_tape", "ccv_nnc_graph_run.c" , 1066, __extension__ __PRETTY_FUNCTION__); })); | ||||
| 1067 | count = 0; | ||||
| 1068 | int64_t reverse_count = graph->while_count = ccv_nnc_tensor_tape_numbering(tensor_tape, graph->p, (ccv_nnc_graph_exec_t){ | ||||
| 1069 | .d = exec_idx, | ||||
| 1070 | .graph = graph->p, | ||||
| 1071 | }); | ||||
| 1072 | _ccv_nnc_graph_unwrap(graph, count, reverse_count); | ||||
| 1073 | CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, graph_destinations, graph_destination_size, 1, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_ ++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((graph->exec_info-> rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_ ; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof (ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof (int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph ->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t* )(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info ->rnum); for (_i_ = 0; _i_ < (graph->breakpoint_size ); _i_++) { ((void) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints )[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph->breakpoints)[_i_].d].r = 1; _exists_ [0][_i_] = (graph->breakpoints)[_i_].d; } int _exist_size_ [2] = { (graph->breakpoint_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue ; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t*) ((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_ [_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum) ) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph->breakpoint_size); _i_++) { ((void ) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph->breakpoints)[_i_].d].r = 3; _exists_ [0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_[0] = (graph->breakpoint_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_ [_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_ ].r != 3) continue; _incomings_[_idx_].r = 4; if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].r = 5; _exists_ [0][_i_] = (graph_destinations)[_i_].d; } _exist_size_[0] = ( graph_destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1 ; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0 ), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info ->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (graph->breakpoint_size); _i_++) { ((void ) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (graph->breakpoints)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (graph->breakpoint_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; visitor(( ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0)))) + _idx_), (_idx_), (_incomings_[_idx_].d)); if (_incomings_ [_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info ->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_ ++) { ((void) sizeof (((graph_destinations)[_i_].graph == graph ) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(graph_destinations)[_i_].d].r == 7) continue ; if (!(1)) { ((void) sizeof ((_incomings_[(graph_destinations )[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[ (graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0" , "ccv_nnc_graph_run.c", 1073, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(graph_destinations)[_i_].d].c > 0) continue; visitor((((ccv_nnc_graph_exec_info_t*)((void *)(((char*)((graph->exec_info)->data)) + (size_t)(graph ->exec_info)->rsize * (size_t)(0)))) + (graph_destinations )[_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations )[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while ( 0);; | ||||
| 1074 | _ccv_nnc_graph_from_move_transit(graph); | ||||
| 1075 | _ccv_nnc_graph_rewrap(graph); | ||||
| 1076 | for (count = 1; reverse_count > 0; ++count) | ||||
| 1077 | { | ||||
| 1078 | graph->while_count = --reverse_count; | ||||
| 1079 | _ccv_nnc_graph_unwrap(graph, count, reverse_count); | ||||
| 1080 | _ccv_nnc_graph_transit_move_to(graph); | ||||
| 1081 | CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_ ++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((graph->exec_info-> rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_ ; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof (ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof (int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph ->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t* )(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info ->rnum); for (_i_ = 0; _i_ < (graph_source_size); _i_++ ) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph ) ; else __assert_fail ("(graph_sources)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_sources)[_i_].d].r = 1; _exists_[0 ][_i_] = (graph_sources)[_i_].d; } int _exist_size_[2] = { (graph_source_size ), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0 ) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < (( ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int *)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data )) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)( (graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t )(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue ; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_source_size); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c" , 1081, __extension__ __PRETTY_FUNCTION__); })); _incomings_[ (graph_sources)[_i_].d].r = 3; _exists_[0][_i_] = (graph_sources )[_i_].d; } _exist_size_[0] = (graph_source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char *)((graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)( (void*)(((char*)((graph->exec_info)->data)) + (size_t)( graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].r = 5; _exists_ [0][_i_] = (graph_destinations)[_i_].d; } _exist_size_[0] = ( graph_destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1 ; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0 ), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info ->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (graph_source_size); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c" , 1081, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][ _i_] = (graph_sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_ [0] = (graph_source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_ [_p_][_i_]; visitor((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0)))) + _idx_), (_idx_), (_incomings_ [_idx_].d)); if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_ ].r = 7; } if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*) ((graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings) { if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ->rnum == 1) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info ->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_ ++) { ((void) sizeof (((graph_destinations)[_i_].graph == graph ) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(graph_destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(graph_destinations )[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[ (graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0" , "ccv_nnc_graph_run.c", 1081, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(graph_destinations)[_i_].d].c > 0) continue; visitor((((ccv_nnc_graph_exec_info_t*)((void *)(((char*)((graph->exec_info)->data)) + (size_t)(graph ->exec_info)->rsize * (size_t)(0)))) + (graph_destinations )[_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations )[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while ( 0);; | ||||
| 1082 | _ccv_nnc_graph_from_move_transit(graph); | ||||
| 1083 | _ccv_nnc_graph_rewrap(graph); | ||||
| 1084 | } | ||||
| 1085 | } | ||||
| 1086 | } else { | ||||
| 1087 | graph->while_count = 0; | ||||
| 1088 | CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_ ++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((graph->exec_info-> rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_ ; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof (ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof (int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph ->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t* )(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info ->rnum); for (_i_ = 0; _i_ < (graph_source_size); _i_++ ) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph ) ; else __assert_fail ("(graph_sources)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_sources)[_i_].d].r = 1; _exists_[0 ][_i_] = (graph_sources)[_i_].d; } int _exist_size_[2] = { (graph_source_size ), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0 ) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < (( ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int *)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data )) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)( (graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t )(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue ; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_source_size); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c" , 1088, __extension__ __PRETTY_FUNCTION__); })); _incomings_[ (graph_sources)[_i_].d].r = 3; _exists_[0][_i_] = (graph_sources )[_i_].d; } _exist_size_[0] = (graph_source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char *)((graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)( (void*)(((char*)((graph->exec_info)->data)) + (size_t)( graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].r = 5; _exists_ [0][_i_] = (graph_destinations)[_i_].d; } _exist_size_[0] = ( graph_destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1 ; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_info->rnum)) ? 1 : 0 ), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info ->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_++) { ((void ) sizeof (((graph_destinations)[_i_].graph == graph) ? 1 : 0) , __extension__ ({ if ((graph_destinations)[_i_].graph == graph ) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(graph_destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (graph_source_size); _i_++) { ((void) sizeof (((graph_sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph_sources)[_i_].graph == graph) ; else __assert_fail ("(graph_sources)[_i_].graph == graph", "ccv_nnc_graph_run.c" , 1088, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][ _i_] = (graph_sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_ [0] = (graph_source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_ [_p_][_i_]; visitor((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0)))) + _idx_), (_idx_), (_incomings_ [_idx_].d)); if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_ ].r = 7; } if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*) ((graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings) { if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ->rnum == 1) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph_destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info ->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (graph_destination_size); _i_ ++) { ((void) sizeof (((graph_destinations)[_i_].graph == graph ) ? 1 : 0), __extension__ ({ if ((graph_destinations)[_i_].graph == graph) ; else __assert_fail ("(graph_destinations)[_i_].graph == graph" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(graph_destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(graph_destinations )[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[ (graph_destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph_destinations)[_i_].d].c == 0" , "ccv_nnc_graph_run.c", 1088, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(graph_destinations)[_i_].d].c > 0) continue; visitor((((ccv_nnc_graph_exec_info_t*)((void *)(((char*)((graph->exec_info)->data)) + (size_t)(graph ->exec_info)->rsize * (size_t)(0)))) + (graph_destinations )[_i_].d), ((graph_destinations)[_i_].d), (_incomings_[(graph_destinations )[_i_].d].d)); } if (_heap_mem_) free(_incomings_); } while ( 0);; | ||||
| 1089 | } | ||||
| 1090 | #undef visitor | ||||
| 1091 | } | ||||
| 1092 | |||||
| 1093 | static int _ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, const int exec_idx, ccv_nnc_graph_exec_info_t* const exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | ||||
| 1094 | { | ||||
| 1095 | assert((sources == 0 && source_size == 0) || (sources && source_size))((void) sizeof (((sources == 0 && source_size == 0) || (sources && source_size)) ? 1 : 0), __extension__ ({ if ((sources == 0 && source_size == 0) || (sources && source_size)) ; else __assert_fail ("(sources == 0 && source_size == 0) || (sources && source_size)" , "ccv_nnc_graph_run.c", 1095, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 1096 | assert((destinations == 0 && destination_size == 0) || (destinations && destination_size))((void) sizeof (((destinations == 0 && destination_size == 0) || (destinations && destination_size)) ? 1 : 0 ), __extension__ ({ if ((destinations == 0 && destination_size == 0) || (destinations && destination_size)) ; else __assert_fail ("(destinations == 0 && destination_size == 0) || (destinations && destination_size)" , "ccv_nnc_graph_run.c", 1096, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 1097 | const ccv_nnc_graph_exec_t* const graph_sources = sources
->sources)->rsize * (size_t)(0))); | ||||
| 1098 | const int graph_source_size = source_size
| ||||
| 1099 | const ccv_nnc_graph_exec_t* const graph_destinations = destinations
)(graph->destinations)->rsize * (size_t)(0))); | ||||
| 1100 | const int graph_destination_size = destination_size
| ||||
| 1101 | int i; | ||||
| 1102 | for (i = 0; i < graph_source_size; i++) | ||||
| 1103 | if (graph_sources[i].graph != graph) | ||||
| 1104 | return CCV_NNC_EXEC_INVALID; | ||||
| 1105 | for (i = 0; i < graph_destination_size; i++) | ||||
| 1106 | if (graph_destinations[i].graph != graph) | ||||
| 1107 | return CCV_NNC_EXEC_INVALID; | ||||
| 1108 | // When topsorted is true, there is no memory allocation when run the graph. | ||||
| 1109 | const int topsorted = (!sources
| ||||
| 1110 | if (topsorted) | ||||
| 1111 | _ccv_nnc_graph_topsorted_run(graph, exec_idx, exec, flags, tensor_tape, stream_context); | ||||
| 1112 | else | ||||
| 1113 | _ccv_nnc_graph_run_slow_path(graph, exec_idx, exec, inputs, input_size, outputs, output_size, flags, sources, source_size, destinations, destination_size, tensor_tape, stream_context); | ||||
| 1114 | return CCV_NNC_EXEC_SUCCESS; | ||||
| 1115 | } | ||||
| 1116 | |||||
| 1117 | int ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | ||||
| 1118 | { | ||||
| 1119 | __atomic_store_n(&graph->run_state, CCV_NNC_GRAPH_STATE_RUNNING, __ATOMIC_RELEASE3); | ||||
| 1120 | if (stream_context && graph->topsorted && graph->stream_size > 0 && graph->default_schedule && source_size == 0 && destination_size == 0) | ||||
| |||||
| 1121 | { | ||||
| 1122 | co_scheduler_t* const scheduler = ccv_nnc_stream_context_get_scheduler(stream_context); | ||||
| 1123 | co_routine_t* const task = co_new(_ccv_nnc_graph_topsorted_run_coro, (graph, -1, graph->default_schedule, 0, tensor_tape, stream_context, flags))({ co_routine_t* const task = malloc((sizeof(co_routine_t) + _ccv_nnc_graph_topsorted_run_coro_stack_size ())); do { struct _ccv_nnc_graph_topsorted_run_coro_param_s params = { ._co_params = { graph, -1, graph->default_schedule, 0 , tensor_tape, stream_context, flags } }; task->fn = _ccv_nnc_graph_topsorted_run_coro ; task->line = 0; task->done = 0; task->root = 0; task ->other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params ) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); | ||||
| 1124 | co_schedule(scheduler, task); | ||||
| 1125 | // I don't need to worry about freeing this task, it will free itself at the end. | ||||
| 1126 | return CCV_NNC_EXEC_SUCCESS; | ||||
| 1127 | } else | ||||
| 1128 | return _ccv_nnc_graph_run(graph, -1, 0, 0, 0, 0, 0, flags, sources, source_size, destinations, destination_size, tensor_tape, 0 /* In this case, we don't support stream context yet. */); | ||||
| 1129 | } | ||||
| 1130 | |||||
| 1131 | int ccv_nnc_graph_run_with_schedule(ccv_nnc_graph_t* const graph, const int flags, const ccv_nnc_graph_static_schedule_t* const _schedule, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const _stream_context) | ||||
| 1132 | { | ||||
| 1133 | assert(graph->topsorted)((void) sizeof ((graph->topsorted) ? 1 : 0), __extension__ ({ if (graph->topsorted) ; else __assert_fail ("graph->topsorted" , "ccv_nnc_graph_run.c", 1133, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 1134 | if (graph->exec_info->rnum == 0) | ||||
| 1135 | return CCV_NNC_EXEC_SUCCESS; | ||||
| 1136 | __atomic_store_n(&graph->run_state, CCV_NNC_GRAPH_STATE_RUNNING, __ATOMIC_RELEASE3); | ||||
| 1137 | assert(graph->stream_size > 0)((void) sizeof ((graph->stream_size > 0) ? 1 : 0), __extension__ ({ if (graph->stream_size > 0) ; else __assert_fail ("graph->stream_size > 0" , "ccv_nnc_graph_run.c", 1137, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 1138 | const ccv_nnc_graph_static_schedule_t* const schedule = _schedule ? _schedule : graph->default_schedule; | ||||
| 1139 | assert(schedule)((void) sizeof ((schedule) ? 1 : 0), __extension__ ({ if (schedule ) ; else __assert_fail ("schedule", "ccv_nnc_graph_run.c", 1139 , __extension__ __PRETTY_FUNCTION__); })); | ||||
| 1140 | assert(schedule->stream_0 < graph->stream_size)((void) sizeof ((schedule->stream_0 < graph->stream_size ) ? 1 : 0), __extension__ ({ if (schedule->stream_0 < graph ->stream_size) ; else __assert_fail ("schedule->stream_0 < graph->stream_size" , "ccv_nnc_graph_run.c", 1140, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| 1141 | ccv_nnc_stream_context_t* const stream_context = _stream_context ? _stream_context : graph->streams[schedule->stream_0]; | ||||
| 1142 | co_scheduler_t* const scheduler = ccv_nnc_stream_context_get_scheduler(stream_context); | ||||
| 1143 | co_routine_t* const task = co_new(_ccv_nnc_graph_topsorted_run_coro, (graph, -1, schedule, 0, tensor_tape, stream_context, flags))({ co_routine_t* const task = malloc((sizeof(co_routine_t) + _ccv_nnc_graph_topsorted_run_coro_stack_size ())); do { struct _ccv_nnc_graph_topsorted_run_coro_param_s params = { ._co_params = { graph, -1, schedule, 0, tensor_tape, stream_context , flags } }; task->fn = _ccv_nnc_graph_topsorted_run_coro; task->line = 0; task->done = 0; task->root = 0; task ->other_size = 0; task->notify_any = 0; task->others = 0; task->caller = 0; task->callee = 0; if (sizeof(params ) > 0) memcpy(task + 1, ¶ms, sizeof(params)); } while (0); task; }); | ||||
| 1144 | co_schedule(scheduler, task); | ||||
| 1145 | // I don't need to worry about freeing this task, it will free itself at the end. | ||||
| 1146 | if (!_stream_context) // If no stream context provided, this is a sync operation. | ||||
| 1147 | ccv_nnc_stream_context_wait(stream_context); | ||||
| 1148 | return CCV_NNC_EXEC_SUCCESS; | ||||
| 1149 | } | ||||
| 1150 | |||||
| 1151 | void ccv_nnc_graph_cancel(ccv_nnc_graph_t* const graph) | ||||
| 1152 | { | ||||
| 1153 | __atomic_store_n(&graph->run_state, CCV_NNC_GRAPH_STATE_CANCEL, __ATOMIC_RELEASE3); | ||||
| 1154 | } |