File: nnc/ccv_nnc_graph.c
Warning: line 1523, column 44: Dereference of null pointer
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#include "ccv_internal.h"
#include "_ccv_nnc_graph.h"

// MARK - Level-2 API

ccv_nnc_graph_t* ccv_nnc_graph_new(void)
{
	ccv_nnc_graph_t* graph = (ccv_nnc_graph_t*)cccalloc(1, sizeof(ccv_nnc_graph_t));
	graph->exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), 5, 0);
	return graph;
}

void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size)
{
	if (!graph->sources)
		graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), source_size, 0);
	else
		ccv_array_clear(graph->sources);
	int i;
	for (i = 0; i < source_size; i++)
		ccv_array_push(graph->sources, sources + i);
	graph->topsorted = 0;
}

ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph)
{
	return graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0) : 0;
}

int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph)
{
	return graph->sources ? graph->sources->rnum : 0;
}

void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size)
{
	if (!graph->destinations)
		graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_t), destination_size, 0);
	else
		ccv_array_clear(graph->destinations);
	int i;
	for (i = 0; i < destination_size; i++)
		ccv_array_push(graph->destinations, destinations + i);
	graph->topsorted = 0;
}

ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph)
{
	return graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0) : 0;
}

int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph)
{
	return graph->destinations ? graph->destinations->rnum : 0;
}
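
// A minimal sketch of the ccv_array_t API that the source/destination
// bookkeeping above leans on, assuming only the ccv headers already included;
// `example_item_t` and `example_array_usage` are illustrative names, not
// library symbols.
typedef struct { int d; } example_item_t;
static void example_array_usage(void)
{
	// Arguments: element size, initial capacity, signature (0 = none).
	ccv_array_t* const array = ccv_array_new(sizeof(example_item_t), 2, 0);
	example_item_t item = { .d = 42 };
	ccv_array_push(array, &item); // Copies the element in; array->rnum becomes 1.
	// ccv_array_get is pointer arithmetic: data + rsize * index.
	example_item_t* const p = (example_item_t*)ccv_array_get(array, 0);
	p->d = 7; // Elements are mutable in place.
	ccv_array_clear(array); // rnum back to 0; capacity is retained.
	ccv_array_free(array);
}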

void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	exec_info->cmd = cmd;
}

ccv_nnc_cmd_t ccv_nnc_graph_exec_cmd(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	return exec_info->cmd;
}

void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	exec_info->hint = hint;
}

static int _ccv_nnc_tensor_multiview_level_count(const ccv_nnc_tensor_multiview_t* const mv)
{
	if (!CCV_IS_TENSOR_MULTIVIEW(mv))
		return 1;
	const int count = mv->kind + mv->repeat;
	int i, c = 0;
	for (i = 0; i < count; i++)
	{
		ccv_nnc_tensor_t* tv = CCV_NNC_MULTIVIEW_DATA(mv)[i];
		if (tv == CCV_NNC_TENSOR_PLACEHOLDER)
			c = ccv_max(c, 1);
		else
			c = ccv_max(c, _ccv_nnc_tensor_multiview_level_count((ccv_nnc_tensor_multiview_t*)tv));
	}
	return c + 1;
}

static ccv_nnc_graph_tensor_wrap_t* _ccv_nnc_graph_tensor_wrap_new(const ccv_nnc_tensor_multiview_t* const mv)
{
	const int level_count = _ccv_nnc_tensor_multiview_level_count(mv);
	ccv_nnc_graph_tensor_wrap_t* tensor_wrap = (ccv_nnc_graph_tensor_wrap_t*)ccmalloc(sizeof(ccv_nnc_graph_tensor_wrap_t) + sizeof(ccv_nnc_tensor_t*) * (level_count - 1));
	tensor_wrap->update_required = 0;
	tensor_wrap->count = level_count;
	tensor_wrap->index = 0;
	tensor_wrap->tensors[0] = (ccv_nnc_tensor_t*)mv;
	return tensor_wrap;
}

static void _ccv_nnc_graph_exec_rewind(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
{
	if (!info->tensor_wraps_ref)
		return;
	int i;
	assert(info->tensor_wraps_ref <= graph->tensor_wraps->rnum);
	ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
	// Rewind from tensor wraps.
	for (i = 0; i < info->input_size; i++)
		if (tensor_wrap_array->tensor_wraps[i])
			info->inputs[i] = tensor_wrap_array->tensor_wraps[i]->tensors[0];
	const int d = info->input_size;
	for (i = 0; i < info->output_size; i++)
		if (tensor_wrap_array->tensor_wraps[d + i])
			info->outputs[i] = tensor_wrap_array->tensor_wraps[d + i]->tensors[0];
	const int dd = info->input_size + info->output_size;
	for (i = 0; i < info->update_size; i++)
		if (tensor_wrap_array->tensor_wraps[dd + i])
			info->updates[i] = tensor_wrap_array->tensor_wraps[dd + i]->tensors[0];
}

static void _ccv_nnc_graph_tensor_wrap_free(ccv_nnc_graph_tensor_wrap_t* const tensor_wrap)
{
	ccfree(tensor_wrap);
}

ccv_nnc_graph_tensor_wrap_array_t* ccv_nnc_get_tensor_wrap_array(ccv_nnc_graph_t* const graph, const int tensor_wrap_size, int* const tensor_wraps_ref)
{
	ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = *tensor_wraps_ref ? (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, *tensor_wraps_ref - 1) : 0;
	// Otherwise, find an open slot.
	if (!tensor_wrap_array_ref)
	{
		if (!graph->tensor_wraps)
			graph->tensor_wraps = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wrap_array_t*), 0, 0);
		ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = 0;
		ccv_array_push(graph->tensor_wraps, &tensor_wrap_array); // Push a null pointer to reserve the slot.
		tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, graph->tensor_wraps->rnum - 1);
		*tensor_wraps_ref = graph->tensor_wraps->rnum;
	}
	int i;
	if (*tensor_wrap_array_ref)
	{
		if ((*tensor_wrap_array_ref)->size != tensor_wrap_size)
			*tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)ccrealloc(*tensor_wrap_array_ref, sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1));
		for (i = (*tensor_wrap_array_ref)->size; i < tensor_wrap_size; i++)
			(*tensor_wrap_array_ref)->tensor_wraps[i] = 0;
	} else
		*tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t*)cccalloc(sizeof(ccv_nnc_graph_tensor_wrap_array_t) + sizeof(ccv_nnc_graph_tensor_wrap_t*) * (tensor_wrap_size - 1), 1);
	ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
	tensor_wrap_array->size = tensor_wrap_size;
	return tensor_wrap_array;
}
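
// ccv_nnc_graph_tensor_wrap_array_t above uses the size-1 trailing-array
// idiom: the struct declares a one-element array and the allocation adds room
// for n - 1 more slots. A minimal sketch of the pattern with illustrative
// names (not library symbols):
typedef struct {
	int size;
	void* slots[1]; // Trailing array; the real length is fixed at allocation time.
} example_slot_array_t;
static example_slot_array_t* example_slot_array_new(const int n)
{
	// sizeof(example_slot_array_t) already covers one slot, hence n - 1 extra.
	example_slot_array_t* const array = (example_slot_array_t*)cccalloc(sizeof(example_slot_array_t) + sizeof(void*) * (n - 1), 1);
	array->size = n;
	return array;
}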

void ccv_nnc_set_tensor_wraps(ccv_nnc_graph_tensor_wrap_t** const tensor_wraps, ccv_nnc_tensor_t* const* const tensors, const int tensor_size)
{
	int i;
	for (i = 0; i < tensor_size; i++)
		if (tensors[i])
		{
			if (CCV_IS_TENSOR_MULTIVIEW(tensors[i]) &&
				((ccv_nnc_tensor_multiview_t*)tensors[i])->anchor != CCV_NNC_MULTIVIEW_PHI)
			{
				if (!tensor_wraps[i] || tensors[i] != tensor_wraps[i]->tensors[0])
				{
					if (tensor_wraps[i])
						_ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
					tensor_wraps[i] = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)tensors[i]);
				}
			} else {
				if (tensor_wraps[i])
					_ccv_nnc_graph_tensor_wrap_free(tensor_wraps[i]);
				tensor_wraps[i] = 0;
			}
		}
}

void ccv_nnc_graph_register_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
{
	ccv_nnc_graph_t* p = graph;
	const ccv_nnc_graph_tensor_wraps_ref_t tensor_wraps_ref = {
		.d = tensor_wraps_ref_d,
		.graph = graph,
	};
	do {
		if (!p->tensor_wraps_refs)
		{
			p->tensor_wraps_refs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_wraps_ref_t), 0, 0);
			ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
		} else {
			int i;
			int has_tensor_wraps_ref = 0;
			for (i = 0; !has_tensor_wraps_ref && i < p->tensor_wraps_refs->rnum; i++)
			{
				ccv_nnc_graph_tensor_wraps_ref_t* tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
				has_tensor_wraps_ref = (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph);
			}
			if (!has_tensor_wraps_ref)
				ccv_array_push(p->tensor_wraps_refs, &tensor_wraps_ref);
		}
		p = p->p;
	} while (p);
}

static void _ccv_nnc_graph_redo_tensor_wraps(ccv_nnc_graph_exec_info_t* const info, ccv_nnc_graph_t* const graph)
{
	int i;
	const int has_wrap = ccv_nnc_tensors_have_wraps(info->inputs, info->input_size) ||
		ccv_nnc_tensors_have_wraps(info->outputs, info->output_size) ||
		ccv_nnc_tensors_have_wraps(info->updates, info->update_size);
	if (has_wrap)
	{
		const int tensor_wrap_size = info->input_size + info->output_size + info->update_size;
		ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = ccv_nnc_get_tensor_wrap_array(graph, tensor_wrap_size, &info->tensor_wraps_ref);
		ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps, info->inputs, info->input_size);
		const int d = info->input_size;
		ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + d, info->outputs, info->output_size);
		const int dd = info->input_size + info->output_size;
		ccv_nnc_set_tensor_wraps(tensor_wrap_array->tensor_wraps + dd, info->updates, info->update_size);
	} else if (info->tensor_wraps_ref) {
		ccv_nnc_graph_tensor_wrap_array_t** tensor_wrap_array_ref = (ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, info->tensor_wraps_ref - 1);
		ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *tensor_wrap_array_ref;
		if (tensor_wrap_array)
		{
			for (i = 0; i < tensor_wrap_array->size; i++)
				if (tensor_wrap_array->tensor_wraps[i])
					_ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[i]);
			ccfree(tensor_wrap_array);
			*tensor_wrap_array_ref = 0;
			info->tensor_wraps_ref = 0;
		}
	}
}

static void _ccv_nnc_graph_deregister_tensor_wraps(ccv_nnc_graph_t* graph, const int tensor_wraps_ref_d)
{
	ccv_nnc_graph_t* p = graph;
	do {
		int i;
		// Remove from the array, shifting the tail down to keep order.
		if (p->tensor_wraps_refs)
			for (i = 0; i < p->tensor_wraps_refs->rnum; i++)
			{
				ccv_nnc_graph_tensor_wraps_ref_t* const tensor_wraps_ref = (ccv_nnc_graph_tensor_wraps_ref_t*)ccv_array_get(p->tensor_wraps_refs, i);
				if (tensor_wraps_ref->d == tensor_wraps_ref_d && tensor_wraps_ref->graph == graph)
				{
					--p->tensor_wraps_refs->rnum;
					if (i < p->tensor_wraps_refs->rnum)
						// memmove, not memcpy: source and destination overlap.
						memmove(tensor_wraps_ref, tensor_wraps_ref + 1, sizeof(ccv_nnc_graph_tensor_wraps_ref_t) * (p->tensor_wraps_refs->rnum - i));
					break;
				}
			}
		p = p->p;
	} while (p);
}

void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	assert(input_flag_size <= info->input_size);
	assert(output_flag_size <= info->output_size);
	if (info->input_size + info->output_size == 0)
		return;
	if (!info->input_flags)
	{
		info->input_flags = (int*)cccalloc(info->input_size + info->output_size, sizeof(int));
		info->output_flags = info->input_flags + info->input_size;
	}
	if (input_flag_size > 0)
		memcpy(info->input_flags, input_flags, sizeof(int) * input_flag_size);
	if (output_flag_size > 0)
		memcpy(info->output_flags, output_flags, sizeof(int) * output_flag_size);
}

void ccv_nnc_graph_exec_pair_with(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t pair_exec)
{
	assert(exec.graph == graph);
	assert(exec.d >= 0);
	assert(exec.d < graph->exec_info->rnum);
	assert(pair_exec.graph == graph || pair_exec.graph == graph->pair);
	assert(pair_exec.d >= 0);
	if (pair_exec.graph == graph)
		{ assert(pair_exec.d < graph->exec_info->rnum); }
	else
		{ assert(pair_exec.d < graph->pair->exec_info->rnum); }
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	exec_info->pair_ref = pair_exec.d + 1;
}

static ccv_nnc_tensor_t* _ccv_nnc_any_tensor_from_tensor_multiview(ccv_nnc_tensor_multiview_t* const mv)
{
	ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
	while (CCV_IS_TENSOR_MULTIVIEW(tensor))
	{
		ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
		const int count = 0;
		const int off = mv->kind;
		const int mod = mv->repeat;
		// If reached the root.
		tensor = CCV_NNC_MULTIVIEW_DATA(mv)[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
	}
	return tensor;
}
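
// The unwrap index above keeps the first `kind` views as-is and then cycles
// through the remaining `repeat` views. A minimal sketch of the same
// arithmetic with illustrative values (off = 1, mod = 2, counts 0..4 map to
// views 0, 1, 2, 1, 2); `example_unwrap_index` is not a library symbol:
static int example_unwrap_index(const int count, const int off, const int mod)
{
	return count >= off ? ((count - off) % mod) + off : count;
}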

void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	// De-register from the graph if it contains multiview tensors.
	if (info->tensor_wraps_ref)
		_ccv_nnc_graph_deregister_tensor_wraps(graph, info->tensor_wraps_ref - 1);
	// In case it is already executed, rewind.
	_ccv_nnc_graph_exec_rewind(info, graph);
	if (input_size == 0 && output_size == 0)
	{
		if (info->input_size > 0 || info->output_size > 0)
			ccfree(info->inputs);
		info->inputs = 0;
		info->outputs = 0;
		info->input_size = 0;
		info->output_size = 0;
		_ccv_nnc_graph_redo_tensor_wraps(info, graph);
		if (info->tensor_wraps_ref)
			ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
		return;
	}
	if (info->inputs)
		info->inputs = (ccv_nnc_tensor_t**)ccrealloc(info->inputs, sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
	else
		info->inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
	info->outputs = info->inputs + input_size;
	if (inputs)
		memcpy(info->inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
	if (outputs)
		memcpy(info->outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
	int i;
	int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
	for (i = 0; i < input_size + output_size; i++)
		if (info->inputs[i])
		{
			ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i];
			tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype);
		}
	info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes);
	info->input_size = input_size;
	info->output_size = output_size;
	_ccv_nnc_graph_redo_tensor_wraps(info, graph);
	// Register again if the tensor wraps exist.
	if (info->tensor_wraps_ref)
		ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
	// Free flags.
	if (info->input_flags)
	{
		ccfree(info->input_flags);
		info->input_flags = info->output_flags = 0;
	}
}
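
// A minimal call-site sketch for the setter above, assuming an existing graph
// `graph`, node `exec`, and tensors `x` and `y` (all hypothetical):
//
//   ccv_nnc_tensor_t* new_inputs[] = { x };
//   ccv_nnc_tensor_t* new_outputs[] = { y };
//   // The backend is re-resolved from the new tensors' memory/format/datatype.
//   ccv_nnc_graph_exec_set_io(graph, exec, new_inputs, 1, new_outputs, 1);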

void ccv_nnc_graph_exec_add_as_affected(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update)
{
	assert(CCV_IS_TENSOR_MULTIVIEW(update));
	assert(exec.d < graph->exec_info->rnum);
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	const int register_tensor_wraps = !info->tensor_wraps_ref;
	const int update_index = info->update_size;
	++info->update_size;
	if (info->updates)
		info->updates = (ccv_nnc_tensor_t**)ccrealloc(info->updates, sizeof(ccv_nnc_tensor_t*) * info->update_size);
	else
		info->updates = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * info->update_size);
	info->updates[update_index] = update;
	_ccv_nnc_graph_redo_tensor_wraps(info, graph);
	if (register_tensor_wraps)
		ccv_nnc_graph_register_tensor_wraps(graph, info->tensor_wraps_ref - 1);
}

ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
	int d = graph->exec_info->rnum;
	ccv_nnc_graph_exec_info_t info = {
		.cmd = cmd,
		.hint = hint,
		.input_size = input_size,
		.output_size = output_size,
	};
	assert(inputs || input_size == 0);
	assert(outputs || output_size == 0);
	if (input_size > 0 || output_size > 0)
	{
		info.inputs = (ccv_nnc_tensor_t**)ccmalloc(sizeof(ccv_nnc_tensor_t*) * (input_size + output_size));
		info.outputs = info.inputs + input_size;
		if (inputs)
			memcpy(info.inputs, inputs, sizeof(ccv_nnc_tensor_t*) * input_size);
		if (outputs)
			memcpy(info.outputs, outputs, sizeof(ccv_nnc_tensor_t*) * output_size);
		info.input_size = input_size;
		info.output_size = output_size;
		int i;
		int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0;
		for (i = 0; i < input_size + output_size; i++)
			if (info.inputs[i])
			{
				ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i];
				tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype);
			}
		info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes);
	}
	_ccv_nnc_graph_redo_tensor_wraps(&info, graph);
	// Add itself to the graph's wraps array, this will help the run time when we run the graph and do unwrapping.
	if (info.tensor_wraps_ref)
		ccv_nnc_graph_register_tensor_wraps(graph, info.tensor_wraps_ref - 1);
	ccv_array_push(graph->exec_info, &info);
	return (ccv_nnc_graph_exec_t){
		.d = d,
		.graph = graph,
	};
}

void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to)
{
	ccv_nnc_graph_tensor_carry_over_t carry_over = {
		.from = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)from),
		.to = _ccv_nnc_graph_tensor_wrap_new((ccv_nnc_tensor_multiview_t*)to)
	};
	if (!graph->carry_overs)
		graph->carry_overs = ccv_array_new(sizeof(ccv_nnc_graph_tensor_carry_over_t), 0, 0);
	ccv_array_push(graph->carry_overs, &carry_over);
}

int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
{
	assert(graph == source.graph);
	assert(graph == destination.graph);
	assert(source.d < graph->exec_info->rnum);
	assert(destination.d < graph->exec_info->rnum);
	ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
	if (src_info->outgoings == 0)
		src_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
	else {
		int i;
		// Check if this is already connected, if so, skip.
		for (i = 0; i < src_info->outgoings->rnum; i++)
			if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
				return -1;
	}
	ccv_array_push(src_info->outgoings, &destination.d);
	graph->topsorted = 0;
	return 0;
}
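
// Putting the pieces together: a minimal construction sketch using the
// functions above, assuming the public API from ccv_nnc.h (two no-op nodes
// wired in sequence; `example_build_graph` is an illustrative name):
static void example_build_graph(void)
{
	ccv_nnc_graph_t* const graph = ccv_nnc_graph_new();
	const ccv_nnc_cmd_t noop = ccv_nnc_cmd(CCV_NNC_NOOP, 0, ccv_nnc_cmd_auto, 0);
	const ccv_nnc_graph_exec_t a = ccv_nnc_graph_exec_new(graph, noop, ccv_nnc_no_hint, 0, 0, 0, 0);
	const ccv_nnc_graph_exec_t b = ccv_nnc_graph_exec_new(graph, noop, ccv_nnc_no_hint, 0, 0, 0, 0);
	ccv_nnc_graph_exec_concat(graph, a, b); // a -> b; returns -1 if the edge already exists.
	ccv_nnc_graph_set_sources(graph, &a, 1);
	ccv_nnc_graph_set_destinations(graph, &b, 1);
	ccv_nnc_graph_free(graph);
}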

int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination)
{
	assert(graph == source.graph);
	assert(graph == destination.graph);
	assert(source.d < graph->exec_info->rnum);
	assert(destination.d < graph->exec_info->rnum);
	ccv_nnc_graph_exec_info_t* src_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, source.d);
	if (!src_info->outgoings)
		return -1;
	int i;
	// Find the connection and remove it; return -1 if the two nodes are not connected.
	for (i = 0; i < src_info->outgoings->rnum; i++)
		if (*(int*)ccv_array_get(src_info->outgoings, i) == destination.d)
		{
			if (i < src_info->outgoings->rnum - 1)
				*(int*)ccv_array_get(src_info->outgoings, i) = *(int*)ccv_array_get(src_info->outgoings, src_info->outgoings->rnum - 1);
			--src_info->outgoings->rnum;
			graph->topsorted = 0;
			return 0;
		}
	return -1;
}
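
// The removal above is the unordered swap-remove idiom: overwrite the found
// slot with the last element and shrink, which is O(1) but order-destroying
// (contrast with the ordered, shifting removal in
// _ccv_nnc_graph_deregister_tensor_wraps). The idiom in isolation, on a plain
// int array (`example_swap_remove` is an illustrative helper):
static void example_swap_remove(int* const values, int* const size, const int i)
{
	values[i] = values[*size - 1]; // Move the last element into the hole.
	--*size;
}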

int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph)
{
	return graph->exec_info ? graph->exec_info->rnum : 0;
}

void* ccv_nnc_graph_buffer(ccv_nnc_graph_t* const graph, int size)
{
	if (graph->buffer_size >= size)
		return graph->buffer;
	graph->buffer_size = size;
	graph->buffer = (graph->buffer) ? ccrealloc(graph->buffer, size) : ccmalloc(size);
	return graph->buffer;
}
| 501 | ||||
| 502 | void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size) | |||
| 503 | { | |||
| 504 | if (exec_cvt_size == 0 && graph->exec_info->rnum == 0) | |||
| 505 | { | |||
| 506 | graph->topsorted = 1; | |||
| 507 | return; | |||
| 508 | } | |||
| 509 | assert(exec_cvt_size == graph->exec_info->rnum)((void) sizeof ((exec_cvt_size == graph->exec_info->rnum ) ? 1 : 0), __extension__ ({ if (exec_cvt_size == graph->exec_info ->rnum) ; else __assert_fail ("exec_cvt_size == graph->exec_info->rnum" , "ccv_nnc_graph.c", 509, __extension__ __PRETTY_FUNCTION__); })); | |||
| 510 | assert(graph->sources && graph->sources->rnum)((void) sizeof ((graph->sources && graph->sources ->rnum) ? 1 : 0), __extension__ ({ if (graph->sources && graph->sources->rnum) ; else __assert_fail ("graph->sources && graph->sources->rnum" , "ccv_nnc_graph.c", 510, __extension__ __PRETTY_FUNCTION__); })); | |||
| 511 | assert(graph->destinations && graph->destinations->rnum)((void) sizeof ((graph->destinations && graph-> destinations->rnum) ? 1 : 0), __extension__ ({ if (graph-> destinations && graph->destinations->rnum) ; else __assert_fail ("graph->destinations && graph->destinations->rnum" , "ccv_nnc_graph.c", 511, __extension__ __PRETTY_FUNCTION__); })); | |||
| 512 | int i, j; | |||
| 513 | for (i = 0; i < exec_cvt_size; i++) | |||
| 514 | exec_cvt[i] = -1; | |||
| 515 | ccv_array_t* exec_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_info_t), graph->exec_info->rnum, 0); | |||
| 516 | // If there are breakpoints, it is more complicated, we first start to the breakpoints, and then continue from the breakpoints to the destinations. | |||
| 517 | if (graph->breakpoint_size) | |||
| 518 | { | |||
| 519 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, graph->breakpoints, graph->breakpoint_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((graph->exec_info->rnum) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_info->rnum); _i_ ++) _incoming_edges_ += (((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_i_].outgoings) ? ((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((graph->exec_info-> rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_ ; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof (ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof (int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (graph->exec_info->rnum) + sizeof(int32_t) * ((graph->exec_info->rnum) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (graph ->exec_info->rnum)); int32_t* _exists_[2] = { (int32_t* )(_incomings_ + (graph->exec_info->rnum)), (int32_t*)(_incomings_ + (graph->exec_info->rnum)) + (graph->exec_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_info ->rnum); for (_i_ = 0; _i_ < (graph->sources->rnum ); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_t*)((void*) (((char*)((graph->sources)->data)) + (size_t)(graph-> sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 
1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)((( char*)((graph->sources)->data)) + (size_t)(graph->sources )->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)(( graph->sources)->data)) + (size_t)(graph->sources)-> rsize * (size_t)(0))))[_i_].d].r = 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->sources)->data)) + (size_t) (graph->sources)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_ [2] = { (graph->sources->rnum), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue ; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_info_t*) ((void*)(((char*)((graph->exec_info)->data)) + (size_t) (graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void* )(((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_ [_q_] < (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum) ) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (graph->sources->rnum); _i_++) { ((void ) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph-> sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 
1 : 0), __extension__ ( { if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources )->data)) + (size_t)(graph->sources)->rsize * (size_t )(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_t*)((void*)(((char*)(( graph->sources)->data)) + (size_t)(graph->sources)-> rsize * (size_t)(0))))[_i_].d].r = 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_t *)((void*)(((char*)((graph->sources)->data)) + (size_t) (graph->sources)->rsize * (size_t)(0))))[_i_].d; } _exist_size_ [0] = (graph->sources->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 3) continue; _incomings_[_idx_].r = 4 ; if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph-> exec_info)->data)) + (size_t)(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_ < (( ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph->exec_info )->data)) + (size_t)(graph->exec_info)->rsize * (size_t )(0))))[_idx_].outgoings->rnum; _j_++) { const int d = *(int *)((void*)(((char*)((((ccv_nnc_graph_exec_info_t*)((void*)((( char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->data )) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*)(((char*)( (graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t )(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_ ; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue; _incomings_ [d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (graph-> exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)", "ccv_nnc_graph.c" , 519, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_] [_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_ ), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (graph ->breakpoint_size); _i_++) { ((void) sizeof (((graph->breakpoints )[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph-> breakpoints)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(graph->breakpoints)[_i_].d].r = 5; _exists_ [0][_i_] = (graph->breakpoints)[_i_].d; } _exist_size_[0] = (graph->breakpoint_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0 ; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_info->rnum)) ? 
1 : 0 ), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info ->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (graph->breakpoint_size); _i_++) { ((void ) sizeof (((graph->breakpoints)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_].graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(graph->breakpoints)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (graph->sources->rnum); _i_++) { (( void) sizeof ((((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph ->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph-> sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_t*)((void*)(((char*)((graph->sources)->data)) + (size_t)(graph->sources)->rsize * (size_t)(0))))[_i_].graph == graph" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_t*)((void*)(((char *)((graph->sources)->data)) + (size_t)(graph->sources )->rsize * (size_t)(0))))[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_ [0] = (graph->sources->rnum); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0 ; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_ [_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if (((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings ) { if (((ccv_nnc_graph_exec_info_t*)((void*)(((char*)((graph ->exec_info)->data)) + (size_t)(graph->exec_info)-> rsize * (size_t)(0))))[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph ->breakpoint_size)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_info_t*)((void *)(((char*)((graph->exec_info)->data)) + (size_t)(graph ->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings-> rnum; _j_++) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_info_t *)((void*)(((char*)((graph->exec_info)->data)) + (size_t )(graph->exec_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (graph ->breakpoint_size)) { ((void) sizeof ((_exist_size_[_q_] < (graph->exec_info->rnum)) ? 
1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_info->rnum)" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_ )); } for (_i_ = 0; _i_ < (graph->breakpoint_size); _i_ ++) { ((void) sizeof (((graph->breakpoints)[_i_].graph == graph ) ? 1 : 0), __extension__ ({ if ((graph->breakpoints)[_i_] .graph == graph) ; else __assert_fail ("(graph->breakpoints)[_i_].graph == graph" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[(graph->breakpoints)[_i_].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_[(graph-> breakpoints)[_i_].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_ [(graph->breakpoints)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(graph->breakpoints)[_i_].d].c == 0", "ccv_nnc_graph.c" , 519, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_ [(graph->breakpoints)[_i_].d].c > 0) continue; _visit_-> node[_visit_->size].index = (((graph->breakpoints)[_i_] .d)); _visit_->node[_visit_->size].term = ((_incomings_ [(graph->breakpoints)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->size <= (graph->exec_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (graph-> exec_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_info->rnum)" , "ccv_nnc_graph.c", 519, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); | |||
| 520 | for (i = 0; i < graph->breakpoint_size; i++) | |||
| 521 | exec_cvt[graph->breakpoints[i].d] = -2; // Mark this as breakpoints, so we will skip the first round. | |||
| 522 | ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof (((ccv_nnc_graph_exec_info_t*)((void*) (((char*)((graph->exec_info)->data)) + (size_t)(graph-> exec_info)->rsize * (size_t)(0))))) const node __attribute__ ((unused)) = ((ccv_nnc_graph_exec_info_t*)((void*)(((char*)(( graph->exec_info)->data)) + (size_t)(graph->exec_info )->rsize * (size_t)(0)))) + idx; { | |||
| 523 | assert(!node->pair_ref)((void) sizeof ((!node->pair_ref) ? 1 : 0), __extension__ ( { if (!node->pair_ref) ; else __assert_fail ("!node->pair_ref" , "ccv_nnc_graph.c", 523, __extension__ __PRETTY_FUNCTION__); })); // If node has a pair ref, we cannot fix it up. | |||
| 524 | if (exec_cvt[idx] == -2) // Skip breakpoint. | |||
| 525 | continue; | |||
| 526 | // Loop over node and push to the array. | |||
| 527 | ccv_array_push(exec_info, node); | |||
| 528 | // Go to its sub-graph to fix exec_idx | |||
| 529 | for (i = 0; i < node->graph_ref_size; i++) | |||
| 530 | { | |||
| 531 | const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node )->_inline_graph_ref)[i] - 1; | |||
| 532 | if (graph_ref >= 0) | |||
| 533 | { | |||
| 534 | ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t )(graph->sub_graphs)->rsize * (size_t)(graph_ref))); | |||
| 535 | sub_graph->exec_idx = exec_info->rnum; | |||
| 536 | } | |||
| 537 | } | |||
| 538 | exec_cvt[idx] = exec_info->rnum - 1; | |||
| 539 | } ccv_nnc_graph_visit_endfor} } | |||
| 540 | ccv_nnc_graph_visit_free(visit); | |||
| 541 | graph->breakpoint_offset = exec_info->rnum; | |||
| 542 | visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph->breakpoints, graph->breakpoint_size, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0); | |||
| 543 | ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx) { | |||
| 544 | assert(!node->pair_ref); // If node has a pair ref, we cannot fix it up. | |||
| 545 | // Loop over node and push to the array. | |||
| 546 | ccv_array_push(exec_info, node); | |||
| 547 | // Go to its sub-graph to fix exec_idx | |||
| 548 | for (i = 0; i < node->graph_ref_size; i++) | |||
| 549 | { | |||
| 550 | const int graph_ref = CCV_NNC_GRAPH_REF(node)[i] - 1; | |||
| 551 | if (graph_ref >= 0) | |||
| 552 | { | |||
| 553 | ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref); | |||
| 554 | sub_graph->exec_idx = exec_info->rnum; | |||
| 555 | } | |||
| 556 | } | |||
| 557 | exec_cvt[idx] = exec_info->rnum - 1; | |||
| 558 | } ccv_nnc_graph_visit_endfor | |||
| 559 | ccv_nnc_graph_visit_free(visit); | |||
| 560 | for (i = 0; i < graph->breakpoint_size; i++) | |||
| 561 | { assert(exec_cvt[graph->breakpoints[i].d] >= 0); } // All breakpoints should be assigned. | |||
| 562 | } else { | |||
| 563 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0), graph->sources->rnum, (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0), graph->destinations->rnum, 0); | |||
| 564 | ccv_nnc_graph_visit_for(visit, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), node, idx) { | |||
| 565 | assert(!node->pair_ref); // If node has a pair ref, we cannot fix it up. | |||
| 566 | // Loop over node and push to the array. | |||
| 567 | ccv_array_push(exec_info, node); | |||
| 568 | // Go to its sub-graph to fix exec_idx | |||
| 569 | for (i = 0; i < node->graph_ref_size; i++) | |||
| 570 | { | |||
| 571 | const int graph_ref = CCV_NNC_GRAPH_REF(node)[i] - 1; | |||
| 572 | if (graph_ref >= 0) | |||
| 573 | { | |||
| 574 | ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, graph_ref); | |||
| 575 | sub_graph->exec_idx = exec_info->rnum; | |||
| 576 | } | |||
| 577 | } | |||
| 578 | exec_cvt[idx] = exec_info->rnum - 1; | |||
| 579 | } ccv_nnc_graph_visit_endfor | |||
| 580 | ccv_nnc_graph_visit_free(visit); | |||
| 581 | } | |||
| 582 | assert(graph->exec_info->rnum == exec_info->rnum); | |||
| 583 | ccv_array_free(graph->exec_info); | |||
| 584 | graph->exec_info = exec_info; | |||
| 585 | for (i = 0; i < graph->sources->rnum; i++) | |||
| 586 | { | |||
| 587 | ccv_nnc_graph_exec_t* const source = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, i); | |||
| 588 | source->d = exec_cvt[source->d]; | |||
| 589 | } | |||
| 590 | for (i = 0; i < graph->destinations->rnum; i++) | |||
| 591 | { | |||
| 592 | ccv_nnc_graph_exec_t* const destination = (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, i); | |||
| 593 | destination->d = exec_cvt[destination->d]; | |||
| 594 | } | |||
| 595 | // Update all outgoings to reflect the latest. | |||
| 596 | for (i = 0; i < exec_info->rnum; i++) | |||
| 597 | { | |||
| 598 | ccv_nnc_graph_exec_info_t* const info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(exec_info, i); | |||
| 599 | if (info->outgoings) | |||
| 600 | for (j = 0; j < info->outgoings->rnum; j++) | |||
| 601 | *(int*)ccv_array_get(info->outgoings, j) = exec_cvt[*(int*)ccv_array_get(info->outgoings, j)]; | |||
| 602 | } | |||
| 603 | graph->topsorted = 1; | |||
| 604 | } | |||
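Editor's note: the pass above rebuilds exec_info in visit order and then rewrites every stored node index (sources, destinations, and each node's outgoings) through the exec_cvt old-to-new map. Below is a minimal, self-contained sketch of that remapping step with hypothetical toy data, not the ccv structures themselves:

#include <stdio.h>

int main(void)
{
	int exec_cvt[4] = { 2, 0, 3, 1 }; /* old index -> new index produced by the sort */
	int outgoings[3] = { 0, 2, 3 };   /* edges still recorded against old indices */
	int j;
	for (j = 0; j < 3; j++)
		outgoings[j] = exec_cvt[outgoings[j]]; /* the same update as line 601 above */
	for (j = 0; j < 3; j++)
		printf("%d ", outgoings[j]); /* prints: 2 3 1 */
	printf("\n");
	return 0;
}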
| 605 | ||||
| 606 | typedef struct { | |||
| 607 | int device_id; | |||
| 608 | int exec_idx; | |||
| 609 | ccv_array_t* signal_set; | |||
| 610 | ccv_array_t* command_set; // The set of commands executed in this stream. In case of a tie (on rank), we check this set. | |||
| 611 | } ccv_nnc_stream_data_t; | |||
| 612 | ||||
| 613 | static void _ccv_nnc_graph_schedule_assign_signals(ccv_array_t* const incoming, ccv_nnc_graph_exec_schedule_t* const node, ccv_array_t* const stream_data, int* const signal_size, ccv_nnc_graph_exec_schedule_t* const exec_info, const int exec_info_size) | |||
| 614 | { | |||
| 615 | assert(incoming->rnum > 0); | |||
| 616 | int i, j, k; | |||
| 617 | int wait_size = 0, max_wait_size = 0; | |||
| 618 | for (i = 0; i < incoming->rnum; i++) | |||
| 619 | { | |||
| 620 | const int incoming_idx = *(int*)ccv_array_get(incoming, i); | |||
| 621 | ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx; | |||
| 622 | assert(incoming_exec_info->stream_size > 0); | |||
| 623 | max_wait_size += incoming_exec_info->stream_size; | |||
| 624 | } | |||
| 625 | int waits[ccv_max(1, max_wait_size)]; | |||
| 626 | assert(node->stream_size > 0); | |||
| 627 | for (i = 0; i < incoming->rnum; i++) | |||
| 628 | { | |||
| 629 | const int incoming_idx = *(int*)ccv_array_get(incoming, i); | |||
| 630 | assert(incoming_idx < exec_info_size); | |||
| 631 | assert(incoming_idx >= 0); | |||
| 632 | ccv_nnc_graph_exec_schedule_t* const incoming_exec_info = exec_info + incoming_idx; | |||
| 633 | assert(incoming_exec_info->stream_size > 0); | |||
| 634 | int stream_synced = 1; | |||
| 635 | // If the current node's streams are a subset of the incoming node's streams, there | |||
| 636 | // is no need to sync with a signal, because we are already synced with the incoming node. | |||
| 637 | for (j = 0; stream_synced && j < node->stream_size; j++) | |||
| 638 | { | |||
| 639 | const int s = SCHEDULE_STREAMS(*node)[j]; | |||
| 640 | assert(s >= 0); | |||
| 641 | int flag = 0; | |||
| 642 | for (k = 0; !flag && k < incoming_exec_info->stream_size; k++) | |||
| 643 | flag = (SCHEDULE_STREAMS(*incoming_exec_info)[k] == s); | |||
| 644 | stream_synced = flag; | |||
| 645 | } | |||
| 646 | if (stream_synced) | |||
| 647 | continue; | |||
| 648 | // Otherwise, find the streams we need to sync with, and create signals for these. | |||
| 649 | for (j = 0; j < incoming_exec_info->stream_size; j++) | |||
| 650 | { | |||
| 651 | const int s = SCHEDULE_STREAMS(*incoming_exec_info)[j]; | |||
| 652 | assert(s >= 0); | |||
| 653 | int flag = 0; | |||
| 654 | for (k = 0; !flag && k < node->stream_size; k++) | |||
| 655 | flag = (SCHEDULE_STREAMS(*node)[k] == s); | |||
| 656 | if (!flag) // Need to have a signal. | |||
| 657 | { | |||
| 658 | if (SCHEDULE_SIGNALS(*incoming_exec_info)[j] < 0) | |||
| 659 | SCHEDULE_SIGNALS(*incoming_exec_info)[j] = (*signal_size)++; | |||
| 660 | else { | |||
| 661 | int flag = 0; | |||
| 662 | // If any of the current node's streams has already seen this signal, we are good already. | |||
| 663 | for (k = 0; !flag && k < node->stream_size; k++) | |||
| 664 | { | |||
| 665 | assert(SCHEDULE_STREAMS(*node)[k] >= 0); | |||
| 666 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k]); | |||
| 667 | flag = (data->signal_set && ccv_array_find_int(data->signal_set, SCHEDULE_SIGNALS(*incoming_exec_info)[j])); | |||
| 668 | } | |||
| 669 | if (flag) | |||
| 670 | continue; | |||
| 671 | } | |||
| 672 | // Otherwise, we need to wait for this signal. Currently, the granularity is to wait on all of the node's streams. | |||
| 673 | waits[wait_size++] = SCHEDULE_SIGNALS(*incoming_exec_info)[j]; | |||
| 674 | // Record that all streams on this node have now seen this signal. | |||
| 675 | for (k = 0; k < node->stream_size; k++) | |||
| 676 | { | |||
| 677 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(*node)[k]); | |||
| 678 | if (!data->signal_set) | |||
| 679 | data->signal_set = ccv_array_new(sizeof(int), 0, 0); | |||
| 680 | ccv_array_push(data->signal_set, &SCHEDULE_SIGNALS(*incoming_exec_info)[j]); | |||
| 681 | } | |||
| 682 | } | |||
| 683 | } | |||
| 684 | } | |||
| 685 | node->wait_size = wait_size; | |||
| 686 | if (wait_size > 0) | |||
| 687 | { | |||
| 688 | node->waits = node->waits ? ccrealloc(node->waits, sizeof(int) * wait_size) : ccmalloc(sizeof(int) * wait_size); | |||
| 689 | memcpy(node->waits, waits, sizeof(int) * wait_size); | |||
| 690 | } | |||
| 691 | } | |||
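Editor's note: the core of the signal assignment above is a subset test. If every stream the current node runs on also ran the incoming node, the two are already ordered and no signal is needed; only producer streams not shared with the consumer contribute a wait, and each stream's signal_set deduplicates waits it has already observed. A standalone sketch of the subset test with toy stream ids (hypothetical data, not the ccv scheduling structures):

#include <stdio.h>

static int contains(const int* const set, const int size, const int v)
{
	int i;
	for (i = 0; i < size; i++)
		if (set[i] == v)
			return 1;
	return 0;
}

int main(void)
{
	const int producer[2] = { 0, 1 }; /* streams the incoming node ran on */
	const int consumer[1] = { 1 };    /* streams the current node runs on */
	int synced = 1, j;
	for (j = 0; synced && j < 1; j++) /* subset check, as in the loop at line 637 */
		synced = contains(producer, 2, consumer[j]);
	printf(synced ? "no signal needed\n" : "emit signal and wait\n"); /* prints: no signal needed */
	return 0;
}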
| 692 | ||||
| 693 | typedef struct { | |||
| 694 | int rank; | |||
| 695 | ccv_array_t* outgoings; | |||
| 696 | } ccv_nnc_incoming_t; | |||
| 697 | ||||
| 698 | static int _ccv_nnc_device_ids_for_stream_data(ccv_nnc_graph_exec_info_t* const node, const int device_id, ccv_array_t* const stream_data, int* const device_ids, const int max_device_id_size) | |||
| 699 | { | |||
| 700 | // TODO: I need to re-think whether this is GPU only or not. | |||
| 701 | int device_id_size = ccv_nnc_device_ids_for_io(node->inputs, node->input_size, node->outputs, node->output_size, CCV_TENSOR_GPU_MEMORY, device_ids, max_device_id_size); | |||
| 702 | if (device_id_size == 0) | |||
| 703 | { | |||
| 704 | // If there is a default data, use that device id. Otherwise, use the device id passed in (this will be the default data device id). | |||
| 705 | if (stream_data->rnum > 0) | |||
| 706 | { | |||
| 707 | ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0); | |||
| 708 | device_ids[0] = default_data->device_id; | |||
| 709 | } else | |||
| 710 | device_ids[0] = device_id >= 0 ? device_id : 0; | |||
| 711 | device_id_size = 1; | |||
| 712 | } | |||
| 713 | return device_id_size; | |||
| 714 | } | |||
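Editor's note: put plainly, when a node touches no GPU tensors the helper falls back first to the default stream's device and then to the caller-supplied device_id. A short sketch of just that fallback order (a hypothetical helper for illustration, not part of the source):

static int fallback_device_id(const int stream_count, const int default_stream_device, const int device_id)
{
	/* Prefer the device of the default stream data when one exists. */
	if (stream_count > 0)
		return default_stream_device;
	/* Otherwise use the device id passed in, treating a negative id as device 0. */
	return device_id >= 0 ? device_id : 0;
}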
| 715 | ||||
| 716 | void ccv_nnc_graph_static_schedule_free(ccv_nnc_graph_static_schedule_t* const schedule) | |||
| 717 | { | |||
| 718 | int i; | |||
| 719 | ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info; | |||
| 720 | for (i = 0; i < schedule->exec_info_size; i++) | |||
| 721 | { | |||
| 722 | if (schd_info[i].stream_size > 1) | |||
| 723 | ccfree(schd_info[i]._heap_streams); | |||
| 724 | if (schd_info[i].waits) | |||
| 725 | ccfree(schd_info[i].waits); | |||
| 726 | } | |||
| 727 | if (schedule->stream_1s) | |||
| 728 | ccfree(schedule->stream_1s); | |||
| 729 | if (schedule->waits) | |||
| 730 | ccfree(schedule->waits); | |||
| 731 | if (schedule->psort) | |||
| 732 | ccfree(schedule->psort); | |||
| 733 | if (schedule->begin) | |||
| 734 | ccv_nnc_stream_signal_free(schedule->begin); | |||
| 735 | if (schedule->end) | |||
| 736 | ccv_nnc_stream_signal_free(schedule->end); | |||
| 737 | ccfree(schedule); | |||
| 738 | } | |||
| 739 | ||||
| 740 | static ccv_nnc_graph_static_schedule_t* _ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int device_id, const int max_stream_count, ccv_nnc_stream_context_t* const stream_context, const ccv_nnc_graph_exec_t* const _sources, const int _source_size, const ccv_nnc_graph_exec_t* const _destinations, const int _destination_size) | |||
| 741 | { | |||
| 742 | assert(graph->sources && graph->sources->rnum); | |||
| 743 | assert(graph->destinations && graph->destinations->rnum); | |||
| 744 | assert(graph->topsorted); // Only support this on a topsorted graph. | |||
| 745 | const int exec_info_size = graph->exec_info->rnum; | |||
| 746 | assert(exec_info_size > 0); | |||
| 747 | const ccv_nnc_graph_exec_t* const sources = _sources == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0) : _sources; | |||
| 748 | const int source_size = _sources == 0 ? graph->sources->rnum : _source_size; | |||
| 749 | if (!_sources) | |||
| 750 | { assert(_source_size == 0); } | |||
| 751 | const ccv_nnc_graph_exec_t* const destinations = _destinations == 0 ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0) : _destinations; | |||
| 752 | const int destination_size = _destinations == 0 ? graph->destinations->rnum : _destination_size; | |||
| 753 | if (!_destinations) | |||
| 754 | { assert(_destination_size == 0); } | |||
| 755 | const int root_schedule = (_sources == 0 && _destinations == 0); | |||
| 756 | ccv_nnc_graph_static_schedule_t* const schedule = cccalloc(1, sizeof(ccv_nnc_graph_static_schedule_t) + sizeof(ccv_nnc_graph_exec_schedule_t) * (exec_info_size - 1)); | |||
| 757 | schedule->exec_info_size = exec_info_size; | |||
| 758 | ccv_nnc_graph_exec_schedule_t* const schd_info = schedule->exec_info; | |||
| 759 | ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0); | |||
| 760 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0); | |||
| 761 | if (!root_schedule) | |||
| 762 | { | |||
| 763 | // If this is not a root schedule, we need to do partial topsort. | |||
| 764 | int psort_size = 0; | |||
| 765 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx) { | |||
| 766 | ++psort_size; | |||
| 767 | } ccv_nnc_graph_visit_endfor | |||
| 768 | schedule->psort = (int*)ccmalloc(sizeof(int) * psort_size); | |||
| 769 | schedule->psort_size = psort_size; | |||
| 770 | psort_size = 0; | |||
| 771 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx) { | |||
| 772 | schedule->psort[psort_size++] = idx; | |||
| 773 | } ccv_nnc_graph_visit_endfor | |||
| 774 | } | |||
| 775 | int i, j, k; | |||
| 776 | // Generate exec dependencies (or, in other words, partial ordering of executions). | |||
| 777 | ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(exec_info_size, exec_info_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0); | |||
| 778 | int* buf = (int*)ccmalloc(sizeof(int) * exec_info_size * 2); | |||
| 779 | int buf_size; | |||
| 780 | #define for_block(x, val) \ | |||
| 781 | do { \ | |||
| 782 | if (((int32_t*)val)[0] > 0) \ | |||
| 783 | { \ | |||
| 784 | buf[buf_size * 2] = x; \ | |||
| 785 | buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \ | |||
| 786 | ++buf_size; \ | |||
| 787 | } \ | |||
| 788 | } while (0) | |||
| 789 | for (i = 0; i < exec_info_size; i++) | |||
| 790 | schd_info[i].stream_size = -1; | |||
| 791 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx, term) { | |||
| 792 | buf_size = 0; /* save all its parent deps to this buffer */ | |||
| 793 | ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx); | |||
| 794 | schd_info[idx].stream_size = 0; | |||
| 795 | if (vector) | |||
| 796 | CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block); | |||
| 797 | if (!node->outgoings) | |||
| 798 | continue; | |||
| 799 | for (i = 0; i < node->outgoings->rnum; i++) | |||
| 800 | { | |||
| 801 | int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)( node->outgoings)->rsize * (size_t)(i))); | |||
| 802 | const int32_t one = 1; | |||
| 803 | ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx); | |||
| 804 | /* If not found, set it. If the current node is the destination node, there is no need | |||
| 805 | * to set itself as parent of subsequent nodes because of its terminal nature. */ | |||
| 806 | if (!term && (!cell.i32 || cell.i32[0] == 0)) | |||
| 807 | ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one); | |||
| 808 | for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */ | |||
| 809 | { | |||
| 810 | ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]); | |||
| 811 | /* If not found, set */ | |||
| 812 | if (!cell.i32 || cell.i32[0] == 0) | |||
| 813 | ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]); | |||
| 814 | else { | |||
| 815 | /* Otherwise, set to the longest one */ | |||
| 816 | int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1])({ typeof (cell.i32[0]) _a = (cell.i32[0]); typeof (buf[j * 2 + 1]) _b = (buf[j * 2 + 1]); (_a > _b) ? _a : _b; }); | |||
| 817 | ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep); | |||
| 818 | } | |||
| 819 | } | |||
| 820 | } | |||
| 821 | } ccv_nnc_graph_visit_endfor} } | |||
| 822 | #undef for_block | |||
| 823 | ccfreefree(buf); | |||
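The pass above records, for every node, its transitive ancestors in the sparse `exec_dep` matrix together with the longest hop count to each ancestor, propagated in topological visit order. A minimal standalone sketch of the same bookkeeping with dense arrays (the graph, sizes, and names here are hypothetical, and the library's buf-based propagation is simplified):

#include <stdio.h>

#define N 4

int main(void)
{
	/* dep[v][u] > 0 means u precedes v; the value is the longest hop count. */
	int dep[N][N] = {{0}};
	/* A small DAG listed in topological order: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3. */
	const int edges[4][2] = {{0, 1}, {0, 2}, {1, 3}, {2, 3}};
	int i, j;
	for (i = 0; i < 4; i++)
	{
		const int u = edges[i][0], v = edges[i][1];
		if (dep[v][u] == 0)
			dep[v][u] = 1; /* The direct edge counts as one hop. */
		for (j = 0; j < N; j++) /* Inherit u's ancestors, keeping the longest path. */
			if (dep[u][j] > 0 && dep[u][j] + 1 > dep[v][j])
				dep[v][j] = dep[u][j] + 1;
	}
	printf("longest path 0 -> 3: %d hops\n", dep[3][0]); /* Prints 2. */
	return 0;
}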
| 824 | // Algorithm to allocate signals and streams for this graph. | |||
| 825 | ccv_array_t* const stream_data = ccv_array_new(sizeof(ccv_nnc_stream_data_t), 0, 0); | |||
| 826 | ccv_array_t** const outgoings = cccalloccalloc(exec_info_size, sizeof(ccv_array_t*)); | |||
| 827 | ccv_nnc_incoming_t* const incomings = cccalloccalloc(exec_info_size, sizeof(ccv_nnc_incoming_t)); | |||
| 828 | int max_device_id_size = 1; | |||
| 829 | // Filter out outgoing nodes that we will be able to access afterwards anyway. | |||
| 830 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 831 | max_device_id_size = ccv_max(node->input_size + node->output_size, max_device_id_size)({ typeof (node->input_size + node->output_size) _a = ( node->input_size + node->output_size); typeof (max_device_id_size ) _b = (max_device_id_size); (_a > _b) ? _a : _b; }); | |||
| 832 | if (node->outgoings) | |||
| 833 | { | |||
| 834 | outgoings[idx] = ccv_array_new(sizeof(int), 0, 0); | |||
| 835 | for (i = 0; i < node->outgoings->rnum; i++) | |||
| 836 | { | |||
| 837 | const int di = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)( node->outgoings)->rsize * (size_t)(i))); | |||
| 838 | // Skip if we haven't accessed this exec. | |||
| 839 | if (schd_info[di].stream_size < 0) | |||
| 840 | continue; | |||
| 841 | int flag = 0; | |||
| 842 | for (j = 0; !flag && j < node->outgoings->rnum; j++) | |||
| 843 | { | |||
| 844 | if (j != i) | |||
| 845 | { | |||
| 846 | const int dj = *(int*)ccv_array_get(node->outgoings, j)((void*)(((char*)((node->outgoings)->data)) + (size_t)( node->outgoings)->rsize * (size_t)(j))); | |||
| 847 | ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, di, dj); | |||
| 848 | flag = (cell.i32 && cell.i32[0]); | |||
| 849 | } | |||
| 850 | } | |||
| 851 | if (!flag) | |||
| 852 | { | |||
| 853 | ccv_array_push(outgoings[idx], &di); | |||
| 854 | if (!incomings[di].outgoings) | |||
| 855 | incomings[di].outgoings = ccv_array_new(sizeof(int), 1, 0); | |||
| 856 | ccv_array_push(incomings[di].outgoings, &idx); | |||
| 857 | } | |||
| 858 | } | |||
| 859 | } | |||
| 860 | } ccv_nnc_graph_visit_endfor} } | |||
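The filter above is a local transitive reduction: a direct edge from idx to di is dropped whenever some sibling outgoing dj already reaches di according to `exec_dep`, since di is then ordered after idx anyway and the pruned `outgoings`/`incomings` lists stay small. A hedged sketch of the rule with a dense reachability table standing in for the sparse cell lookup (all values hypothetical):

#include <stdio.h>

#define N 3

/* reach[a][b] = 1 when a path a -> b exists (stands in for the exec_dep lookup). */
static const int reach[N][N] = {
	{0, 1, 1}, /* 0 reaches 1 and 2. */
	{0, 0, 1}, /* 1 reaches 2. */
	{0, 0, 0},
};

/* Keep the direct edge to out[i] only when no sibling out[j] already reaches it. */
static int keep_edge(const int* out, int n, int i)
{
	int j;
	for (j = 0; j < n; j++)
		if (j != i && reach[out[j]][out[i]])
			return 0; /* Redundant: out[i] is reachable through a sibling. */
	return 1;
}

int main(void)
{
	const int out[2] = {1, 2}; /* A node with direct edges to both 1 and 2. */
	printf("keep edge to 1: %d\n", keep_edge(out, 2, 0)); /* 1: nothing reaches 1. */
	printf("keep edge to 2: %d\n", keep_edge(out, 2, 1)); /* 0: 1 already reaches 2. */
	return 0;
}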
| 861 | #define visitor(node, idx, _) \ | |||
| 862 | if (node->outgoings) \ | |||
| 863 | for (i = 0; i < node->outgoings->rnum; i++) \ | |||
| 864 | { \ | |||
| 865 | const int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)( node->outgoings)->rsize * (size_t)(i))); \ | |||
| 866 | node->rank = ccv_max(incomings[d].rank + 1, node->rank)({ typeof (incomings[d].rank + 1) _a = (incomings[d].rank + 1 ); typeof (node->rank) _b = (node->rank); (_a > _b) ? _a : _b; }); \ | |||
| 867 | } | |||
| 868 | CCV_NNC_GRAPH_VISIT(graph, incomings, exec_info_size, destinations, destination_size, sources, source_size, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((incomings)[_i_].outgoings) ? (incomings)[_i_].outgoings ->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t ) * (exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t *)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_info_size ) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size )); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size )), (int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size ), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size) ; for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 868 , __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations )[_i_].d].r = 1; _exists_[0][_i_] = (destinations)[_i_].d; } int _exist_size_[2] = { (destination_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0 ; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue ; _incomings_[_idx_].r = 2; if ((incomings)[_idx_].outgoings) for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)(((incomings )[_idx_].outgoings)->data)) + (size_t)((incomings)[_idx_]. outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void ) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size) ) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_graph.c", 868 , __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations )[_i_].d].r = 3; _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_ [0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_ [_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_ ].r != 3) continue; _incomings_[_idx_].r = 4; if ((incomings) [_idx_].outgoings) for (_j_ = 0; _j_ < (incomings)[_idx_]. 
outgoings->rnum; _j_++) { const int d = *(int*)((void*)((( char*)(((incomings)[_idx_].outgoings)->data)) + (size_t)(( incomings)[_idx_].outgoings)->rsize * (size_t)(_j_))); if ( _incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; } _edges_[_incomings_ [d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c ; if (_incomings_[d].r != 2) continue; _incomings_[d].r = 3; ( (void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources )[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources )[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(sources)[_i_].d].r = 5; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_ ]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_ [_idx_].r != 5) continue; _incomings_[_idx_].r = 6; if (_incomings_ [_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_ ].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d ].r = 5; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size )) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for ( _i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources )[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources )[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(sources)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations )[_i_].graph == graph) ? 
1 : 0), __extension__ ({ if ((destinations )[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_] = (destinations)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (destination_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_ [_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; visitor(((incomings) + _idx_ ), (_idx_), (_incomings_[_idx_].d)); if (_incomings_[_idx_].d ) { ++_d_; _incomings_[_idx_].r = 7; } if ((incomings)[_idx_] .outgoings) { if ((incomings)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((incomings)[_idx_] .outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings )->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (source_size)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (incomings)[_idx_].outgoings->rnum; _j_ ++) { const int d = *(int*)((void*)(((char*)(((incomings)[_idx_ ].outgoings)->data)) + (size_t)((incomings)[_idx_].outgoings )->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (source_size)) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size )) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_ ]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_ )); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_[(sources)[_i_].d].r == 7) continue; if ( !(0)) { ((void) sizeof ((_incomings_[(sources)[_i_].d].c == 0 ) ? 1 : 0), __extension__ ({ if (_incomings_[(sources)[_i_].d ].c == 0) ; else __assert_fail ("_incomings_[(sources)[_i_].d].c == 0" , "ccv_nnc_graph.c", 868, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_[(sources)[_i_].d].c > 0) continue ; visitor(((incomings) + (sources)[_i_].d), ((sources)[_i_].d ), (_incomings_[(sources)[_i_].d].d)); } if (_heap_mem_) free (_incomings_); } while (0);; | |||
| 869 | #undef visitor | |||
| 870 | int device_ids[max_device_id_size]; | |||
| 871 | int outgoing_device_ids[max_device_id_size]; | |||
| 872 | int signal_size = 0; | |||
| 873 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 874 | // Go through the incomings. | |||
| 875 | const int device_id_size = _ccv_nnc_device_ids_for_stream_data(node, device_id, stream_data, device_ids, max_device_id_size); | |||
| 876 | if (schd_info[idx].stream_size == 0) | |||
| 877 | { | |||
| 878 | schd_info[idx].stream_size = device_id_size; // At least the same size as device_id_size. | |||
| 879 | if (device_id_size > 1) | |||
| 880 | { | |||
| 881 | schd_info[idx]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * device_id_size * 2); | |||
| 882 | schd_info[idx]._heap_signals = (schd_info[idx]._heap_streams + device_id_size); | |||
| 883 | } | |||
| 884 | for (i = 0; i < device_id_size; i++) | |||
| 885 | SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] = -1, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i] = -1; | |||
| 886 | } | |||
| 887 | for (i = 0; i < device_id_size; i++) | |||
| 888 | // Go through until the end to assign streams. | |||
| 889 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] < 0) | |||
| 890 | { | |||
| 891 | int stream_idx = -1; | |||
| 892 | int stream_has_command = 0; | |||
| 893 | // First, find a good stream in stream data (the stream is good if it can be recycled, and it has the same command). | |||
| 894 | // Otherwise, we prefer a usable stream (it doesn't have the command, but it can be recycled). | |||
| 895 | for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++) | |||
| 896 | { | |||
| 897 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(j))); | |||
| 898 | if (data->device_id == device_ids[i]) | |||
| 899 | { | |||
| 900 | const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, data->exec_idx); | |||
| 901 | // If there is a path to conclude that exec_idx is before idx, then we can reuse | |||
| 902 | // this stream. Otherwise the work in this "empty stream" could still be ongoing, | |||
| 903 | // and we may delay the following work unnecessarily. | |||
| 904 | if (cell.i32 && cell.i32[0] > 0) | |||
| 905 | { | |||
| 906 | if (ccv_array_find_uint(data->command_set, node->cmd.cmd)) | |||
| 907 | stream_idx = j, stream_has_command = 1; | |||
| 908 | else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet. | |||
| 909 | stream_idx = j; | |||
| 910 | } | |||
| 911 | } | |||
| 912 | } | |||
| 913 | if (stream_idx < 0) | |||
| 914 | { | |||
| 915 | // Note that the max stream count is a "soft" limit. Even when we have different devices, our compute allocations have to be on different streams. | |||
| 916 | if (stream_data->rnum >= max_stream_count && max_stream_count > 0) | |||
| 917 | { | |||
| 918 | // If we are already at our limit, go through again to see if a stream is available: whether the stream has the command, and also that its exec_idx does not precede this execution. | |||
| 919 | for (j = 0; (stream_idx < 0 || !stream_has_command) && j < stream_data->rnum; j++) | |||
| 920 | { | |||
| 921 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, j)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(j))); | |||
| 922 | if (data->device_id == device_ids[i]) | |||
| 923 | { | |||
| 924 | const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, data->exec_idx, idx); | |||
| 925 | // There must be no path from idx to exec_idx, otherwise we would already have stream_idx. Now we just need to verify | |||
| 926 | // there is no path from exec_idx to idx as well. | |||
| 927 | if (!cell.i32 || cell.i32[0] == 0) | |||
| 928 | { | |||
| 929 | if (ccv_array_find_uint(data->command_set, node->cmd.cmd)) | |||
| 930 | stream_idx = j, stream_has_command = 1; | |||
| 931 | else if (stream_idx < 0) // Otherwise, only assign the stream idx if it is not assigned yet. | |||
| 932 | stream_idx = j; | |||
| 933 | } | |||
| 934 | } | |||
| 935 | } | |||
| 936 | if (stream_idx >= 0) | |||
| 937 | { | |||
| 938 | // Now we need to mark that exec_idx is after idx, so we can avoid an A -> B -> A deadlock. | |||
| 939 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(stream_idx))); | |||
| 940 | const int32_t one = 1; | |||
| 941 | ccv_set_sparse_matrix_cell(exec_dep, idx, data->exec_idx, &one); | |||
| 942 | } | |||
| 943 | } | |||
| 944 | if (stream_idx < 0) | |||
| 945 | { | |||
| 946 | stream_idx = stream_data->rnum; | |||
| 947 | const ccv_nnc_stream_data_t data = { | |||
| 948 | .device_id = device_ids[i], | |||
| 949 | }; | |||
| 950 | ccv_array_push(stream_data, &data); | |||
| 951 | } | |||
| 952 | } | |||
| 953 | assert(stream_idx >= 0)((void) sizeof ((stream_idx >= 0) ? 1 : 0), __extension__ ( { if (stream_idx >= 0) ; else __assert_fail ("stream_idx >= 0" , "ccv_nnc_graph.c", 953, __extension__ __PRETTY_FUNCTION__); })); | |||
| 954 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(stream_idx))); | |||
| 955 | if (!data->command_set) | |||
| 956 | data->command_set = ccv_array_new(sizeof(uint32_t), 1, 0); | |||
| 957 | SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] = stream_idx; | |||
| 958 | ccv_array_add_unique_uint(data->command_set, node->cmd.cmd); | |||
| 959 | // Assign all subsequent nodes to use this stream. | |||
| 960 | int outgoing_idx = idx; | |||
| 961 | // If we want to enforce that the stream count is only 1, we certainly don't want to use the greedy approach. | |||
| 962 | // With the greedy approach, the current stream will go all the way down and certainly conflict with | |||
| 963 | // other streams. We'd prefer to interleave the execution instead in this case. | |||
| 964 | if (max_stream_count != 1) | |||
| 965 | while (outgoings[outgoing_idx] && outgoings[outgoing_idx]->rnum) | |||
| 966 | { | |||
| 967 | int highest_rank = -1; | |||
| 968 | int highest_idx = -1; | |||
| 969 | int stream_n = -1; | |||
| 970 | int stream_has_command = 0; | |||
| 971 | for (j = 0; j < outgoings[outgoing_idx]->rnum; j++) | |||
| 972 | { | |||
| 973 | const int d = *(int*)ccv_array_get(outgoings[outgoing_idx], j)((void*)(((char*)((outgoings[outgoing_idx])->data)) + (size_t )(outgoings[outgoing_idx])->rsize * (size_t)(j))); | |||
| 974 | // This is not outside of our scope at this point. | |||
| 975 | assert(schd_info[d].stream_size >= 0)((void) sizeof ((schd_info[d].stream_size >= 0) ? 1 : 0), __extension__ ({ if (schd_info[d].stream_size >= 0) ; else __assert_fail ("schd_info[d].stream_size >= 0", "ccv_nnc_graph.c", 975, __extension__ __PRETTY_FUNCTION__); })); | |||
| 976 | ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + d; | |||
| 977 | const int outgoing_device_id_size = _ccv_nnc_device_ids_for_stream_data(outgoing_node, device_id, stream_data, outgoing_device_ids, max_device_id_size); | |||
| 978 | if (schd_info[d].stream_size == 0) | |||
| 979 | { | |||
| 980 | schd_info[d].stream_size = outgoing_device_id_size; // At least the same size as device_id_size. | |||
| 981 | if (outgoing_device_id_size > 1) | |||
| 982 | { | |||
| 983 | schd_info[d]._heap_streams = (int*)ccmallocmalloc(sizeof(int) * outgoing_device_id_size * 2); | |||
| 984 | schd_info[d]._heap_signals = (schd_info[d]._heap_streams + outgoing_device_id_size); | |||
| 985 | } | |||
| 986 | for (k = 0; k < outgoing_device_id_size; k++) | |||
| 987 | SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams : (schd_info[d])._heap_streams)[k] = -1, SCHEDULE_SIGNALS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_signals : (schd_info[d])._heap_signals)[k] = -1; | |||
| 988 | } | |||
| 989 | assert(schd_info[d].stream_size == outgoing_device_id_size)((void) sizeof ((schd_info[d].stream_size == outgoing_device_id_size ) ? 1 : 0), __extension__ ({ if (schd_info[d].stream_size == outgoing_device_id_size ) ; else __assert_fail ("schd_info[d].stream_size == outgoing_device_id_size" , "ccv_nnc_graph.c", 989, __extension__ __PRETTY_FUNCTION__); })); | |||
| 990 | for (k = 0; k < outgoing_device_id_size; k++) | |||
| 991 | // If it should be on the same device and the stream is not assigned yet, it is a candidate. | |||
| 992 | if (outgoing_device_ids[k] == device_ids[i] && | |||
| 993 | SCHEDULE_STREAMS(schd_info[d])((schd_info[d]).stream_size <= 1 ? (schd_info[d])._inline_streams : (schd_info[d])._heap_streams)[k] < 0 && | |||
| 994 | (incomings[d].rank > highest_rank || | |||
| 995 | (incomings[d].rank == highest_rank && | |||
| 996 | !stream_has_command && ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd)))) | |||
| 997 | { | |||
| 998 | highest_rank = incomings[d].rank; | |||
| 999 | highest_idx = d; | |||
| 1000 | stream_n = k; | |||
| 1001 | // This is 1 if the rank is the same (thus, the tie must have been broken already); if the rank is not the same, we need to compute this. | |||
| 1002 | stream_has_command = (incomings[d].rank == highest_rank || ccv_array_find_uint(data->command_set, outgoing_node->cmd.cmd)); | |||
| 1003 | } | |||
| 1004 | } | |||
| 1005 | if (highest_idx >= 0) | |||
| 1006 | { | |||
| 1007 | outgoing_idx = highest_idx; | |||
| 1008 | ccv_nnc_graph_exec_info_t* const outgoing_node = exec_info + outgoing_idx; | |||
| 1009 | assert(stream_n >= 0)((void) sizeof ((stream_n >= 0) ? 1 : 0), __extension__ ({ if (stream_n >= 0) ; else __assert_fail ("stream_n >= 0" , "ccv_nnc_graph.c", 1009, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1010 | SCHEDULE_STREAMS(schd_info[outgoing_idx])((schd_info[outgoing_idx]).stream_size <= 1 ? (schd_info[outgoing_idx ])._inline_streams : (schd_info[outgoing_idx])._heap_streams)[stream_n] = stream_idx; | |||
| 1011 | ccv_array_add_unique_uint(data->command_set, outgoing_node->cmd.cmd); | |||
| 1012 | } else | |||
| 1013 | break; | |||
| 1014 | } | |||
| 1015 | data->exec_idx = outgoing_idx; | |||
| 1016 | } | |||
| 1017 | } ccv_nnc_graph_visit_endfor} } | |||
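Within the visit above, once a node obtains a stream the scheduler greedily walks down the pruned outgoings, handing the same stream to the best still-unassigned successor on the same device (highest rank first, with command-set membership breaking ties), so a single stream tends to follow one long chain. A simplified standalone sketch of that chaining loop, ignoring devices and the command-set tie-break (all arrays hypothetical):

#include <stdio.h>

#define N 5

int main(void)
{
	/* stream[] is -1 until assigned; outgoings[][] lists up to two pruned
	 * successors per node (-1 means none); rank[] orders candidates. */
	int stream[N] = {-1, -1, -1, -1, -1};
	const int outgoings[N][2] = {{1, 2}, {3, -1}, {-1, -1}, {4, -1}, {-1, -1}};
	const int rank[N] = {4, 3, 1, 2, 1};
	int i, idx = 0;
	const int stream_idx = 0;
	stream[idx] = stream_idx;
	for (;;) /* Hand the same stream to the highest-rank unassigned successor. */
	{
		int best = -1;
		for (i = 0; i < 2; i++)
		{
			const int d = outgoings[idx][i];
			if (d >= 0 && stream[d] < 0 && (best < 0 || rank[d] > rank[best]))
				best = d;
		}
		if (best < 0)
			break;
		stream[best] = stream_idx;
		idx = best;
	}
	for (i = 0; i < N; i++)
		printf("node %d -> stream %d\n", i, stream[i]); /* 0, 1, 3, 4 share stream 0. */
	return 0;
}

Node 2 stays unassigned here; in the real pass it would pick up its own stream on a later visit, which is exactly how extra streams come into being.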
| 1018 | // Go through to assign signals when necessary. | |||
| 1019 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1020 | if (incomings[idx].outgoings && incomings[idx].outgoings->rnum) | |||
| 1021 | _ccv_nnc_graph_schedule_assign_signals(incomings[idx].outgoings, schd_info + idx, stream_data, &signal_size, schd_info, exec_info_size); | |||
| 1022 | } ccv_nnc_graph_visit_endfor} } | |||
| 1023 | for (i = 0; i < exec_info_size; i++) | |||
| 1024 | if (outgoings[i]) | |||
| 1025 | ccv_array_free(outgoings[i]); | |||
| 1026 | ccfreefree(outgoings); | |||
| 1027 | ccv_matrix_free(exec_dep); | |||
| 1028 | ccv_nnc_stream_data_t* const default_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(0))); | |||
| 1029 | if (device_id >= 0) | |||
| 1030 | { | |||
| 1031 | // If the default stream (stream 0) is not the same as desired stream, swap with the one that is. | |||
| 1032 | if (default_data->device_id != device_id) | |||
| 1033 | { | |||
| 1034 | int exchange_stream_idx = -1; | |||
| 1035 | // Find the stream idx to exchange. | |||
| 1036 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1037 | int flag = 0; | |||
| 1038 | for(i = 0; !flag && i < schd_info[idx].stream_size; i++) | |||
| 1039 | { | |||
| 1040 | const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i]; | |||
| 1041 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(stream_idx))); | |||
| 1042 | if (data->device_id == device_id) | |||
| 1043 | { | |||
| 1044 | exchange_stream_idx = stream_idx; | |||
| 1045 | flag = 1; | |||
| 1046 | } | |||
| 1047 | } | |||
| 1048 | if (flag) | |||
| 1049 | break; | |||
| 1050 | } ccv_nnc_graph_visit_endfor} } | |||
| 1051 | assert(exchange_stream_idx >= 0)((void) sizeof ((exchange_stream_idx >= 0) ? 1 : 0), __extension__ ({ if (exchange_stream_idx >= 0) ; else __assert_fail ("exchange_stream_idx >= 0" , "ccv_nnc_graph.c", 1051, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1052 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1053 | for (i = 0; i < schd_info[idx].stream_size; i++) | |||
| 1054 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] == 0) | |||
| 1055 | SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] = -1; | |||
| 1056 | } ccv_nnc_graph_visit_endfor} } | |||
| 1057 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1058 | for (i = 0; i < schd_info[idx].stream_size; i++) | |||
| 1059 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] == exchange_stream_idx) | |||
| 1060 | SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] = 0; | |||
| 1061 | } ccv_nnc_graph_visit_endfor} } | |||
| 1062 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1063 | for (i = 0; i < schd_info[idx].stream_size; i++) | |||
| 1064 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] == -1) | |||
| 1065 | SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] = exchange_stream_idx; | |||
| 1066 | } ccv_nnc_graph_visit_endfor} } | |||
| 1067 | ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, exchange_stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(exchange_stream_idx))))->device_id = default_data->device_id; | |||
| 1068 | default_data->device_id = device_id; | |||
| 1069 | } | |||
| 1070 | } | |||
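The three passes above are a safe in-place label swap: every occurrence of stream 0 is first parked at -1, every occurrence of exchange_stream_idx then becomes 0, and finally the parked entries become exchange_stream_idx, so no assignment is read after being overwritten. The same trick in miniature (values hypothetical):

#include <stdio.h>

#define N 6

int main(void)
{
	int streams[N] = {0, 2, 1, 0, 2, 1};
	const int exchange = 2; /* Swap every 0 with every 2, using -1 as a parking slot. */
	int i;
	for (i = 0; i < N; i++)
		if (streams[i] == 0)
			streams[i] = -1; /* Park the 0s. */
	for (i = 0; i < N; i++)
		if (streams[i] == exchange)
			streams[i] = 0; /* Move the exchanged label into slot 0. */
	for (i = 0; i < N; i++)
		if (streams[i] == -1)
			streams[i] = exchange; /* Unpark. */
	for (i = 0; i < N; i++)
		printf("%d ", streams[i]); /* Prints: 2 0 1 2 0 1 */
	printf("\n");
	return 0;
}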
| 1071 | int graph_stream_1_size = 0; | |||
| 1072 | for (i = 0; i < source_size; i++) | |||
| 1073 | { | |||
| 1074 | const int idx = sources[i].d; | |||
| 1075 | // If it has incoming nodes, check whether these are on stream 0. | |||
| 1076 | if (incomings[idx].outgoings && incomings[idx].outgoings->rnum) | |||
| 1077 | { | |||
| 1078 | int flag = 0; | |||
| 1079 | const ccv_array_t* const incoming = incomings[idx].outgoings; | |||
| 1080 | for (j = 0; !flag && j < incoming->rnum; j++) | |||
| 1081 | { | |||
| 1082 | const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)-> rsize * (size_t)(j))); | |||
| 1083 | for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++) | |||
| 1084 | flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx ])._inline_streams : (schd_info[incoming_idx])._heap_streams)[k] == 0); // If this is the default stream, we already have a good start. | |||
| 1085 | } | |||
| 1086 | if (flag) | |||
| 1087 | continue; | |||
| 1088 | } | |||
| 1089 | for (j = 0; j < schd_info[idx].stream_size; j++) | |||
| 1090 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[j] != 0) // If this is not the default stream, we need explicit begin signal to start. | |||
| 1091 | ++graph_stream_1_size; | |||
| 1092 | } | |||
| 1093 | if (graph_stream_1_size > 0) | |||
| 1094 | { | |||
| 1095 | schedule->stream_1s = ccmallocmalloc(sizeof(int) * graph_stream_1_size); | |||
| 1096 | graph_stream_1_size = 0; | |||
| 1097 | for (i = 0; i < source_size; i++) | |||
| 1098 | { | |||
| 1099 | const int idx = sources[i].d; | |||
| 1100 | // If it has incoming nodes, check whether these are on stream 0. | |||
| 1101 | if (incomings[idx].outgoings && incomings[idx].outgoings->rnum) | |||
| 1102 | { | |||
| 1103 | int flag = 0; | |||
| 1104 | const ccv_array_t* const incoming = incomings[idx].outgoings; | |||
| 1105 | for (j = 0; !flag && j < incoming->rnum; j++) | |||
| 1106 | { | |||
| 1107 | const int incoming_idx = *(int*)ccv_array_get(incoming, j)((void*)(((char*)((incoming)->data)) + (size_t)(incoming)-> rsize * (size_t)(j))); | |||
| 1108 | for (k = 0; !flag && k < schd_info[incoming_idx].stream_size; k++) | |||
| 1109 | flag = (SCHEDULE_STREAMS(schd_info[incoming_idx])((schd_info[incoming_idx]).stream_size <= 1 ? (schd_info[incoming_idx ])._inline_streams : (schd_info[incoming_idx])._heap_streams)[k] == 0); // If this is the default stream, we already have a good start. | |||
| 1110 | } | |||
| 1111 | if (flag) | |||
| 1112 | continue; | |||
| 1113 | } | |||
| 1114 | for (j = 0; j < schd_info[idx].stream_size; j++) | |||
| 1115 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[j] != 0) // If this is not the default stream, we need explicit begin signal to start. | |||
| 1116 | { | |||
| 1117 | const int stream_idx = SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[j]; | |||
| 1118 | int flag = 0; | |||
| 1119 | for (k = 0; !flag && k < graph_stream_1_size; k++) | |||
| 1120 | flag = (stream_idx == schedule->stream_1s[k]); | |||
| 1121 | if (!flag) | |||
| 1122 | schedule->stream_1s[graph_stream_1_size++] = stream_idx; | |||
| 1123 | } | |||
| 1124 | } | |||
| 1125 | schedule->stream_1_size = graph_stream_1_size; | |||
| 1126 | } | |||
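Both source-side passes above follow the usual two-phase pattern: one pass counts how many entries qualify so stream_1s can be sized exactly, and a second identical pass fills it while deduplicating. A minimal sketch of the dedup step (values hypothetical):

#include <stdio.h>

/* Append v to out (of current length *n) only when absent; mirrors the
 * stream_1s dedup above. */
static void push_unique(int* out, int* n, int v)
{
	int k, flag = 0;
	for (k = 0; !flag && k < *n; k++)
		flag = (v == out[k]);
	if (!flag)
		out[(*n)++] = v;
}

int main(void)
{
	int out[4], n = 0, i;
	const int in[5] = {2, 1, 2, 3, 1};
	for (i = 0; i < 5; i++)
		push_unique(out, &n, in[i]);
	for (i = 0; i < n; i++)
		printf("%d ", out[i]); /* Prints: 2 1 3 */
	printf("\n");
	return 0;
}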
| 1127 | for (i = 0; i < exec_info_size; i++) | |||
| 1128 | if (incomings[i].outgoings) | |||
| 1129 | ccv_array_free(incomings[i].outgoings); | |||
| 1130 | ccfreefree(incomings); | |||
| 1131 | int graph_wait_size = 0; | |||
| 1132 | for (i = 0; i < destination_size; i++) | |||
| 1133 | { | |||
| 1134 | const int idx = destinations[i].d; | |||
| 1135 | for (j = 0; j < schd_info[idx].stream_size; j++) | |||
| 1136 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[j] != 0) // If this exec_info doesn't end with default stream, we need to wait. | |||
| 1137 | ++graph_wait_size; | |||
| 1138 | } | |||
| 1139 | if (graph_wait_size > 0) | |||
| 1140 | { | |||
| 1141 | schedule->waits = ccmallocmalloc(sizeof(int) * graph_wait_size); | |||
| 1142 | graph_wait_size = 0; | |||
| 1143 | for (i = 0; i < destination_size; i++) | |||
| 1144 | { | |||
| 1145 | const int idx = destinations[i].d; | |||
| 1146 | for (j = 0; j < schd_info[idx].stream_size; j++) | |||
| 1147 | if (SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[j] != 0) // If this exec_info doesn't end with default stream, we need to wait. | |||
| 1148 | { | |||
| 1149 | ccv_nnc_stream_data_t* const default_stream_data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, 0)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(0))); | |||
| 1150 | if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[j] < 0) | |||
| 1151 | SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[j] = signal_size++; | |||
| 1152 | else if (default_stream_data->signal_set && ccv_array_find_int(default_stream_data->signal_set, SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[j])) | |||
| 1153 | continue; | |||
| 1154 | schedule->waits[graph_wait_size++] = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[j]; | |||
| 1155 | } | |||
| 1156 | } | |||
| 1157 | schedule->wait_size = graph_wait_size; | |||
| 1158 | } | |||
| 1159 | for (i = 0; i < stream_data->rnum; i++) | |||
| 1160 | { | |||
| 1161 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(i))); | |||
| 1162 | if (data->signal_set) | |||
| 1163 | ccv_array_free(data->signal_set); | |||
| 1164 | assert(data->command_set)((void) sizeof ((data->command_set) ? 1 : 0), __extension__ ({ if (data->command_set) ; else __assert_fail ("data->command_set" , "ccv_nnc_graph.c", 1164, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1165 | ccv_array_free(data->command_set); | |||
| 1166 | } | |||
| 1167 | // Allocate streams & signals | |||
| 1168 | int default_stream_type = stream_type; | |||
| 1169 | CCV_STREAM_SET_DEVICE_ID(default_stream_type, default_data->device_id)(default_stream_type) = (((default_stream_type) & ~0xfff00 ) | (((default_data->device_id) & 0xfff) << 8)); | |||
| 1170 | if (root_schedule) | |||
| 1171 | { | |||
| 1172 | assert(!graph->streams)((void) sizeof ((!graph->streams) ? 1 : 0), __extension__ ( { if (!graph->streams) ; else __assert_fail ("!graph->streams" , "ccv_nnc_graph.c", 1172, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1173 | graph->stream_size = stream_data->rnum; | |||
| 1174 | graph->streams = (ccv_nnc_stream_context_t**)ccmallocmalloc(sizeof(ccv_nnc_stream_context_t*) * graph->stream_size); | |||
| 1175 | graph->block_stream_tasks = (co_routine_t**)cccalloccalloc(graph->stream_size, sizeof(co_routine_t*)); | |||
| 1176 | if (stream_context) | |||
| 1177 | graph->streams[0] = stream_context; | |||
| 1178 | for (i = (stream_context ? 1 : 0); i < stream_data->rnum; i++) | |||
| 1179 | { | |||
| 1180 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(i))); | |||
| 1181 | int type = stream_type; | |||
| 1182 | CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) & 0xfff) << 8)); | |||
| 1183 | graph->streams[i] = ccv_nnc_stream_context_new(type); | |||
| 1184 | } | |||
| 1185 | graph->signal_size = signal_size; | |||
| 1186 | graph->signals = (ccv_nnc_stream_signal_t**)cccalloccalloc(signal_size, sizeof(ccv_nnc_stream_signal_t*)); | |||
| 1187 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1188 | for (i = 0; i < schd_info[idx].stream_size; i++) | |||
| 1189 | if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i] >= 0) | |||
| 1190 | { | |||
| 1191 | const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i]; | |||
| 1192 | if (!graph->signals[signal]) | |||
| 1193 | { | |||
| 1194 | const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams )[i]))); | |||
| 1195 | int type = stream_type; | |||
| 1196 | CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) & 0xfff) << 8)); | |||
| 1197 | graph->signals[signal] = ccv_nnc_stream_signal_new(type); | |||
| 1198 | } | |||
| 1199 | } | |||
| 1200 | } ccv_nnc_graph_visit_endfor} } | |||
| 1201 | } else { | |||
| 1202 | assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ ( { if (graph->streams) ; else __assert_fail ("graph->streams" , "ccv_nnc_graph.c", 1202, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1203 | assert(graph->stream_size >= stream_data->rnum)((void) sizeof ((graph->stream_size >= stream_data-> rnum) ? 1 : 0), __extension__ ({ if (graph->stream_size >= stream_data->rnum) ; else __assert_fail ("graph->stream_size >= stream_data->rnum" , "ccv_nnc_graph.c", 1203, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1204 | // Map schedule streams to properly allocated graph streams based on the type we need. | |||
| 1205 | int* const stream_idxs = (int*)ccmallocmalloc(sizeof(int) * (stream_data->rnum + signal_size)); | |||
| 1206 | uint64_t* const stream_used = (uint64_t*)cccalloccalloc(((graph->stream_size + 63) >> 6) + ((graph->signal_size + 63) >> 6), sizeof(uint64_t)); | |||
| 1207 | for (i = 0; i < stream_data->rnum; i++) | |||
| 1208 | { | |||
| 1209 | ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, i)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(i))); | |||
| 1210 | int type = stream_type; | |||
| 1211 | CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) & 0xfff) << 8)); | |||
| 1212 | for (j = 0; j < graph->stream_size; j++) | |||
| 1213 | if (!(stream_used[j >> 6] & ((uint64_t)1 << (j & 63)))) | |||
| 1214 | { | |||
| 1215 | const int stream_type = ccv_nnc_stream_context_type(graph->streams[j]); | |||
| 1216 | if (stream_type == type) | |||
| 1217 | { | |||
| 1218 | stream_idxs[i] = j; | |||
| 1219 | stream_used[j >> 6] |= ((uint64_t)1 << (j & 63)); | |||
| 1220 | break; | |||
| 1221 | } | |||
| 1222 | } | |||
| 1223 | } | |||
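The loop above is a first-fit matcher: each pre-allocated graph stream may be claimed at most once, tracked by a bitset of 64-bit words, and a schedule stream takes the first unclaimed one whose type (device id encoded in the high bits) matches. A standalone sketch of the claim step (types are hypothetical integers):

#include <stdint.h>
#include <stdio.h>

/* Claim the first unused slot whose type matches, marking it in a bitset of
 * 64-bit words exactly as the loop above does. Returns -1 when none is free. */
static int claim(const int* types, int n, uint64_t* used, int want)
{
	int j;
	for (j = 0; j < n; j++)
		if (!(used[j >> 6] & ((uint64_t)1 << (j & 63))) && types[j] == want)
		{
			used[j >> 6] |= ((uint64_t)1 << (j & 63));
			return j;
		}
	return -1;
}

int main(void)
{
	const int types[4] = {7, 8, 7, 8};
	uint64_t used[1] = {0};
	const int a = claim(types, 4, used, 8);
	const int b = claim(types, 4, used, 8);
	const int c = claim(types, 4, used, 8);
	printf("%d %d %d\n", a, b, c); /* Prints: 1 3 -1 */
	return 0;
}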
| 1224 | assert(graph->signal_size >= signal_size)((void) sizeof ((graph->signal_size >= signal_size) ? 1 : 0), __extension__ ({ if (graph->signal_size >= signal_size ) ; else __assert_fail ("graph->signal_size >= signal_size" , "ccv_nnc_graph.c", 1224, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1225 | // Map schedule signals to properly allocated graph signals based on the type we need. | |||
| 1226 | int* const signal_idxs = stream_idxs + stream_data->rnum; | |||
| 1227 | uint64_t* const signal_used = stream_used + ((graph->stream_size + 63) >> 6); | |||
| 1228 | for (i = 0; i < signal_size; i++) | |||
| 1229 | signal_idxs[i] = -1; | |||
| 1230 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1231 | for (i = 0; i < schd_info[idx].stream_size; i++) | |||
| 1232 | if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i] >= 0) | |||
| 1233 | { | |||
| 1234 | const int signal = SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i]; | |||
| 1235 | if (signal_idxs[signal] < 0) | |||
| 1236 | { | |||
| 1237 | const ccv_nnc_stream_data_t* const data = (ccv_nnc_stream_data_t*)ccv_array_get(stream_data, SCHEDULE_STREAMS(schd_info[idx])[i])((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams )[i]))); | |||
| 1238 | int type = stream_type; | |||
| 1239 | CCV_STREAM_SET_DEVICE_ID(type, data->device_id)(type) = (((type) & ~0xfff00) | (((data->device_id) & 0xfff) << 8)); | |||
| 1240 | for (j = 0; j < graph->signal_size; j++) | |||
| 1241 | if (!(signal_used[j >> 6] & ((uint64_t)1 << (j & 63)))) | |||
| 1242 | { | |||
| 1243 | const int signal_type = ccv_nnc_stream_signal_type(graph->signals[j]); | |||
| 1244 | if (signal_type == type) | |||
| 1245 | { | |||
| 1246 | signal_idxs[signal] = j; | |||
| 1247 | signal_used[j >> 6] |= ((uint64_t)1 << (j & 63)); | |||
| 1248 | break; | |||
| 1249 | } | |||
| 1250 | } | |||
| 1251 | } | |||
| 1252 | } | |||
| 1253 | } ccv_nnc_graph_visit_endfor} } | |||
| 1254 | // Now rebind streams and signals from the schedule. | |||
| 1255 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1256 | for (i = 0; i < schd_info[idx].stream_size; i++) | |||
| 1257 | { | |||
| 1258 | SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i] = stream_idxs[SCHEDULE_STREAMS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_streams : (schd_info[idx])._heap_streams)[i]]; | |||
| 1259 | if (SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i] >= 0) | |||
| 1260 | SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i] = signal_idxs[SCHEDULE_SIGNALS(schd_info[idx])((schd_info[idx]).stream_size <= 1 ? (schd_info[idx])._inline_signals : (schd_info[idx])._heap_signals)[i]]; | |||
| 1261 | } | |||
| 1262 | for (i = 0; i < schd_info[idx].wait_size; i++) | |||
| 1263 | schd_info[idx].waits[i] = signal_idxs[schd_info[idx].waits[i]]; | |||
| 1264 | } ccv_nnc_graph_visit_endfor} } | |||
| 1265 | for (i = 0; i < schedule->stream_1_size; i++) | |||
| 1266 | schedule->stream_1s[i] = stream_idxs[schedule->stream_1s[i]]; | |||
| 1267 | for (i = 0; i < schedule->wait_size; i++) | |||
| 1268 | schedule->waits[i] = signal_idxs[schedule->waits[i]]; | |||
| 1269 | // Rebind which stream is stream 0 (the default stream). | |||
| 1270 | schedule->stream_0 = stream_idxs[0]; | |||
| 1271 | ccfreefree(stream_used); | |||
| 1272 | ccfreefree(stream_idxs); | |||
| 1273 | } | |||
| 1274 | assert(graph->streams)((void) sizeof ((graph->streams) ? 1 : 0), __extension__ ( { if (graph->streams) ; else __assert_fail ("graph->streams" , "ccv_nnc_graph.c", 1274, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1275 | ccv_nnc_graph_visit_free(visit); | |||
| 1276 | for (i = 0; i < signal_size; i++) | |||
| 1277 | { assert(graph->signals[i])((void) sizeof ((graph->signals[i]) ? 1 : 0), __extension__ ({ if (graph->signals[i]) ; else __assert_fail ("graph->signals[i]" , "ccv_nnc_graph.c", 1277, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
| 1278 | if (schedule->stream_1_size) | |||
| 1279 | schedule->begin = ccv_nnc_stream_signal_new(default_stream_type); | |||
| 1280 | schedule->end = ccv_nnc_stream_signal_new(default_stream_type); | |||
| 1281 | // Do this recursively for its sub graphs. | |||
| 1282 | if (graph->sub_graphs) | |||
| 1283 | for (i = 0; i < graph->sub_graphs->rnum; i++) | |||
| 1284 | { | |||
| 1285 | ccv_nnc_graph_t* const sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t )(graph->sub_graphs)->rsize * (size_t)(i))); | |||
| 1286 | if (sub_graph && !sub_graph->default_schedule) | |||
| 1287 | { | |||
| 1288 | const int exec_idx = sub_graph->exec_idx - 1; | |||
| 1289 | assert(schd_info[exec_idx].stream_size == 1)((void) sizeof ((schd_info[exec_idx].stream_size == 1) ? 1 : 0 ), __extension__ ({ if (schd_info[exec_idx].stream_size == 1) ; else __assert_fail ("schd_info[exec_idx].stream_size == 1" , "ccv_nnc_graph.c", 1289, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1290 | const int stream_idx = SCHEDULE_STREAMS(schd_info[exec_idx])((schd_info[exec_idx]).stream_size <= 1 ? (schd_info[exec_idx ])._inline_streams : (schd_info[exec_idx])._heap_streams)[0]; | |||
| 1291 | const int device_id = ((ccv_nnc_stream_data_t*)ccv_array_get(stream_data, stream_idx)((void*)(((char*)((stream_data)->data)) + (size_t)(stream_data )->rsize * (size_t)(stream_idx))))->device_id; | |||
| 1292 | sub_graph->default_schedule = _ccv_nnc_graph_static_schedule_new(sub_graph, stream_type, device_id, max_stream_count, graph->streams[stream_idx], 0, 0, 0, 0); | |||
| 1293 | } | |||
| 1294 | } | |||
| 1295 | ccv_array_free(stream_data); | |||
| 1296 | return schedule; | |||
| 1297 | } | |||
| 1298 | void ccv_nnc_graph_set_default_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type, const int max_stream_count) | |||
| 1299 | { | |||
| 1300 | assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({ if (graph->p == 0) ; else __assert_fail ("graph->p == 0" , "ccv_nnc_graph.c", 1300, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1301 | if (graph->default_schedule) | |||
| 1302 | ccv_nnc_graph_static_schedule_free(graph->default_schedule); | |||
| 1303 | graph->default_schedule = _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, max_stream_count, 0, 0, 0, 0, 0); | |||
| 1304 | } | |||
| 1305 | ||||
| 1306 | ccv_nnc_graph_static_schedule_t* ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int max_stream_count, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size) | |||
| 1307 | { | |||
| 1308 | assert(graph->p == 0)((void) sizeof ((graph->p == 0) ? 1 : 0), __extension__ ({ if (graph->p == 0) ; else __assert_fail ("graph->p == 0" , "ccv_nnc_graph.c", 1308, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 1309 | return _ccv_nnc_graph_static_schedule_new(graph, stream_type, -1, max_stream_count, 0, sources, source_size, destinations, destination_size); | |||
| 1310 | } | |||
| 1311 | ||||
| 1312 | ccv_nnc_stream_context_t* ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph) | |||
| 1313 | { | |||
| 1314 | if (graph->streams && graph->stream_size > 0) | |||
| 1315 | return graph->streams[0]; | |||
| 1316 | return 0; | |||
| 1317 | } | |||
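A hedged usage sketch of the public entry points above, assuming a graph that was already built and topsorted elsewhere, and assuming `CCV_STREAM_CONTEXT_GPU` and `ccv_nnc_stream_context_wait` from ccv_nnc.h. Passing 0 for max_stream_count leaves the soft stream cap disabled, since the allocation loop above only enforces it when the count is positive.

#include "ccv_nnc.h"

/* Schedule `graph` onto streams and wait for its default stream; a sketch, not
 * a complete program -- building and running the graph happens elsewhere. */
static void schedule_and_sync(ccv_nnc_graph_t* const graph)
{
	ccv_nnc_graph_set_default_static_schedule(graph, CCV_STREAM_CONTEXT_GPU, 0);
	ccv_nnc_stream_context_t* const stream = ccv_nnc_graph_default_stream(graph);
	/* ... running the graph against its default schedule would go here ... */
	ccv_nnc_stream_context_wait(stream); /* Block until the default stream drains. */
}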
| 1318 | ||||
| 1319 | static void _ccv_nnc_graph_dot_exec(const int index, const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, ccv_nnc_stream_context_t** const streams, const int flags, FILE* out) | |||
| 1320 | { | |||
| 1321 | if (flags == CCV_NNC_LONG_DOT_GRAPH) | |||
| 1322 | fputc('{', out); | |||
| 1323 | fprintf(out, "node%d", index); | |||
| 1324 | if (flags == CCV_NNC_LONG_DOT_GRAPH) | |||
| 1325 | { | |||
| 1326 | fputs("|Command: ", out); | |||
| 1327 | fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out); | |||
| 1328 | if (schd_info) | |||
| 1329 | { | |||
| 1330 | if (schd_info->stream_size > 0) | |||
| 1331 | { | |||
| 1332 | int i, flag = 0; | |||
| 1333 | fputs("|Stream: ", out); | |||
| 1334 | for (i = 0; i < schd_info->stream_size; i++) | |||
| 1335 | { | |||
| 1336 | const int device_id = streams ? CCV_TENSOR_GET_DEVICE_ID(streams[SCHEDULE_STREAMS(*schd_info)[i]]->type)(((streams[((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams : (*schd_info)._heap_streams)[i]]->type) & 0xfff00) >> 8) : 0; | |||
| 1337 | if (i == 0) | |||
| 1338 | fprintf(out, "%d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams : (*schd_info)._heap_streams)[i], device_id); | |||
| 1339 | else | |||
| 1340 | fprintf(out, ", %d (d%d)", SCHEDULE_STREAMS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_streams : (*schd_info)._heap_streams)[i], device_id); | |||
| 1341 | } | |||
| 1342 | for (i = 0; i < schd_info->stream_size; i++) | |||
| 1343 | if (SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals : (*schd_info)._heap_signals)[i] >= 0) | |||
| 1344 | { | |||
| 1345 | if (!flag) | |||
| 1346 | { | |||
| 1347 | flag = 1; | |||
| 1348 | fprintf(out, "|Signal: %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals : (*schd_info)._heap_signals)[i]); | |||
| 1349 | } else | |||
| 1350 | fprintf(out, ", %d", SCHEDULE_SIGNALS(*schd_info)((*schd_info).stream_size <= 1 ? (*schd_info)._inline_signals : (*schd_info)._heap_signals)[i]); | |||
| 1351 | } | |||
| 1352 | } | |||
| 1353 | if (schd_info->wait_size > 0) | |||
| 1354 | { | |||
| 1355 | fputs("|Wait: ", out); | |||
| 1356 | int i; | |||
| 1357 | for (i = 0; i < schd_info->wait_size - 1; i++) | |||
| 1358 | fprintf(out, "%d, ", schd_info->waits[i]); | |||
| 1359 | fprintf(out, "%d", schd_info->waits[schd_info->wait_size - 1]); | |||
| 1360 | } | |||
| 1361 | } | |||
| 1362 | fputc('}', out); | |||
| 1363 | } | |||
| 1364 | } | |||
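For a long-form dump, the function above emits a DOT record label shaped like the following; the command name and the stream/signal/wait numbers are purely illustrative:

{node3|Command: CCV_NNC_GEMM_FORWARD|Stream: 1 (d0)|Signal: 2|Wait: 0, 1}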
| 1365 | ||||
| 1366 | static void _ccv_nnc_graph_dot_tensor(const int index, const ccv_nnc_tensor_t* const tensor, const int zone, const int flags, const int depth, FILE* out) | |||
| 1367 | { | |||
| 1368 | // If it has an alias pointer, or it is in the long form. | |||
| 1369 | if (flags == CCV_NNC_LONG_DOT_GRAPH) | |||
| 1370 | fputc('{', out); | |||
| 1371 | const int is_tensor_view = CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW); | |||
| 1372 | if (is_tensor_view) | |||
| 1373 | fprintf(out, "tensorview%d", index); | |||
| 1374 | else | |||
| 1375 | fprintf(out, "tensor%d", index); | |||
| 1376 | int i; | |||
| 1377 | for (i = 0; i < depth; i++) // Print tick marks (') to denote depth. | |||
| 1378 | fputc('\'', out); | |||
| 1379 | if (CCV_GET_TAPE_ALLOC(tensor->type)((tensor->type) & CCV_TAPE_ALLOC)) | |||
| 1380 | fputs(" (t)", out); | |||
| 1381 | if (flags == CCV_NNC_LONG_DOT_GRAPH) | |||
| 1382 | { | |||
| 1383 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)(((tensor->info.type) & 0xfff00) >> 8); | |||
| 1384 | fprintf(out, "|d%d|zone%d", device_id, zone); | |||
| 1385 | for (i = 0; i < depth; i++) // Print tick marks (') to denote depth. | |||
| 1386 | fputc('\'', out); | |||
| 1387 | uintptr_t aptr = (uintptr_t)tensor->data.u8; | |||
| 1388 | size_t tensor_size; | |||
| 1389 | if (is_tensor_view) | |||
| 1390 | tensor_size = (size_t)((ccv_nnc_tensor_view_t*)(tensor))->stride[0] * tensor->info.dim[0] * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >> 12]; | |||
| 1391 | else | |||
| 1392 | tensor_size = ccv_nnc_dimension_count(tensor->info.dim) * CCV_GET_DATA_TYPE_SIZE(tensor->type)_ccv_get_data_type_size[((tensor->type) & 0xFF000) >> 12]; | |||
| 1393 | // Print out the range as well. | |||
| 1394 | fprintf(out, "|{%#010x|%#010x}|%d", (uint32_t)aptr, (uint32_t)(aptr + tensor_size - 1), tensor->info.dim[0]); | |||
| 1395 | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor->info.dim[i]; i++) | |||
| 1396 | fprintf(out, "x%d", tensor->info.dim[i]); | |||
| 1397 | fputc('}', out); | |||
| 1398 | } | |||
| 1399 | } | |||
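As an illustration of the format strings above, a float32 8x16 tensor at a hypothetical address 0x1000 (512 bytes, so the range ends at 0x11ff) at depth 0 would be labeled:

{tensor0|d0|zone0|{0x00001000|0x000011ff}|8x16}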
| 1400 | ||||
| 1401 | typedef struct { | |||
| 1402 | int index; | |||
| 1403 | int name; | |||
| 1404 | int zone; | |||
| 1405 | uintptr_t tensor_ref; | |||
| 1406 | uintptr_t start_ptr; | |||
| 1407 | uintptr_t end_ptr; | |||
| 1408 | } ccv_nnc_tensor_dot_t; | |||
| 1409 | ||||
| 1410 | typedef struct { | |||
| 1411 | ccv_nnc_tensor_dot_t* dots; | |||
| 1412 | int* remap; | |||
| 1413 | int* rename_zone; | |||
| 1414 | int* rename_index; | |||
| 1415 | } ccv_nnc_tensor_dot_recovery_t; | |||
| 1416 | ||||
| 1417 | // First sort by start_ptr, then sort by tensor ptr (so that we will have the same tensor sorted to one cluster). | |||
| 1418 | #define less_than(i1, i2, aux) ((i1).start_ptr < (i2).start_ptr || ((i1).start_ptr == (i2).start_ptr && (i1).tensor_ref < (i2).tensor_ref)) | |||
| 1419 | static CCV_IMPLEMENT_QSORT(_ccv_nnc_tensor_dot_sort_by_ptr, ccv_nnc_tensor_dot_t, less_than) | |||
| 1420 | #undef less_than | |||
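// Editor's note: a minimal usage sketch, not part of the original file. The
// CCV_IMPLEMENT_QSORT line above instantiates a quicksort specialized for
// ccv_nnc_tensor_dot_t through the less_than comparator, which orders by start_ptr
// and breaks ties by tensor_ref, e.g.:
//
//   ccv_nnc_tensor_dot_t dots[8]; /* filled in by the caller */
//   _ccv_nnc_tensor_dot_sort_by_ptr(dots, 8, 0); /* the aux argument is unused by less_than */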
| 1421 | ||||
| 1422 | static int _ccv_nnc_graph_dot_tensor_multiview_count(const ccv_nnc_tensor_multiview_t* const mv) | |||
| 1423 | { | |||
| 1424 | if (!CCV_IS_TENSOR_MULTIVIEW(mv)) | |||
| 1425 | return 1; | |||
| 1426 | const int count = mv->kind + mv->repeat; | |||
| 1427 | int i, c = 0; | |||
| 1428 | for (i = 0; i < count; i++) | |||
| 1429 | c += _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)[i]); | |||
| 1430 | return c; | |||
| 1431 | } | |||
| 1432 | ||||
| 1433 | static void _ccv_nnc_graph_dot_tensor_multiview_tensor_dots(const ccv_nnc_tensor_multiview_t* const mv, ccv_nnc_tensor_dot_t* const tensor_dots, int* tensor_index) | |||
| 1434 | { | |||
| 1435 | const int count = mv->kind + mv->repeat; | |||
| 1436 | int i; | |||
| 1437 | for (i = 0; i < count; i++) | |||
| 1438 | if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])) | |||
| 1439 | _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)[i], tensor_dots, tensor_index); | |||
| 1440 | else { | |||
| 1441 | tensor_dots[*tensor_index].name = *tensor_index; | |||
| 1442 | tensor_dots[*tensor_index].start_ptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)[i]->data.u8; | |||
| 1443 | // Because tv's pointer will get updated, it is not correct in this case to use a single tensor_ref. | |||
| 1444 | tensor_dots[*tensor_index].tensor_ref = tensor_dots[*tensor_index].start_ptr; | |||
| 1445 | const size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type); | |||
| 1446 | tensor_dots[*tensor_index].end_ptr = tensor_dots[*tensor_index].start_ptr + dim_size - 1; | |||
| 1447 | ++(*tensor_index); | |||
| 1448 | } | |||
| 1449 | } | |||
| 1450 | ||||
| 1451 | static ccv_nnc_tensor_dot_recovery_t _ccv_nnc_graph_tensor_dot_recovery(const ccv_nnc_graph_t* const graph) | |||
| 1452 | { | |||
| 1453 | int i, j; | |||
| 1454 | // Recover tensor relationships for all tensors referenced in the graph. | |||
| 1455 | // Most notably, we have to assign these tensors indexes, and find out whether they point to | |||
| 1456 | // the same memory region and whether they overlap. This information | |||
| 1457 | // was lost when we converted from the symbolic form to the execution form, | |||
| 1458 | // and here we do our best to recover it, because the graph is easier to understand | |||
| 1459 | // when presented visually (also, we don't want to carry this | |||
| 1460 | // information in the tensor or the execution graph to avoid overhead, thus, | |||
| 1461 | // recovering is the best we can do). | |||
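// Editor's note (illustration, not in the original source): after recovery, "index"
// distinguishes tensor objects while "zone" groups overlapping memory. Two tensor views
// into the same buffer would therefore get two indexes but share a single zone.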
| 1462 | int tensor_count = 0; | |||
| 1463 | for (i = 0; i < graph->exec_info->rnum; i++) | |||
| 1464 | { | |||
| 1465 | ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i); | |||
| 1466 | for (j = 0; j < exec_info->input_size; j++) | |||
| 1467 | if (exec_info->inputs[j]) | |||
| 1468 | tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[j]) ? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->inputs[j]) : 1; | |||
| 1469 | for (j = 0; j < exec_info->output_size; j++) | |||
| 1470 | if (exec_info->outputs[j]) | |||
| 1471 | tensor_count += CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[j]) ? _ccv_nnc_graph_dot_tensor_multiview_count((ccv_nnc_tensor_multiview_t*)exec_info->outputs[j]) : 1; | |||
| 1472 | } | |||
| 1473 | ccv_nnc_tensor_dot_t* tensor_dots = tensor_count > 0 ? (ccv_nnc_tensor_dot_t*)ccmalloc(sizeof(ccv_nnc_tensor_dot_t) * tensor_count) : 0; | |||
| 1474 | int k = 0; | |||
| 1475 | for (i = 0; i < graph->exec_info->rnum; i++) | |||
| 1476 | { | |||
| 1477 | ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i); | |||
| 1478 | for (j = 0; j < exec_info->input_size; j++) | |||
| 1479 | { | |||
| 1480 | ccv_nnc_tensor_t* tensor = exec_info->inputs[j]; | |||
| 1481 | if (!tensor) | |||
| 1482 | continue; | |||
| 1483 | if (CCV_IS_TENSOR_MULTIVIEW(tensor)) | |||
| 1484 | _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k); | |||
| 1485 | else { | |||
| 1486 | tensor_dots[k].name = k; | |||
| 1487 | tensor_dots[k].tensor_ref = (uintptr_t)tensor; | |||
| 1488 | tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8; | |||
| 1489 | size_t tensor_size; | |||
| 1490 | if (CCV_IS_TENSOR_VIEW(tensor)) | |||
| 1491 | tensor_size = (size_t)((ccv_nnc_tensor_view_t*)(tensor))->stride[0] * tensor->info.dim[0] * CCV_GET_DATA_TYPE_SIZE(tensor->type); | |||
| 1492 | else | |||
| 1493 | tensor_size = ccv_nnc_dimension_count(tensor->info.dim) * CCV_GET_DATA_TYPE_SIZE(tensor->type); | |||
| 1494 | tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + tensor_size - 1; | |||
| 1495 | ++k; | |||
| 1496 | } | |||
| 1497 | } | |||
| 1498 | for (j = 0; j < exec_info->output_size; j++) | |||
| 1499 | { | |||
| 1500 | ccv_nnc_tensor_t* tensor = exec_info->outputs[j]; | |||
| 1501 | if (!tensor) | |||
| 1502 | continue; | |||
| 1503 | if (CCV_IS_TENSOR_MULTIVIEW(tensor)) | |||
| 1504 | _ccv_nnc_graph_dot_tensor_multiview_tensor_dots((ccv_nnc_tensor_multiview_t*)tensor, tensor_dots, &k); | |||
| 1505 | else { | |||
| 1506 | tensor_dots[k].name = k; | |||
| 1507 | tensor_dots[k].tensor_ref = (uintptr_t)tensor; | |||
| 1508 | tensor_dots[k].start_ptr = (uintptr_t)tensor->data.u8; | |||
| 1509 | size_t tensor_size; | |||
| 1510 | if (CCV_IS_TENSOR_VIEW(tensor)) | |||
| 1511 | tensor_size = (size_t)((ccv_nnc_tensor_view_t*)(tensor))->stride[0] * tensor->info.dim[0] * CCV_GET_DATA_TYPE_SIZE(tensor->type); | |||
| 1512 | else | |||
| 1513 | tensor_size = ccv_nnc_dimension_count(tensor->info.dim) * CCV_GET_DATA_TYPE_SIZE(tensor->type); | |||
| 1514 | tensor_dots[k].end_ptr = tensor_dots[k].start_ptr + tensor_size - 1; | |||
| 1515 | ++k; | |||
| 1516 | } | |||
| 1517 | } | |||
| 1518 | } | |||
| 1519 | tensor_count = k; // We may have overcounted; shrink to the actual count. | |||
| 1520 | // To group overlapping memory into one zone, we sort by start ptr first (and secondarily by the tensor pointer). | |||
| 1521 | _ccv_nnc_tensor_dot_sort_by_ptr(tensor_dots, tensor_count, 0); | |||
| 1522 | int index = 0, zone = 0; | |||
| 1523 | uintptr_t tensor_ref = tensor_count > 0 ? tensor_dots[0].tensor_ref : 0; | |||
| 1524 | uintptr_t end_ptr = tensor_count > 0 ? tensor_dots[0].end_ptr : 0; | |||
| 1525 | // Then it is trivial: we go by end ptr. If the next start ptr is still within the current end ptr (start ptr <= end ptr), | |||
| 1526 | // they belong to the same zone. | |||
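// Editor's note, a worked example (not in the original source): given sorted byte
// ranges [0,99], [50,149] and [200,299] from three different tensors, the second starts
// inside the first range so it stays in zone 0, while the third starts past the tracked
// end_ptr and opens zone 1; all three still get distinct indexes.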
| 1527 | for (i = 0; i < tensor_count; i++) | |||
| 1528 | { | |||
| 1529 | if (tensor_dots[i].tensor_ref != tensor_ref) | |||
| 1530 | { | |||
| 1531 | tensor_ref = tensor_dots[i].tensor_ref; | |||
| 1532 | ++index; | |||
| 1533 | } | |||
| 1534 | if (tensor_dots[i].start_ptr > end_ptr) | |||
| 1535 | { | |||
| 1536 | end_ptr = ccv_max(end_ptr, tensor_dots[i].end_ptr); | |||
| 1537 | ++zone; | |||
| 1538 | } | |||
| 1539 | tensor_dots[i].index = index; | |||
| 1540 | tensor_dots[i].zone = zone; | |||
| 1541 | } | |||
| 1542 | // We already have index and zone assigned, but the problem is that these are not very human-interpretable (they | |||
| 1543 | // follow the pointers from low to high, not the tensor creation order). The following code renames both the index | |||
| 1544 | // and the zone so that the output is much easier to follow. | |||
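// Editor's note (illustration, not in the original source): remap[name] records where
// the dot for creation-order tensor `name` landed after sorting, so the loop below can
// walk the dots in creation order and hand out fresh ids; the tensor created first thus
// receives renamed index 0 (and its zone renamed id 0) no matter where its buffer sits
// in memory.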
| 1545 | const int index_count = index + 1; | |||
| 1546 | const int zone_count = zone + 1; | |||
| 1547 | int* remap = (int*)ccmalloc(sizeof(int) * (tensor_count + index_count + zone_count)); | |||
| 1548 | int* rename_index = remap + tensor_count; | |||
| 1549 | int* rename_zone = rename_index + index_count; | |||
| 1550 | for (i = 0; i < tensor_count; i++) | |||
| 1551 | remap[tensor_dots[i].name] = i; | |||
| 1552 | for (i = 0; i < index_count; i++) | |||
| 1553 | rename_index[i] = -1; | |||
| 1554 | for (i = 0; i < zone_count; i++) | |||
| 1555 | rename_zone[i] = -1; | |||
| 1556 | index = 0; | |||
| 1557 | zone = 0; | |||
| 1558 | for (i = 0; i < tensor_count; i++) | |||
| 1559 | { | |||
| 1560 | ccv_nnc_tensor_dot_t* tensor_dot = tensor_dots + remap[i]; | |||
| 1561 | if (rename_index[tensor_dot->index] == -1) | |||
| 1562 | rename_index[tensor_dot->index] = index++; | |||
| 1563 | if (rename_zone[tensor_dot->zone] == -1) | |||
| 1564 | rename_zone[tensor_dot->zone] = zone++; | |||
| 1565 | } | |||
| 1566 | ccv_nnc_tensor_dot_recovery_t recovery = { | |||
| 1567 | .dots = tensor_dots, | |||
| 1568 | .remap = remap, | |||
| 1569 | .rename_index = rename_index, | |||
| 1570 | .rename_zone = rename_zone, | |||
| 1571 | }; | |||
| 1572 | return recovery; | |||
| 1573 | } | |||
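// Editor's note: a self-contained sketch (not part of ccv_nnc_graph.c) of the same
// sweep idea on hypothetical plain intervals, for readers who want to experiment with
// the zone grouping in isolation. One deliberate difference is flagged in the comments:
// this sketch extends the running end on every interval, whereas the loop above only
// refreshes end_ptr when a new zone opens.

#include <stdlib.h>

typedef struct { unsigned int start, end, zone; } editor_interval_t; // hypothetical type

static int _editor_interval_by_start(const void* a, const void* b)
{
	const editor_interval_t* const x = (const editor_interval_t*)a;
	const editor_interval_t* const y = (const editor_interval_t*)b;
	return (x->start > y->start) - (x->start < y->start);
}

static void _editor_assign_zones(editor_interval_t* const intervals, const int count)
{
	if (count == 0)
		return;
	qsort(intervals, count, sizeof(editor_interval_t), _editor_interval_by_start);
	int i, zone = 0;
	unsigned int end = intervals[0].end;
	for (i = 0; i < count; i++)
	{
		if (intervals[i].start > end) // Disjoint from the running zone: open a new one.
			++zone;
		if (intervals[i].end > end) // Extend the running end (the loop above instead
			end = intervals[i].end; // updates end_ptr only when a new zone opens).
		intervals[i].zone = zone;
	}
}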
| 1574 | ||||
| 1575 | static void _ccv_nnc_graph_tensor_dot_recovery_free(const ccv_nnc_tensor_dot_recovery_t recovery) | |||
| 1576 | { | |||
| 1577 | ccfree(recovery.dots); | |||
| 1578 | ccfree(recovery.remap); | |||
| 1579 | } | |||
| 1580 | ||||
| 1581 | static void _ccv_nnc_graph_dot_tensor_multiview_one(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int depth, int* tensor_index, FILE* out) | |||
| 1582 | { | |||
| 1583 | const int count = mv->kind + mv->repeat; | |||
| 1584 | int i, j; | |||
| 1585 | fputs("|{", out); | |||
| 1586 | for (i = 0; i < count; i++) | |||
| 1587 | if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])) | |||
| 1588 | { | |||
| 1589 | fprintf(out, "{%d", i); | |||
| 1590 | if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0)) | |||
| 1591 | fputc('*', out); // Denotes that we loop on this. | |||
| 1592 | _ccv_nnc_graph_dot_tensor_multiview_one((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)[i], recovery, depth, tensor_index, out); | |||
| 1593 | if (i == count - 1) | |||
| 1594 | fputc('}', out); | |||
| 1595 | else | |||
| 1596 | fputs("}|", out); | |||
| 1597 | } else { | |||
| 1598 | fprintf(out, "{%d", i); | |||
| 1599 | if (mv->kind == CCV_NNC_MULTIVIEW_K0N || (mv->kind == CCV_NNC_MULTIVIEW_K1N && i > 0)) | |||
| 1600 | fputc('*', out); // Denotes that we loop on this. | |||
| 1601 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index]; | |||
| 1602 | fprintf(out, "|zone%d", recovery.rename_zone[tensor_dot->zone]); | |||
| 1603 | for (j = 0; j < depth; j++) | |||
| 1604 | fputc('\'', out); | |||
| 1605 | uintptr_t aptr = (uintptr_t)CCV_NNC_MULTIVIEW_DATA(mv)[i]->data.u8; | |||
| 1606 | // For the last one, we don't extend to full ainc. | |||
| 1607 | size_t dim_size = ccv_nnc_dimension_count(CCV_NNC_MULTIVIEW_DATA(mv)[i]->info.dim) * CCV_GET_DATA_TYPE_SIZE(CCV_NNC_MULTIVIEW_DATA(mv)[i]->type); | |||
| 1608 | // Print out the range as well. | |||
| 1609 | fprintf(out, "|{%#010x|%#010x}", (uint32_t)aptr, (uint32_t)(aptr + dim_size - 1)); | |||
| 1610 | ++(*tensor_index); | |||
| 1611 | if (i == count - 1) | |||
| 1612 | fputc('}', out); | |||
| 1613 | else | |||
| 1614 | fputs("}|", out); | |||
| 1615 | } | |||
| 1616 | fputc('}', out); | |||
| 1617 | } | |||
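// Editor's note (illustrative, not in the original source; pointer values hypothetical):
// for a two-view CCV_NNC_MULTIVIEW_K0N multiview at depth 0, the function above emits a
// record fragment shaped like
//   |{{0*|zone0|{0x00001000|0x00001fff}}|{1*|zone1|{0x00002000|0x00002fff}}}
// with '*' marking the views the containing loop rotates over and each hex pair giving
// the recovered [start, end] byte range of a leaf tensor.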
| 1618 | ||||
| 1619 | static void _ccv_nnc_graph_dot_tensor_multiview(const ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, int* tensor_index, FILE* out) | |||
| 1620 | { | |||
| 1621 | // If it has an alias pointer, or it is the long form. | |||
| 1622 | if (flags == CCV_NNC_LONG_DOT_GRAPH) | |||
| 1623 | fputc('{', out); | |||
| 1624 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[*tensor_index]; | |||
| 1625 | fprintf(out, "multiview%d", recovery.rename_index[tensor_dot->index]); | |||
| 1626 | int i; | |||
| 1627 | for (i = 0; i < depth; i++) // Print subscription to denote depth. | |||
| 1628 | fputc('\'', out); | |||
| 1629 | if (CCV_GET_TAPE_ALLOC(mv->type)) | |||
| 1630 | fputs(" (t)", out); | |||
| 1631 | if (flags == CCV_NNC_LONG_DOT_GRAPH) | |||
| 1632 | { | |||
| 1633 | _ccv_nnc_graph_dot_tensor_multiview_one(mv, recovery, depth, tensor_index, out); | |||
| 1634 | const ccv_nnc_tensor_t* root = (ccv_nnc_tensor_t*)mv; | |||
| 1635 | while (CCV_IS_TENSOR_MULTIVIEW(root)) | |||
| 1636 | root = CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)root)[0]; | |||
| 1637 | fprintf(out, "|%d", root->info.dim[0]); | |||
| 1638 | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && root->info.dim[i]; i++) | |||
| 1639 | fprintf(out, "x%d", root->info.dim[i]); | |||
| 1640 | fputc('}', out); | |||
| 1641 | } else | |||
| 1642 | *tensor_index += _ccv_nnc_graph_dot_tensor_multiview_count(mv); | |||
| 1643 | } | |||
| 1644 | ||||
| 1645 | static void _ccv_nnc_graph_dot_node(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_graph_exec_schedule_t* const schd_info, const int exec_index, ccv_nnc_stream_context_t** const streams, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* const tensor_index) | |||
| 1646 | { | |||
| 1647 | fprintf(out, "node%d [shape=record,label=\"", exec_index); | |||
| 1648 | _ccv_nnc_graph_dot_exec(exec_index, exec_info, schd_info, streams, flags, out); | |||
| 1649 | int i; | |||
| 1650 | int k = *tensor_index; | |||
| 1651 | if (exec_info->input_size > 0) | |||
| 1652 | { | |||
| 1653 | fputs("|{Input", out); | |||
| 1654 | for (i = 0; i < exec_info->input_size; i++) | |||
| 1655 | if (exec_info->inputs[i]) | |||
| 1656 | { | |||
| 1657 | fputc('|', out); | |||
| 1658 | if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])) | |||
| 1659 | _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out); | |||
| 1660 | else { | |||
| 1661 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k]; | |||
| 1662 | _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out); | |||
| 1663 | ++k; | |||
| 1664 | } | |||
| 1665 | } else | |||
| 1666 | fputs("|-", out); | |||
| 1667 | fputc('}', out); | |||
| 1668 | } | |||
| 1669 | if (exec_info->output_size > 0) | |||
| 1670 | { | |||
| 1671 | fputs("|{Output", out); | |||
| 1672 | for (i = 0; i < exec_info->output_size; i++) | |||
| 1673 | if (exec_info->outputs[i]) | |||
| 1674 | { | |||
| 1675 | fputc('|', out); | |||
| 1676 | if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])) | |||
| 1677 | _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out); | |||
| 1678 | else { | |||
| 1679 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k]; | |||
| 1680 | _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out); | |||
| 1681 | ++k; | |||
| 1682 | } | |||
| 1683 | } else | |||
| 1684 | fputs("|-", out); | |||
| 1685 | fputc('}', out); | |||
| 1686 | } | |||
| 1687 | fputs("\"];\n", out); | |||
| 1688 | *tensor_index = k; | |||
| 1689 | } | |||
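// Editor's note (illustrative, not in the original source): for a node with one input
// and one output, the function above prints a Graphviz record along the lines of
//   node3 [shape=record,label="<command cell>|{Input|<input cell>}|{Output|<output cell>}"];
// where the command and tensor cells are filled in by _ccv_nnc_graph_dot_exec and
// _ccv_nnc_graph_dot_tensor, defined earlier in this file; a null input or output shows
// up as a plain '-' cell.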
| 1690 | ||||
| 1691 | static void _ccv_nnc_graph_dot_while_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const ccv_nnc_graph_t* const while_graph, const int flags, const int depth, FILE* out, int* tensor_index) | |||
| 1692 | { | |||
| 1693 | int i; | |||
| 1694 | fprintf(out, "label=<<b>while%d </b>Command: ", exec_index); | |||
| 1695 | fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out); | |||
| 1696 | fputs(">;\n", out); | |||
| 1697 | fprintf(out, "label%d [shape=record,label=\"{", exec_index); | |||
| 1698 | int k = *tensor_index; | |||
| 1699 | if (exec_info->input_size > 0) | |||
| 1700 | { | |||
| 1701 | fputs("{Input|{", out); | |||
| 1702 | for (i = 0; i < exec_info->input_size; i++) | |||
| 1703 | { | |||
| 1704 | if (i > 0) | |||
| 1705 | fputc('|', out); | |||
| 1706 | if (exec_info->inputs[i]) | |||
| 1707 | { | |||
| 1708 | if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])) | |||
| 1709 | _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out); | |||
| 1710 | else { | |||
| 1711 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k]; | |||
| 1712 | _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out); | |||
| 1713 | ++k; | |||
| 1714 | } | |||
| 1715 | } else | |||
| 1716 | fputc('-', out); | |||
| 1717 | } | |||
| 1718 | fputs("}}", out); | |||
| 1719 | } | |||
| 1720 | if (exec_info->output_size > 0) | |||
| 1721 | { | |||
| 1722 | if (exec_info->input_size > 0) | |||
| 1723 | fputs("|", out); | |||
| 1724 | fputs("{Output|{", out); | |||
| 1725 | for (i = 0; i < exec_info->output_size; i++) | |||
| 1726 | { | |||
| 1727 | if (i > 0) | |||
| 1728 | fputc('|', out); | |||
| 1729 | if (exec_info->outputs[i]) | |||
| 1730 | { | |||
| 1731 | if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])) | |||
| 1732 | _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out); | |||
| 1733 | else { | |||
| 1734 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k]; | |||
| 1735 | _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out); | |||
| 1736 | ++k; | |||
| 1737 | } | |||
| 1738 | } else | |||
| 1739 | fputc('-', out); | |||
| 1740 | } | |||
| 1741 | fputs("}}", out); | |||
| 1742 | } | |||
| 1743 | fputs("}\"];\n", out); | |||
| 1744 | *tensor_index = k; | |||
| 1745 | } | |||
| 1746 | ||||
| 1747 | static void _ccv_nnc_graph_dot_case_of_label(const ccv_nnc_graph_exec_info_t* const exec_info, const int exec_index, const ccv_nnc_tensor_dot_recovery_t recovery, const int flags, const int depth, FILE* out, int* tensor_index) | |||
| 1748 | { | |||
| 1749 | int i; | |||
| 1750 | fprintf(out, "label=<<b>caseof%d </b>Command: ", exec_index); | |||
| 1751 | fputs(ccv_nnc_cmd_name(exec_info->cmd.cmd), out); | |||
| 1752 | fputs(">;\n", out); | |||
| 1753 | fprintf(out, "label%d [shape=record,label=\"{", exec_index); | |||
| 1754 | int k = *tensor_index; | |||
| 1755 | if (exec_info->input_size > 0) | |||
| 1756 | { | |||
| 1757 | fputs("{Input|{", out); | |||
| 1758 | for (i = 0; i < exec_info->input_size; i++) | |||
| 1759 | { | |||
| 1760 | if (i > 0) | |||
| 1761 | fputc('|', out); | |||
| 1762 | if (exec_info->inputs[i]) | |||
| 1763 | { | |||
| 1764 | if (CCV_IS_TENSOR_MULTIVIEW(exec_info->inputs[i])) | |||
| 1765 | _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->inputs[i], recovery, flags, depth, &k, out); | |||
| 1766 | else { | |||
| 1767 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k]; | |||
| 1768 | _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->inputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out); | |||
| 1769 | ++k; | |||
| 1770 | } | |||
| 1771 | } else | |||
| 1772 | fputc('-', out); | |||
| 1773 | } | |||
| 1774 | fputs("}}", out); | |||
| 1775 | } | |||
| 1776 | if (exec_info->output_size > 0) | |||
| 1777 | { | |||
| 1778 | if (exec_info->input_size > 0) | |||
| 1779 | fputs("|", out); | |||
| 1780 | fputs("{Output|{", out); | |||
| 1781 | for (i = 0; i < exec_info->output_size; i++) | |||
| 1782 | { | |||
| 1783 | if (i > 0) | |||
| 1784 | fputc('|', out); | |||
| 1785 | if (exec_info->outputs[i]) | |||
| 1786 | { | |||
| 1788 | if (CCV_IS_TENSOR_MULTIVIEW(exec_info->outputs[i])) | |||
| 1788 | _ccv_nnc_graph_dot_tensor_multiview((ccv_nnc_tensor_multiview_t*)exec_info->outputs[i], recovery, flags, depth, &k, out); | |||
| 1789 | else { | |||
| 1790 | const ccv_nnc_tensor_dot_t* const tensor_dot = recovery.dots + recovery.remap[k]; | |||
| 1791 | _ccv_nnc_graph_dot_tensor(recovery.rename_index[tensor_dot->index], exec_info->outputs[i], recovery.rename_zone[tensor_dot->zone], flags, depth, out); | |||
| 1792 | ++k; | |||
| 1793 | } | |||
| 1794 | } else | |||
| 1795 | fputc('-', out); | |||
| 1796 | } | |||
| 1797 | fputs("}}", out); | |||
| 1798 | } | |||
| 1799 | fputs("}\"];\n", out); | |||
| 1800 | *tensor_index = k; | |||
| 1801 | } | |||
| 1802 | ||||
| 1803 | static void _ccv_nnc_graph_dot_sub_graphs(const ccv_nnc_graph_exec_info_t* const exec_info, const ccv_nnc_tensor_dot_recovery_t p_recovery, const ccv_array_t* const sub_graphs, const int flags, const int depth, FILE* out, int* tensor_index, int* exec_index) | |||
| 1804 | { | |||
| 1805 | if (exec_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) | |||
| 1806 | { | |||
| 1807 | fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index); | |||
| 1808 | const ccv_nnc_graph_t* const while_graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[0] - 1); | |||
| 1809 | // Output this node info within this subgraph. | |||
| 1810 | _ccv_nnc_graph_dot_while_label(exec_info, *exec_index, p_recovery, while_graph, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index); | |||
| 1811 | } else if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) { | |||
| 1812 | fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\n", *exec_index, *exec_index); | |||
| 1813 | _ccv_nnc_graph_dot_case_of_label(exec_info, *exec_index, p_recovery, flags, depth - 1 /* Label all references to its level above. */, out, tensor_index); | |||
| 1814 | } | |||
| 1815 | ++(*exec_index); | |||
| 1816 | int p; | |||
| 1817 | for (p = 0; p < exec_info->graph_ref_size; p++) | |||
| 1818 | { | |||
| 1819 | if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) | |||
| 1820 | { | |||
| 1821 | fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=\"\"\n", *exec_index, *exec_index); | |||
| 1822 | ++(*exec_index); | |||
| 1823 | } | |||
| 1824 | const ccv_nnc_graph_t* const graph = *(ccv_nnc_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_info)[p] - 1); | |||
| 1825 | const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule; | |||
| 1826 | ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph); | |||
| 1827 | int i, j; | |||
| 1828 | int k = 0; | |||
| 1829 | int* node_id = (int*)ccmalloc(sizeof(int) * graph->exec_info->rnum); | |||
| 1830 | // Output styles. | |||
| 1831 | for (i = 0; i < graph->exec_info->rnum; i++) | |||
| 1832 | { | |||
| 1833 | node_id[i] = *exec_index; | |||
| 1834 | ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i); | |||
| 1835 | if (CCV_NNC_GRAPH_REF(exec_info)[0]) | |||
| 1836 | _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, depth + 1, out, &k, exec_index); | |||
| 1837 | else { | |||
| 1838 | _ccv_nnc_graph_dot_node(exec_info, | |||
| 1839 | schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0, | |||
| 1840 | *exec_index, graph->streams, recovery, flags, depth, out, &k); | |||
| 1841 | ++(*exec_index); | |||
| 1842 | } | |||
| 1843 | } | |||
| 1844 | // Output connections. | |||
| 1845 | for (i = 0; i < graph->exec_info->rnum; i++) | |||
| 1846 | { | |||
| 1847 | ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i); | |||
| 1848 | if (exec_info->outgoings) | |||
| 1849 | for (j = 0; j < exec_info->outgoings->rnum; j++) | |||
| 1850 | { | |||
| 1851 | const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j); | |||
| 1852 | const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx); | |||
| 1853 | // If both are sub-graphs, have both tail and head specified. | |||
| 1854 | if (CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0]) | |||
| 1855 | fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]); | |||
| 1856 | else if (CCV_NNC_GRAPH_REF(exec_info)[0] && !CCV_NNC_GRAPH_REF(outgoing_info)[0]) | |||
| 1857 | fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]); | |||
| 1858 | else if (!CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0]) | |||
| 1859 | fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]); | |||
| 1860 | else | |||
| 1861 | fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]); | |||
| 1862 | } | |||
| 1863 | } | |||
| 1864 | fputs("}\n", out); | |||
| 1865 | _ccv_nnc_graph_tensor_dot_recovery_free(recovery); | |||
| 1866 | ccfree(node_id); | |||
| 1867 | } | |||
| 1868 | // Extra subgraph cluster. | |||
| 1869 | if (exec_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) | |||
| 1870 | fputs("}\n", out); | |||
| 1871 | } | |||
| 1872 | ||||
| 1873 | void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out) | |||
| 1874 | { | |||
| 1875 | fputs("digraph G {\ncompound=true;\n", out); | |||
| 1876 | ccv_nnc_tensor_dot_recovery_t recovery = _ccv_nnc_graph_tensor_dot_recovery(graph); | |||
| 1877 | int i, j; | |||
| 1878 | int k = 0, c = 0; | |||
| 1879 | int* node_id = (int*)ccmalloc(sizeof(int) * graph->exec_info->rnum); | |||
| 1880 | const ccv_nnc_graph_static_schedule_t* const schedule = graph->default_schedule; | |||
| 1881 | // Output styles. | |||
| 1882 | for (i = 0; i < graph->exec_info->rnum; i++) | |||
| 1883 | { | |||
| 1884 | node_id[i] = c; | |||
| 1885 | ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i); | |||
| 1886 | if (CCV_NNC_GRAPH_REF(exec_info)[0]) | |||
| 1887 | _ccv_nnc_graph_dot_sub_graphs(exec_info, recovery, graph->sub_graphs, flags, 1, out, &k, &c); | |||
| 1888 | else { | |||
| 1889 | _ccv_nnc_graph_dot_node(exec_info, | |||
| 1890 | schedule ? (i < schedule->exec_info_size ? schedule->exec_info + i : 0) : 0, | |||
| 1891 | c, graph->streams, recovery, flags, 0, out, &k); | |||
| 1892 | ++c; | |||
| 1893 | } | |||
| 1894 | } | |||
| 1895 | // Output connections. | |||
| 1896 | for (i = 0; i < graph->exec_info->rnum; i++) | |||
| 1897 | { | |||
| 1898 | ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i); | |||
| 1899 | if (exec_info->outgoings) | |||
| 1900 | for (j = 0; j < exec_info->outgoings->rnum; j++) | |||
| 1901 | { | |||
| 1902 | const int outgoing_idx = *(int*)ccv_array_get(exec_info->outgoings, j); | |||
| 1903 | const ccv_nnc_graph_exec_info_t* const outgoing_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, outgoing_idx); | |||
| 1904 | // If both are sub-graphs, have both tail and head specified. | |||
| 1905 | if (CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0]) | |||
| 1906 | fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]); | |||
| 1907 | else if (CCV_NNC_GRAPH_REF(exec_info)[0] && !CCV_NNC_GRAPH_REF(outgoing_info)[0]) | |||
| 1908 | fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]); | |||
| 1909 | else if (!CCV_NNC_GRAPH_REF(exec_info)[0] && CCV_NNC_GRAPH_REF(outgoing_info)[0]) | |||
| 1910 | fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]); | |||
| 1911 | else | |||
| 1912 | fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]); | |||
| 1913 | } | |||
| 1914 | } | |||
| 1915 | fputs("}\n", out); | |||
| 1916 | _ccv_nnc_graph_tensor_dot_recovery_free(recovery); | |||
| 1917 | ccfree(node_id); | |||
| 1918 | } | |||
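// Editor's note: a minimal usage sketch, not part of the original file; the wrapper and
// file name are hypothetical. The output is plain Graphviz, so it can be rendered with
// e.g. `dot -Tpng graph.dot -o graph.png`.

static void editor_dump_graph(const ccv_nnc_graph_t* const graph)
{
	FILE* const out = fopen("graph.dot", "w+");
	if (!out)
		return;
	ccv_nnc_graph_dot(graph, CCV_NNC_LONG_DOT_GRAPH, out); // Long form adds zones and byte ranges.
	fclose(out);
}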
| 1919 | ||||
| 1920 | void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size) | |||
| 1921 | { | |||
| 1922 | ccv_nnc_drain_autotune_cache(); | |||
| 1923 | // exec current node, for synchronous CPU execution, no stream unit. | |||
| 1924 | int i; | |||
| 1925 | #define visitor(node, idx, ...) \ | |||
| 1926 | do { \ | |||
| 1927 | if (node->cmd.cmd == CCV_NNC_NOOP) \ | |||
| 1928 | continue; \ | |||
| 1929 | if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD) \ | |||
| 1930 | for (i = 0; i < node->graph_ref_size; i++) \ | |||
| 1931 | { \ | |||
| 1932 | ccv_nnc_graph_t* sub_graph = *(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[i] - 1); \ | |||
| 1933 | ccv_nnc_graph_autotune(sub_graph, max_workspace_size, flags, 0, 0, 0, 0); \ | |||
| 1934 | } \ | |||
| 1935 | else { \ | |||
| 1936 | /* Need to unwrap these tensors */ \ | |||
| 1937 | for (i = 0; i < node->input_size + node->output_size; i++) \ | |||
| 1938 | if (node->inputs[i] && CCV_IS_TENSOR_MULTIVIEW(node->inputs[i])) \ | |||
| 1939 | node->inputs[i] = _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)node->inputs[i]); \ | |||
| 1940 | PRINT(CCV_CLI_VERBOSE, "%s [%d]: [%d] -> [%d]\n", ccv_nnc_cmd_name(node->cmd.cmd), idx, node->input_size, node->output_size); \ | |||
| 1941 | for (i = 0; i < node->input_size; i++) \ | |||
| 1942 | { \ | |||
| 1943 | PRINT(CCV_CLI_VERBOSE, "|-> %d. %p (%p)", i + 1, node->inputs[i], (node->inputs[i] ? node->inputs[i]->data.u8 : 0)); \ | |||
| 1944 | if (node->inputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)) \ | |||
| 1945 | ccv_nnc_print_tensor_shape(node->inputs[i]); \ | |||
| 1946 | PRINT(CCV_CLI_VERBOSE, "\n"); \ | |||
| 1947 | } \ | |||
| 1948 | for (i = 0; i < node->output_size; i++) \ | |||
| 1949 | { \ | |||
| 1950 | PRINT(CCV_CLI_VERBOSE, "|<- %d. %p (%p)", i + 1, node->outputs[i], (node->outputs[i] ? node->outputs[i]->data.u8 : 0)); \ | |||
| 1951 | if (node->outputs[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)) \ | |||
| 1952 | ccv_nnc_print_tensor_shape(node->outputs[i]); \ | |||
| 1953 | PRINT(CCV_CLI_VERBOSE, "\n"); \ | |||
| 1954 | } \ | |||
| 1955 | node->cmd = ccv_nnc_cmd_autotune(node->cmd, max_workspace_size, node->hint, flags, node->inputs, node->input_size, node->outputs, node->output_size, 0); \ | |||
| 1956 | } \ | |||
| 1957 | } while (0) | |||
| 1958 | const ccv_nnc_graph_exec_t* const graph_sources = sources ? sources : (graph->sources ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->sources, 0) : 0); | |||
| 1959 | const int graph_source_size = source_size ? source_size : (graph->sources ? graph->sources->rnum : 0); | |||
| 1960 | const ccv_nnc_graph_exec_t* const graph_destinations = destinations ? destinations : (graph->destinations ? (ccv_nnc_graph_exec_t*)ccv_array_get(graph->destinations, 0) : 0); | |||
| 1961 | const int graph_destination_size = destination_size ? destination_size : (graph->destinations ? graph->destinations->rnum : 0); | |||
| 1962 | CCV_NNC_GRAPH_VISIT(graph, (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, 0), graph->exec_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, visitor); | |||
| 1963 | #undef visitor | |||
| 1964 | ccv_nnc_drain_autotune_cache(); | |||
| 1965 | } | |||
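// Editor's note: a minimal usage sketch, not part of the original file. Passing null
// source/destination arrays with zero sizes makes autotune fall back to the graph's own
// recorded sources and destinations, as the graph_sources/graph_destinations fallbacks
// above show:
//
//   ccv_nnc_graph_autotune(graph, 0 /* max_workspace_size */, 0 /* flags */, 0, 0, 0, 0);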
| 1966 | ||||
| 1967 | void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph) | |||
| 1968 | { | |||
| 1969 | int i, j; | |||
| 1970 | for (i = 0; i < graph->exec_info->rnum; i++) | |||
| 1971 | { | |||
| 1972 | ccv_nnc_graph_exec_info_t *info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, i); | |||
| 1973 | if (info->_heap_graph_ref) | |||
| 1974 | ccfree(info->_heap_graph_ref); | |||
| 1975 | ccv_array_t* outgoings = info->outgoings; | |||
| 1976 | if (outgoings) | |||
| 1977 | ccv_array_free(outgoings); | |||
| 1978 | // We allocate inputs & outputs in one contiguous block, therefore we only need to free the input array. | |||
| 1979 | if (info->inputs) | |||
| 1980 | ccfree(info->inputs); | |||
| 1981 | if (info->input_flags) | |||
| 1982 | ccfree(info->input_flags); | |||
| 1983 | if (info->updates) | |||
| 1984 | ccfree(info->updates); | |||
| 1985 | if ((info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) && info->p_while.inputs) | |||
| 1986 | ccfree(info->p_while.inputs); | |||
| 1987 | } | |||
| 1988 | if (graph->tensor_wraps) | |||
| 1989 | { | |||
| 1990 | for (i = 0; i < graph->tensor_wraps->rnum; i++) | |||
| 1991 | { | |||
| 1992 | ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, i); | |||
| 1993 | if (tensor_wrap_array) | |||
| 1994 | { | |||
| 1995 | for (j = 0; j < tensor_wrap_array->size; j++) | |||
| 1996 | _ccv_nnc_graph_tensor_wrap_free(tensor_wrap_array->tensor_wraps[j]); | |||
| 1997 | ccfree(tensor_wrap_array); | |||
| 1998 | } | |||
| 1999 | } | |||
| 2000 | ccv_array_free(graph->tensor_wraps); | |||
| 2001 | } | |||
| 2002 | if (graph->tensor_wraps_refs) | |||
| 2003 | ccv_array_free(graph->tensor_wraps_refs); | |||
| 2004 | if (graph->breakpoints) | |||
| 2005 | ccfree(graph->breakpoints); | |||
| 2006 | if (graph->sources) | |||
| 2007 | ccv_array_free(graph->sources); | |||
| 2008 | if (graph->destinations) | |||
| 2009 | ccv_array_free(graph->destinations); | |||
| 2010 | if (graph->default_schedule) | |||
| 2011 | ccv_nnc_graph_static_schedule_free(graph->default_schedule); | |||
| 2012 | if (graph->streams) | |||
| 2013 | { | |||
| 2014 | // If the graph has a parent graph, the default stream was allocated by the parent graph, so we need to skip it. | |||
| 2015 | if (!graph->p) | |||
| 2016 | ccv_nnc_stream_context_free(graph->streams[0]); | |||
| 2017 | for (i = 1; i < graph->stream_size; i++) | |||
| 2018 | ccv_nnc_stream_context_free(graph->streams[i]); | |||
| 2019 | ccfree(graph->streams); | |||
| 2020 | } | |||
| 2021 | if (graph->block_stream_tasks) | |||
| 2022 | ccfree(graph->block_stream_tasks); | |||
| 2023 | if (graph->signals) | |||
| 2024 | { | |||
| 2025 | for (i = 0; i < graph->signal_size; i++) | |||
| 2026 | ccv_nnc_stream_signal_free(graph->signals[i]); | |||
| 2027 | ccfree(graph->signals); | |||
| 2028 | } | |||
| 2029 | if (graph->carry_overs) | |||
| 2030 | { | |||
| 2031 | for (i = 0; i < graph->carry_overs->rnum; i++) | |||
| 2032 | { | |||
| 2033 | ccv_nnc_graph_tensor_carry_over_t* const carry_over = (ccv_nnc_graph_tensor_carry_over_t*)ccv_array_get(graph->carry_overs, i); | |||
| 2034 | _ccv_nnc_graph_tensor_wrap_free(carry_over->from); | |||
| 2035 | _ccv_nnc_graph_tensor_wrap_free(carry_over->to); | |||
| 2036 | } | |||
| 2037 | ccv_array_free(graph->carry_overs); | |||
| 2038 | } | |||
| 2039 | if (graph->sub_graphs) | |||
| 2040 | { | |||
| 2041 | for (i = 0; i < graph->sub_graphs->rnum; i++) | |||
| 2042 | ccv_nnc_graph_free(*(ccv_nnc_graph_t**)ccv_array_get(graph->sub_graphs, i)); | |||
| 2043 | ccv_array_free(graph->sub_graphs); | |||
| 2044 | } | |||
| 2045 | ccv_array_free(graph->exec_info); | |||
| 2046 | if (graph->buffer) | |||
| 2047 | ccfree(graph->buffer); | |||
| 2048 | ccfree(graph); | |||
| 2049 | } |
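// Editor's note (not in the original source): as the code above shows, freeing a graph
// recursively frees its sub-graphs and releases the streams, signals, default schedule
// and tensor wraps it owns, so a caller only ever frees the top-level graph it created:
//
//   ccv_nnc_graph_free(graph); // Also tears down graph->sub_graphs recursively.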