| File: | nnc/ccv_nnc_dynamic_graph.c |
| Warning: | line 651, column 9 Branch condition evaluates to a garbage value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | #include "ccv_nnc.h" | |||
| 2 | #include "ccv_nnc_easy.h" | |||
| 3 | #include "ccv_nnc_internal.h" | |||
| 4 | #include "ccv_nnc_easy.h" | |||
| 5 | #include "ccv_internal.h" | |||
| 6 | #include "_ccv_nnc_dynamic_graph.h" | |||
| 7 | #ifdef HAVE_MPS | |||
| 8 | #include "mps/ccv_nnc_mps.h" | |||
| 9 | #endif | |||
| 10 | ||||
| 11 | // MARK - Level-4 API | |||
| 12 | ||||
| 13 | ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void) | |||
| 14 | { | |||
| 15 | ccv_nnc_dynamic_graph_t* graph = ccmallocmalloc(sizeof(ccv_nnc_dynamic_graph_t)); | |||
| 16 | graph->no_grad = 0; | |||
| 17 | graph->reuse_var = -1; | |||
| 18 | graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0); | |||
| 19 | graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0); | |||
| 20 | graph->tape = ccv_nnc_symbolic_graph_new(); | |||
| 21 | graph->xpu_alloc.mp_hdr = -1; | |||
| 22 | graph->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str(); | |||
| 23 | graph->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc(); | |||
| 24 | // These may not be used as frequent, init as needed. | |||
| 25 | graph->stateful_execs = 0; | |||
| 26 | graph->reuse_stateful_exec = -1; | |||
| 27 | graph->stream_map = 0; | |||
| 28 | graph->ws = 0; | |||
| 29 | return graph; | |||
| 30 | } | |||
| 31 | ||||
| 32 | static void _ccv_nnc_tensor_variable_wait_fast_fence(ccv_nnc_tensor_view_t* const tensor_view) | |||
| 33 | { | |||
| 34 | #ifdef HAVE_MPS | |||
| 35 | ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(tensor_view) & ~(uintptr_t )1)); | |||
| 36 | if (CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3) == CCV_TENSOR_CPU_MEMORY) | |||
| 37 | ccv_nnc_mps_tensor_fast_fence_wait(tensor); | |||
| 38 | #endif | |||
| 39 | } | |||
| 40 | ||||
| 41 | static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing) | |||
| 42 | { | |||
| 43 | const int index = tensor_variable->index; | |||
| 44 | if (tensor_variable->tensor_view) | |||
| 45 | { | |||
| 46 | if (tensor_variable->destructor_hook.func) | |||
| 47 | tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context); | |||
| 48 | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view)((uintptr_t)(tensor_variable->tensor_view) & 1)) | |||
| 49 | { | |||
| 50 | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) | |||
| 51 | ccv_nnc_tensor_view_free(tensor_variable->tensor_view); | |||
| 52 | else { | |||
| 53 | if (!tensor_variable->alias_index_ref && // Return this memory to the graph. | |||
| 54 | CCV_TENSOR_GET_MEMORY(tensor_variable->tensor_view->info.type)((tensor_variable->tensor_view->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && tensor_variable->tensor_view->data.u8) | |||
| 55 | ccv_nnc_xpu_free(&graph->xpu_alloc, tensor_variable->tensor_view->data.u8); | |||
| 56 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); | |||
| 57 | } | |||
| 58 | } | |||
| 59 | } | |||
| 60 | ccfreefree(tensor_variable); | |||
| 61 | if (zeroing) | |||
| 62 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(index))) = 0; | |||
| 63 | int i; | |||
| 64 | for (i = graph->vars->rnum - 1; i >= 0; i--) | |||
| 65 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) != 0) | |||
| 66 | { | |||
| 67 | graph->vars->rnum = i + 1; | |||
| 68 | break; | |||
| 69 | } | |||
| 70 | if (index < graph->vars->rnum && | |||
| 71 | (index < graph->reuse_var || graph->reuse_var < 0)) | |||
| 72 | graph->reuse_var = index; | |||
| 73 | else if (graph->reuse_var >= graph->vars->rnum) | |||
| 74 | graph->reuse_var = -1; | |||
| 75 | } | |||
| 76 | ||||
| 77 | static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing) | |||
| 78 | { | |||
| 79 | bind->index = CCV_NNC_TENSOR_NO_VARIABLE; | |||
| 80 | if (bind->sources) | |||
| 81 | ccv_array_free(bind->sources); | |||
| 82 | if (bind->destinations) | |||
| 83 | ccv_array_free(bind->destinations); | |||
| 84 | if (bind->tensor_view) | |||
| 85 | { | |||
| 86 | if (bind->destructor_hook.func) | |||
| 87 | bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context); | |||
| 88 | if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view)((uintptr_t)(bind->tensor_view) & 1)) | |||
| 89 | { | |||
| 90 | if (CCV_IS_TENSOR_VIEW(bind->tensor_view)((*(int*)(bind->tensor_view)) & CCV_TENSOR_VIEW)) | |||
| 91 | ccv_nnc_tensor_view_free(bind->tensor_view); | |||
| 92 | else { | |||
| 93 | if (!bind->alias_ref && // Return this memory to the graph. | |||
| 94 | CCV_TENSOR_GET_MEMORY(bind->tensor_view->info.type)((bind->tensor_view->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && bind->tensor_view->data.u8) | |||
| 95 | ccv_nnc_xpu_free(&graph->xpu_alloc, bind->tensor_view->data.u8); | |||
| 96 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view); | |||
| 97 | } | |||
| 98 | } | |||
| 99 | } | |||
| 100 | if (zeroing) | |||
| 101 | { | |||
| 102 | bind->sources = 0; | |||
| 103 | bind->destinations = 0; | |||
| 104 | bind->tensor_view = 0; | |||
| 105 | bind->destructor_hook.func = 0; | |||
| 106 | bind->destructor_hook.context = 0; | |||
| 107 | } | |||
| 108 | } | |||
| 109 | ||||
| 110 | void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph) | |||
| 111 | { | |||
| 112 | int i; | |||
| 113 | for (i = 0; i < graph->vars->rnum; i++) | |||
| 114 | { | |||
| 115 | ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))); | |||
| 116 | if (tensor_variable) | |||
| 117 | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0); | |||
| 118 | } | |||
| 119 | ccv_array_free(graph->vars); | |||
| 120 | for (i = 0; i < graph->binds->rnum; i++) | |||
| 121 | _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(i))), 0); | |||
| 122 | ccv_array_free(graph->binds); | |||
| 123 | ccv_nnc_symbolic_graph_free(graph->tape); | |||
| 124 | if (graph->ws) | |||
| 125 | ccv_array_free(graph->ws); | |||
| 126 | if (graph->stateful_execs) | |||
| 127 | { | |||
| 128 | for (i = 0; i < graph->stateful_execs->rnum; i++) | |||
| 129 | { | |||
| 130 | ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i)((void*)(((char*)((graph->stateful_execs)->data)) + (size_t )(graph->stateful_execs)->rsize * (size_t)(i))); | |||
| 131 | if (stateful_exec) | |||
| 132 | ccfreefree(stateful_exec); | |||
| 133 | } | |||
| 134 | ccv_array_free(graph->stateful_execs); | |||
| 135 | } | |||
| 136 | if (graph->stream_map) | |||
| 137 | { | |||
| 138 | khiter_t k; | |||
| 139 | for (k = kh_begin(graph->stream_map)(khint_t)(0); k != kh_end(graph->stream_map)((graph->stream_map)->n_buckets); ++k) | |||
| 140 | { | |||
| 141 | if (!kh_exist(graph->stream_map, k)(!(((graph->stream_map)->flags[(k)>>4]>>((( k)&0xfU)<<1))&3))) | |||
| 142 | continue; | |||
| 143 | ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]); | |||
| 144 | ccv_nnc_stream_context_free(stream); | |||
| 145 | } | |||
| 146 | kh_destroy(stream_map, graph->stream_map)kh_destroy_stream_map(graph->stream_map); | |||
| 147 | } | |||
| 148 | ccv_nnc_xpu_alloc_destroy(&graph->xpu_alloc); | |||
| 149 | ccfreefree(graph); | |||
| 150 | } | |||
| 151 | ||||
| 152 | void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor) | |||
| 153 | { | |||
| 154 | assert(!tensor_variable->alias_index_ref)((void) sizeof ((!tensor_variable->alias_index_ref) ? 1 : 0 ), __extension__ ({ if (!tensor_variable->alias_index_ref) ; else __assert_fail ("!tensor_variable->alias_index_ref" , "ccv_nnc_dynamic_graph.c", 154, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 155 | if (tensor_variable->tensor_view && !CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view)((uintptr_t)(tensor_variable->tensor_view) & 1)) | |||
| 156 | { | |||
| 157 | assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))((void) sizeof ((!((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW)) ? 1 : 0), __extension__ ({ if (!((*( int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) ; else __assert_fail ("!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)" , "ccv_nnc_dynamic_graph.c", 157, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 158 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view); | |||
| 159 | } | |||
| 160 | tensor_variable->info = tensor->info; | |||
| 161 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1); | |||
| 162 | } | |||
| 163 | ||||
| 164 | void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context) | |||
| 165 | { | |||
| 166 | tensor_variable->destructor_hook.func = func; | |||
| 167 | tensor_variable->destructor_hook.context = context; | |||
| 168 | } | |||
| 169 | ||||
| 170 | inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info) | |||
| 171 | { | |||
| 172 | tensor_variable->alias_index_ref = 0; | |||
| 173 | tensor_variable->alias_off = 0; | |||
| 174 | tensor_variable->destructor_hook.func = 0; | |||
| 175 | tensor_variable->destructor_hook.context = 0; | |||
| 176 | tensor_variable->info = info; | |||
| 177 | tensor_variable->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 178 | tensor_variable->tensor_view = 0; | |||
| 179 | if (graph->reuse_var >= 0) | |||
| 180 | { | |||
| 181 | const int reuse_var = graph->reuse_var; | |||
| 182 | assert(reuse_var < graph->vars->rnum)((void) sizeof ((reuse_var < graph->vars->rnum) ? 1 : 0), __extension__ ({ if (reuse_var < graph->vars->rnum ) ; else __assert_fail ("reuse_var < graph->vars->rnum" , "ccv_nnc_dynamic_graph.c", 182, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 183 | tensor_variable->index = reuse_var; | |||
| 184 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(reuse_var))) = tensor_variable; | |||
| 185 | int i; | |||
| 186 | graph->reuse_var = -1; | |||
| 187 | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++) | |||
| 188 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) == 0) | |||
| 189 | graph->reuse_var = i; | |||
| 190 | } else { | |||
| 191 | tensor_variable->index = graph->vars->rnum; | |||
| 192 | ccv_array_push(graph->vars, &tensor_variable); | |||
| 193 | } | |||
| 194 | } | |||
| 195 | ||||
| 196 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) | |||
| 197 | { | |||
| 198 | ccv_nnc_tensor_variable_t tensor_variable = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | |||
| 199 | tensor_variable->type = CCV_NNC_TENSOR_VARIABLE; | |||
| 200 | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); | |||
| 201 | return tensor_variable; | |||
| 202 | } | |||
| 203 | ||||
| 204 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info) | |||
| 205 | { | |||
| 206 | ccv_nnc_tensor_variable_t tensor_variable = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | |||
| 207 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | |||
| 208 | _ccv_nnc_tensor_variable_init(graph, tensor_variable, info); | |||
| 209 | return tensor_variable; | |||
| 210 | } | |||
| 211 | ||||
| 212 | int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
| 213 | { | |||
| 214 | return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT; | |||
| 215 | } | |||
| 216 | ||||
| 217 | ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
| 218 | { | |||
| 219 | return tensor_variable->info; | |||
| 220 | } | |||
| 221 | ||||
| 222 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int stride[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info) | |||
| 223 | { | |||
| 224 | ccv_nnc_tensor_variable_t variable_alias = ccmallocmalloc(sizeof(struct ccv_nnc_tensor_variable_s)); | |||
| 225 | variable_alias->type = tensor_variable->type; | |||
| 226 | // If the tensor variable is an alias itself, we point directly to its original. | |||
| 227 | if (tensor_variable->alias_index_ref) | |||
| 228 | { | |||
| 229 | variable_alias->alias_index_ref = tensor_variable->alias_index_ref; | |||
| 230 | // The tensor variable need to be fully specified if I am doing alias an alias. | |||
| 231 | assert(!ccv_nnc_is_tensor_auto(tensor_variable->info))((void) sizeof ((!ccv_nnc_is_tensor_auto(tensor_variable-> info)) ? 1 : 0), __extension__ ({ if (!ccv_nnc_is_tensor_auto (tensor_variable->info)) ; else __assert_fail ("!ccv_nnc_is_tensor_auto(tensor_variable->info)" , "ccv_nnc_dynamic_graph.c", 231, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 232 | int i; | |||
| 233 | int no_stride = 1; | |||
| 234 | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | |||
| 235 | no_stride = (tensor_variable->stride[i] == 0); | |||
| 236 | int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 237 | int* to_stride; | |||
| 238 | if (no_stride) | |||
| 239 | { | |||
| 240 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, stride_from_dim); | |||
| 241 | to_stride = stride_from_dim; | |||
| 242 | } else | |||
| 243 | to_stride = tensor_variable->stride; | |||
| 244 | // If we provide stride, or reshape to a different size, assert the tensor variable itself is contiguous (otherwise we cannot satisfy the reshape requirements). | |||
| 245 | const int different_dim = ccv_nnc_tensor_nd(info.dim) != ccv_nnc_tensor_nd(tensor_variable->info.dim); | |||
| 246 | if (different_dim || (stride[0] != 0 && memcmp(stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) != 0)) | |||
| 247 | { assert(ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, to_stride))((void) sizeof ((ccv_nnc_tensor_view_is_contiguous(tensor_variable ->info.dim, to_stride)) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_view_is_contiguous (tensor_variable->info.dim, to_stride)) ; else __assert_fail ("ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, to_stride)" , "ccv_nnc_dynamic_graph.c", 247, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 248 | // Need to compute alias off, that is the alias off of the tensor variable plus its ofs. | |||
| 249 | const off_t off = ccv_nnc_tensor_view_offset(tensor_variable->info.datatype, to_stride, tensor_variable->ofs); | |||
| 250 | variable_alias->alias_off = tensor_variable->alias_off + off; | |||
| 251 | // If we don't provide stride, copy the stride from previous variable. | |||
| 252 | if (stride[0] == 0) | |||
| 253 | { | |||
| 254 | if (different_dim) | |||
| 255 | ccv_nnc_tensor_get_stride(info.dim, variable_alias->stride); | |||
| 256 | else | |||
| 257 | memcpy(variable_alias->stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
| 258 | } else | |||
| 259 | memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
| 260 | } else { | |||
| 261 | variable_alias->alias_index_ref = tensor_variable->index + 1; | |||
| 262 | variable_alias->alias_off = 0; | |||
| 263 | memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
| 264 | } | |||
| 265 | variable_alias->info = info; | |||
| 266 | variable_alias->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 267 | variable_alias->destructor_hook.func = 0; | |||
| 268 | variable_alias->destructor_hook.context = 0; | |||
| 269 | variable_alias->tensor_view = 0; | |||
| 270 | memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
| 271 | if (graph->reuse_var >= 0) | |||
| 272 | { | |||
| 273 | const int reuse_var = graph->reuse_var; | |||
| 274 | assert(reuse_var < graph->vars->rnum)((void) sizeof ((reuse_var < graph->vars->rnum) ? 1 : 0), __extension__ ({ if (reuse_var < graph->vars->rnum ) ; else __assert_fail ("reuse_var < graph->vars->rnum" , "ccv_nnc_dynamic_graph.c", 274, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 275 | variable_alias->index = reuse_var; | |||
| 276 | *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(reuse_var))) = variable_alias; | |||
| 277 | int i; | |||
| 278 | graph->reuse_var = -1; | |||
| 279 | for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++) | |||
| 280 | if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(i))) == 0) | |||
| 281 | graph->reuse_var = i; | |||
| 282 | } else { | |||
| 283 | variable_alias->index = graph->vars->rnum; | |||
| 284 | ccv_array_push(graph->vars, &variable_alias); | |||
| 285 | } | |||
| 286 | return variable_alias; | |||
| 287 | } | |||
| 288 | ||||
| 289 | int ccv_nnc_tensor_variable_alias_params(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], int stride[CCV_NNC_MAX_DIM_ALLOC(12)]) | |||
| 290 | { | |||
| 291 | if (!tensor_variable->alias_index_ref) | |||
| 292 | return -1; | |||
| 293 | if (ofs) | |||
| 294 | memcpy(ofs, tensor_variable->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
| 295 | if (stride) | |||
| 296 | memcpy(stride, tensor_variable->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
| 297 | return 0; | |||
| 298 | } | |||
| 299 | ||||
| 300 | ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context) | |||
| 301 | { | |||
| 302 | if (tensor_variable->tensor_view) | |||
| 303 | { | |||
| 304 | if (tensor_variable->alias_index_ref) | |||
| 305 | { | |||
| 306 | const int alias_index = tensor_variable->alias_index_ref - 1; | |||
| 307 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 307, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 308 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
| 309 | if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view)((*(int*)(tensor_variable->tensor_view)) & CCV_TENSOR_VIEW )) | |||
| 310 | { | |||
| 311 | ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view; | |||
| 312 | // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid. | |||
| 313 | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv))((void) sizeof ((!((uintptr_t)(tv) & 1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t)(tv) & 1)) ; else __assert_fail ("!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv)" , "ccv_nnc_dynamic_graph.c", 313, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 314 | // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed. | |||
| 315 | ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8, tv->off + tensor_variable->alias_off, &tv->data, &tv->dataof); | |||
| 316 | } else { | |||
| 317 | ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | |||
| 318 | // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid. | |||
| 319 | assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv))((void) sizeof ((!((uintptr_t)(tv) & 1)) ? 1 : 0), __extension__ ({ if (!((uintptr_t)(tv) & 1)) ; else __assert_fail ("!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv)" , "ccv_nnc_dynamic_graph.c", 319, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 320 | // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed. | |||
| 321 | ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8, tensor_variable->alias_off, &tv->data, &tv->dataof); | |||
| 322 | } | |||
| 323 | } | |||
| 324 | return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(tensor_variable->tensor_view ) & ~(uintptr_t)1)); | |||
| 325 | } | |||
| 326 | if (!tensor_variable->alias_index_ref) | |||
| 327 | { | |||
| 328 | // If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0. | |||
| 329 | if (ccv_nnc_is_tensor_auto(tensor_variable->info)) | |||
| 330 | return 0; | |||
| 331 | void* ptr = 0; | |||
| 332 | const size_t data_size = ccv_nnc_tensor_data_size(tensor_variable->info); | |||
| 333 | if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type)((tensor_variable->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && data_size > 0) | |||
| 334 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type)(((tensor_variable->info.type) & 0xfff00) >> 8), stream_context, data_size); | |||
| 335 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0); | |||
| 336 | if (tensor_variable->info.dim[0] > 0) | |||
| 337 | { assert(tensor_variable->tensor_view->data.u8)((void) sizeof ((tensor_variable->tensor_view->data.u8) ? 1 : 0), __extension__ ({ if (tensor_variable->tensor_view ->data.u8) ; else __assert_fail ("tensor_variable->tensor_view->data.u8" , "ccv_nnc_dynamic_graph.c", 337, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 338 | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | |||
| 339 | } | |||
| 340 | const int alias_index = tensor_variable->alias_index_ref - 1; | |||
| 341 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 341, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 342 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
| 343 | assert(!variable_to->alias_index_ref)((void) sizeof ((!variable_to->alias_index_ref) ? 1 : 0), __extension__ ({ if (!variable_to->alias_index_ref) ; else __assert_fail ("!variable_to->alias_index_ref", "ccv_nnc_dynamic_graph.c" , 343, __extension__ __PRETTY_FUNCTION__); })); | |||
| 344 | if (!variable_to->tensor_view) | |||
| 345 | { | |||
| 346 | // If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0. | |||
| 347 | if (ccv_nnc_is_tensor_auto(variable_to->info)) | |||
| 348 | return 0; | |||
| 349 | void* ptr = 0; | |||
| 350 | assert(variable_to->info.type == tensor_variable->info.type)((void) sizeof ((variable_to->info.type == tensor_variable ->info.type) ? 1 : 0), __extension__ ({ if (variable_to-> info.type == tensor_variable->info.type) ; else __assert_fail ("variable_to->info.type == tensor_variable->info.type" , "ccv_nnc_dynamic_graph.c", 350, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 351 | const size_t data_size = ccv_nnc_tensor_data_size(variable_to->info); | |||
| 352 | if (CCV_TENSOR_GET_MEMORY(variable_to->info.type)((variable_to->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY && data_size > 0) | |||
| 353 | ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type)(((variable_to->info.type) & 0xfff00) >> 8), stream_context, data_size); | |||
| 354 | variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0); | |||
| 355 | assert(variable_to->tensor_view->data.u8)((void) sizeof ((variable_to->tensor_view->data.u8) ? 1 : 0), __extension__ ({ if (variable_to->tensor_view->data .u8) ; else __assert_fail ("variable_to->tensor_view->data.u8" , "ccv_nnc_dynamic_graph.c", 355, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 356 | } | |||
| 357 | int i; | |||
| 358 | int no_ofs = 1; | |||
| 359 | for (i = 0; no_ofs && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | |||
| 360 | no_ofs = (tensor_variable->ofs[i] == 0); | |||
| 361 | int no_stride = 1; | |||
| 362 | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | |||
| 363 | no_stride = (tensor_variable->stride[i] == 0); | |||
| 364 | int stride_is_packed = no_stride; | |||
| 365 | if (!no_stride) // We have stride, now if it is packed. | |||
| 366 | stride_is_packed = ccv_nnc_is_tensor_stride_packed(tensor_variable->stride, tensor_variable->info.dim); | |||
| 367 | assert(CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info))((void) sizeof ((_ccv_get_data_type_size[((tensor_variable-> info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count (tensor_variable->info) + tensor_variable->alias_off <= _ccv_get_data_type_size[((variable_to->info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count(variable_to-> info)) ? 1 : 0), __extension__ ({ if (_ccv_get_data_type_size [((tensor_variable->info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable ->alias_off <= _ccv_get_data_type_size[((variable_to-> info.datatype) & 0xFF000) >> 12] * ccv_nnc_tensor_count (variable_to->info)) ; else __assert_fail ("CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info)" , "ccv_nnc_dynamic_graph.c", 367, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 368 | // Allowing vector type to be normal tensor, rather than a tensor view. We cannot have any offset though. | |||
| 369 | if (no_ofs && !stride_is_packed) | |||
| 370 | stride_is_packed = ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, tensor_variable->stride); | |||
| 371 | if (no_ofs && stride_is_packed) | |||
| 372 | tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1))->data.u8, tensor_variable->info, 0); | |||
| 373 | else { | |||
| 374 | if (no_stride) | |||
| 375 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride); | |||
| 376 | tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(variable_to->tensor_view ) & ~(uintptr_t)1)), tensor_variable->info, tensor_variable->ofs, tensor_variable->stride); | |||
| 377 | } | |||
| 378 | if (tensor_variable->alias_off) | |||
| 379 | ccv_nnc_tensor_data_add(tensor_variable->tensor_view->info, tensor_variable->alias_off, &tensor_variable->tensor_view->data, &tensor_variable->tensor_view->dataof); | |||
| 380 | return (ccv_nnc_tensor_t*)tensor_variable->tensor_view; | |||
| 381 | } | |||
| 382 | ||||
| 383 | void ccv_nnc_tensor_variable_wait(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
| 384 | { | |||
| 385 | if (!tensor_variable || !tensor_variable->tensor_view) | |||
| 386 | return; | |||
| 387 | _ccv_nnc_tensor_variable_wait_fast_fence(tensor_variable->tensor_view); | |||
| 388 | } | |||
| 389 | ||||
| 390 | static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol) | |||
| 391 | { | |||
| 392 | if (symbol.d >= graph->binds->rnum) | |||
| 393 | { | |||
| 394 | const int rnum = graph->binds->rnum; | |||
| 395 | ccv_array_resize(graph->binds, symbol.d + 1); | |||
| 396 | int i; | |||
| 397 | for (i = rnum; i < graph->binds->rnum; i++) | |||
| 398 | ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(i))))->index = CCV_NNC_TENSOR_NO_VARIABLE; | |||
| 399 | } | |||
| 400 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(symbol.d))); | |||
| 401 | bind->type = tensor_variable->type; | |||
| 402 | bind->index = tensor_variable->index; | |||
| 403 | if (tensor_variable->alias_index_ref) | |||
| 404 | { | |||
| 405 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 406 | .d = symbol.d, | |||
| 407 | .graph = graph->tape | |||
| 408 | }); | |||
| 409 | assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum)((void) sizeof ((alias_to.d >= 0 && alias_to.d < graph->binds->rnum) ? 1 : 0), __extension__ ({ if (alias_to .d >= 0 && alias_to.d < graph->binds->rnum ) ; else __assert_fail ("alias_to.d >= 0 && alias_to.d < graph->binds->rnum" , "ccv_nnc_dynamic_graph.c", 409, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 410 | bind->alias_ref = alias_to.d + 1; | |||
| 411 | } else | |||
| 412 | bind->alias_ref = 0; | |||
| 413 | if (bind->sources) | |||
| 414 | ccv_array_free(bind->sources); | |||
| 415 | bind->sources = 0; | |||
| 416 | if (bind->destinations) | |||
| 417 | ccv_array_free(bind->destinations); | |||
| 418 | bind->destinations = 0; | |||
| 419 | bind->destructor_hook.func = 0; | |||
| 420 | bind->destructor_hook.context = 0; | |||
| 421 | bind->tensor_view = 0; | |||
| 422 | } | |||
| 423 | ||||
| 424 | static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
| 425 | { | |||
| 426 | if (tensor_variable->symbol.d >= 0) | |||
| 427 | return tensor_variable->symbol; | |||
| 428 | if (!tensor_variable->alias_index_ref) | |||
| 429 | { | |||
| 430 | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0); | |||
| 431 | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); | |||
| 432 | return symbol; | |||
| 433 | } | |||
| 434 | const int alias_index = tensor_variable->alias_index_ref - 1; | |||
| 435 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 435, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 436 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
| 437 | assert(!variable_to->alias_index_ref)((void) sizeof ((!variable_to->alias_index_ref) ? 1 : 0), __extension__ ({ if (!variable_to->alias_index_ref) ; else __assert_fail ("!variable_to->alias_index_ref", "ccv_nnc_dynamic_graph.c" , 437, __extension__ __PRETTY_FUNCTION__); })); | |||
| 438 | int no_stride = 1; | |||
| 439 | int i; | |||
| 440 | for (i = 0; no_stride && i < CCV_NNC_MAX_DIM_ALLOC(12); i++) | |||
| 441 | no_stride = (tensor_variable->stride[i] == 0); | |||
| 442 | if (no_stride) | |||
| 443 | ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride); | |||
| 444 | const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, tensor_variable->stride, tensor_variable->info, 0); | |||
| 445 | _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol); | |||
| 446 | return symbol; | |||
| 447 | } | |||
| 448 | ||||
| 449 | // Return the tensor variable that is old (the provided tensor variable will have a new setting). | |||
| 450 | ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable) | |||
| 451 | { | |||
| 452 | struct ccv_nnc_tensor_variable_s x = *tensor_variable; | |||
| 453 | ccv_nnc_tensor_variable_t new_variable; | |||
| 454 | // Need to handle alias. | |||
| 455 | if (x.alias_index_ref) | |||
| 456 | new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(x.alias_index_ref - 1))), x.ofs, x.stride, x.info); | |||
| 457 | else | |||
| 458 | new_variable = ccv_nnc_tensor_variable_new(graph, x.info)ccv_nnc_tensor_variable_new_impl(graph, x.info); | |||
| 459 | *tensor_variable = *new_variable; | |||
| 460 | *new_variable = x; | |||
| 461 | // The index should be the same though. | |||
| 462 | const int index = new_variable->index; | |||
| 463 | new_variable->index = tensor_variable->index; | |||
| 464 | if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 465 | { | |||
| 466 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(new_variable->symbol.d))); | |||
| 467 | bind->index = new_variable->index; | |||
| 468 | } | |||
| 469 | tensor_variable->index = index; | |||
| 470 | return new_variable; | |||
| 471 | } | |||
| 472 | ||||
| 473 | void ccv_nnc_dynamic_graph_set_max_concurrency(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int max_stream_count) | |||
| 474 | { | |||
| 475 | dynamic_graph->max_stream_count = max_stream_count; | |||
| 476 | } | |||
| 477 | ||||
| 478 | int ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad) | |||
| 479 | { | |||
| 480 | if (dynamic_graph->no_grad == no_grad) | |||
| 481 | return -1; | |||
| 482 | dynamic_graph->no_grad = no_grad; | |||
| 483 | return 0; | |||
| 484 | } | |||
| 485 | ||||
| 486 | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type) | |||
| 487 | { | |||
| 488 | if (!graph->stream_map) | |||
| 489 | graph->stream_map = kh_init(stream_map)kh_init_stream_map(); | |||
| 490 | int ret = 0; | |||
| 491 | khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret)kh_put_stream_map(graph->stream_map, type, &ret); | |||
| 492 | assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if ( ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_nnc_dynamic_graph.c" , 492, __extension__ __PRETTY_FUNCTION__); })); | |||
| 493 | ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]); | |||
| 494 | // If ret == 0, the key already exist, we can return directly, otherwise, create and return. | |||
| 495 | if (ret != 0) | |||
| 496 | { | |||
| 497 | stream = ccv_nnc_stream_context_new(type); | |||
| 498 | kh_val(graph->stream_map, k)((graph->stream_map)->vals[k]) = stream; | |||
| 499 | } | |||
| 500 | return stream; | |||
| 501 | } | |||
| 502 | ||||
| 503 | typedef struct { | |||
| 504 | ccv_nnc_dynamic_graph_t* graph; | |||
| 505 | int stream_type; | |||
| 506 | } ccv_nnc_dynamic_graph_neighbor_context_discovery_t; | |||
| 507 | ||||
| 508 | static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context) | |||
| 509 | { | |||
| 510 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context; | |||
| 511 | int type = discovery->stream_type; | |||
| 512 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
| 513 | return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type); | |||
| 514 | } | |||
| 515 | ||||
| 516 | static int _ccv_nnc_dynamic_graph_mark_gpu_to_cpu_transfer(const ccv_nnc_cmd_t cmd, ccv_nnc_tensor_variable_t* const output_variables, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_tensor_t** const marked_tensors) | |||
| 517 | { | |||
| 518 | #ifdef HAVE_MPS | |||
| 519 | if (!stream_context || CCV_STREAM_GET_CONTEXT(stream_context->type)((stream_context->type) & 0x3) != CCV_STREAM_CONTEXT_GPU) | |||
| 520 | return 0; | |||
| 521 | if (cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD && cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD) | |||
| 522 | return 0; | |||
| 523 | int marked_size = 0; | |||
| 524 | int i; | |||
| 525 | for (i = 0; i < ccv_min(input_size, output_size)({ typeof (input_size) _a = (input_size); typeof (output_size ) _b = (output_size); (_a < _b) ? _a : _b; }); i++) | |||
| 526 | { | |||
| 527 | ccv_nnc_tensor_variable_t const output_variable = output_variables[i]; | |||
| 528 | ccv_nnc_tensor_view_t* const output_tensor_view = output_variable ? CCV_NNC_TENSOR_VIEW(output_variable->tensor_view)((ccv_nnc_tensor_view_t*)((uintptr_t)(output_variable->tensor_view ) & ~(uintptr_t)1)) : 0; | |||
| 529 | if (!inputs[i] || !outputs[i] || !output_variable || output_variable->alias_index_ref || | |||
| 530 | !output_tensor_view || CCV_IS_TENSOR_VIEW(output_tensor_view)((*(int*)(output_tensor_view)) & CCV_TENSOR_VIEW)) | |||
| 531 | continue; | |||
| 532 | if (CCV_TENSOR_GET_MEMORY(inputs[i]->info.type)((inputs[i]->info.type) & 0x3) != CCV_TENSOR_GPU_MEMORY || | |||
| 533 | CCV_TENSOR_GET_MEMORY(outputs[i]->info.type)((outputs[i]->info.type) & 0x3) != CCV_TENSOR_CPU_MEMORY) | |||
| 534 | continue; | |||
| 535 | if (ccv_nnc_tensor_data_size_without_padding(outputs[i]->info) == 0) | |||
| 536 | continue; | |||
| 537 | if (ccv_nnc_mps_tensor_fast_fence_mark_pending(outputs[i])) | |||
| 538 | marked_tensors[marked_size++] = outputs[i]; | |||
| 539 | } | |||
| 540 | return marked_size; | |||
| 541 | #else | |||
| 542 | return 0; | |||
| 543 | #endif | |||
| 544 | } | |||
| 545 | ||||
| 546 | static void _ccv_nnc_dynamic_graph_clear_fast_fence_marks(ccv_nnc_tensor_t* const* const marked_tensors, const int marked_size) | |||
| 547 | { | |||
| 548 | #ifdef HAVE_MPS | |||
| 549 | int i; | |||
| 550 | for (i = 0; i < marked_size; i++) | |||
| 551 | ccv_nnc_mps_tensor_fast_fence_clear(marked_tensors[i]); | |||
| 552 | #endif | |||
| 553 | } | |||
| 554 | ||||
| 555 | void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs) | |||
| 556 | { | |||
| 557 | int i, j; | |||
| 558 | for (i = 0; i < input_size; i++) | |||
| ||||
| 559 | if (inputs[i] && !inputs[i]->alias_index_ref) | |||
| 560 | { assert(inputs[i]->tensor_view)((void) sizeof ((inputs[i]->tensor_view) ? 1 : 0), __extension__ ({ if (inputs[i]->tensor_view) ; else __assert_fail ("inputs[i]->tensor_view" , "ccv_nnc_dynamic_graph.c", 560, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 561 | ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
| 562 | for (i = 0; i
| |||
| 563 | input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context)ccv_nnc_tensor_from_variable_impl(graph, inputs[i], stream_context ) : 0; | |||
| 564 | ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
| 565 | for (i = 0; i
| |||
| 566 | input_symbols[i] = inputs[i] ? _ccv_nnc_tensor_symbol_from_variable(graph, inputs[i]) : NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 567 | ccv_array_t* input_sources[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
| 568 | ccv_array_t* input_alias_sources[ccv_max(1, input_size)({ typeof (1) _a = (1); typeof (input_size) _b = (input_size) ; (_a > _b) ? _a : _b; })]; | |||
| 569 | for (i = 0; i
| |||
| 570 | { | |||
| 571 | input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(input_symbols[i].d))))->sources : 0; | |||
| 572 | if (inputs[i] && inputs[i]->alias_index_ref) | |||
| 573 | { | |||
| 574 | const int alias_index_ref = inputs[i]->alias_index_ref - 1; | |||
| 575 | assert(alias_index_ref >= 0)((void) sizeof ((alias_index_ref >= 0) ? 1 : 0), __extension__ ({ if (alias_index_ref >= 0) ; else __assert_fail ("alias_index_ref >= 0" , "ccv_nnc_dynamic_graph.c", 575, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 576 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index_ref))); | |||
| 577 | input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))))->sources; | |||
| 578 | } else | |||
| 579 | input_alias_sources[i] = 0; | |||
| 580 | } | |||
| 581 | const int parallel_count = ccv_max(1, parallel)({ typeof (1) _a = (1); typeof (parallel) _b = (parallel); (_a > _b) ? _a : _b; }); | |||
| 582 | assert(input_size % parallel_count == 0)((void) sizeof ((input_size % parallel_count == 0) ? 1 : 0), __extension__ ({ if (input_size % parallel_count == 0) ; else __assert_fail ("input_size % parallel_count == 0", "ccv_nnc_dynamic_graph.c" , 582, __extension__ __PRETTY_FUNCTION__); })); | |||
| 583 | const int per_input_size = input_size / parallel_count; | |||
| 584 | assert(output_size % parallel_count == 0)((void) sizeof ((output_size % parallel_count == 0) ? 1 : 0), __extension__ ({ if (output_size % parallel_count == 0) ; else __assert_fail ("output_size % parallel_count == 0", "ccv_nnc_dynamic_graph.c" , 584, __extension__ __PRETTY_FUNCTION__); })); | |||
| 585 | const int per_output_size = output_size / parallel_count; | |||
| 586 | int output_auto = 0; | |||
| 587 | for (i = 0; !output_auto
| |||
| 588 | output_auto = outputs[i] ? ccv_nnc_is_tensor_auto(outputs[i]->info) : 0; | |||
| 589 | // One extra step, infer the parameters for outputs. | |||
| 590 | if (output_auto
| |||
| 591 | { | |||
| 592 | ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)({ typeof (1) _a = (1); typeof (per_input_size) _b = (per_input_size ); (_a > _b) ? _a : _b; })]; | |||
| 593 | ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | |||
| 594 | for (i = 0; i < parallel_count; i++) | |||
| 595 | { | |||
| 596 | for (j = 0; j < per_input_size; j++) | |||
| 597 | input_params[j] = inputs[j + i * per_input_size] ? inputs[j + i * per_input_size]->info : ccv_nnc_tensor_auto; | |||
| 598 | for (j = 0; j < per_output_size; j++) | |||
| 599 | output_params[j] = outputs[j + i * per_output_size] ? outputs[j + i * per_output_size]->info : ccv_nnc_tensor_auto; | |||
| 600 | ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size); | |||
| 601 | for (j = 0; j < per_output_size; j++) | |||
| 602 | if (outputs[j + i * per_output_size]) | |||
| 603 | outputs[j + i * per_output_size]->info = output_params[j]; | |||
| 604 | } | |||
| 605 | } | |||
| 606 | int freeable_size = 0; | |||
| 607 | ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)({ typeof (1) _a = (1); typeof (output_size) _b = (output_size ); (_a > _b) ? _a : _b; })]; | |||
| 608 | // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee. | |||
| 609 | for (i = 0; i
| |||
| 610 | { | |||
| 611 | // First, go over to see whether there is enforce inplace. | |||
| 612 | int enforce_idx = -1; | |||
| 613 | for (j = 0; enforce_idx < 0 && j < input_size; j++) | |||
| 614 | if (inputs[j] && ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size)) | |||
| 615 | enforce_idx = j; | |||
| 616 | if (enforce_idx >= 0) | |||
| 617 | { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) ? 1 : 0 ), __extension__ ({ if (outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) ; else __assert_fail ("outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL" , "ccv_nnc_dynamic_graph.c", 617, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 618 | // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic. | |||
| 619 | if (outputs[i] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 620 | { | |||
| 621 | const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[i]->symbol.d))); | |||
| 622 | if (enforce_idx >= 0) | |||
| 623 | { assert(!bind->destinations || bind->destinations->rnum == 0)((void) sizeof ((!bind->destinations || bind->destinations ->rnum == 0) ? 1 : 0), __extension__ ({ if (!bind->destinations || bind->destinations->rnum == 0) ; else __assert_fail ("!bind->destinations || bind->destinations->rnum == 0" , "ccv_nnc_dynamic_graph.c", 623, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 624 | if (bind->sources && bind->sources->rnum > 0) | |||
| 625 | { | |||
| 626 | const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]); | |||
| 627 | // If this is enforce output, make sure the tensor view is taken by the output. | |||
| 628 | if (enforce_idx >= 0) | |||
| 629 | { | |||
| 630 | outputs[i]->destructor_hook = old_var->destructor_hook; | |||
| 631 | outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output. | |||
| 632 | old_var->tensor_view = 0; | |||
| 633 | } | |||
| 634 | } | |||
| 635 | } | |||
| 636 | } | |||
| 637 | ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | |||
| 638 | if (parallel_count > 1) | |||
| 639 | { | |||
| 640 | const int max_device_id_size = per_input_size + per_output_size; | |||
| 641 | assert(max_device_id_size > 0)((void) sizeof ((max_device_id_size > 0) ? 1 : 0), __extension__ ({ if (max_device_id_size > 0) ; else __assert_fail ("max_device_id_size > 0" , "ccv_nnc_dynamic_graph.c", 641, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 642 | int device_ids[max_device_id_size]; | |||
| 643 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
| 644 | ccv_nnc_stream_signal_t* signal; | |||
| 645 | if (stream_context) | |||
| 646 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
| 647 | for (i = 0; i
| |||
| 648 | { | |||
| 649 | int flag = 0; | |||
| 650 | for (j = 0; !flag
| |||
| 651 | if (input_tensors[i * per_input_size + j]) | |||
| ||||
| 652 | flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type)((input_tensors[i * per_input_size + j]->info.type) & 0x3 ) == CCV_TENSOR_GPU_MEMORY); | |||
| 653 | for (j = 0; j < per_output_size; j++) | |||
| 654 | { | |||
| 655 | output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context)ccv_nnc_tensor_from_variable_impl(graph, outputs[j + i * per_output_size ], stream_context) : 0; | |||
| 656 | if (output_tensors[j] && !flag) | |||
| 657 | flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type)((output_tensors[j]->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY); | |||
| 658 | } | |||
| 659 | const int stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
| 660 | const int tensor_type = flag ? CCV_TENSOR_GPU_MEMORY : CCV_TENSOR_CPU_MEMORY; | |||
| 661 | const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size); | |||
| 662 | ccv_nnc_stream_context_t* stream_0 = 0; | |||
| 663 | for (j = 0; j < device_id_size; j++) | |||
| 664 | { | |||
| 665 | int type = stream_type; | |||
| 666 | CCV_STREAM_SET_DEVICE_ID(type, device_ids[j])(type) = (((type) & ~0xfff00) | (((device_ids[j]) & 0xfff ) << 8)); | |||
| 667 | ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type); | |||
| 668 | if (!stream_0) | |||
| 669 | stream_0 = stream; | |||
| 670 | } | |||
| 671 | // Wait signal to finish. | |||
| 672 | if (stream_context) | |||
| 673 | { | |||
| 674 | if (stream_0) | |||
| 675 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
| 676 | else | |||
| 677 | ccv_nnc_stream_context_wait(stream_context); | |||
| 678 | } | |||
| 679 | if (stream_0) | |||
| 680 | { | |||
| 681 | ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = { | |||
| 682 | .graph = graph, | |||
| 683 | .stream_type = stream_type | |||
| 684 | }; | |||
| 685 | ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery); | |||
| 686 | } | |||
| 687 | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size , per_output_size); fflush(stdout); } } while (0); | |||
| 688 | int k; | |||
| 689 | for (k = 0; k < per_input_size; k++) | |||
| 690 | { | |||
| 691 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[k + i * per_input_size]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size ], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? (((input_tensors[k + i * per_input_size]-> info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | |||
| 692 | if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
| 693 | ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]); | |||
| 694 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
| 695 | } | |||
| 696 | for (k = 0; k < per_output_size; k++) | |||
| 697 | { | |||
| 698 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors [k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? (((output_tensors[k]->info.type) & 0xfff00) >> 8 ) : -1)); fflush(stdout); } } while (0); | |||
| 699 | if (output_tensors[k] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
| 700 | ccv_nnc_print_tensor_shape(output_tensors[k]); | |||
| 701 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
| 702 | } | |||
| 703 | ccv_nnc_tensor_t* marked_tensors[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | |||
| 704 | const int marked_size = _ccv_nnc_dynamic_graph_mark_gpu_to_cpu_transfer(cmd, outputs + i * per_output_size, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0, marked_tensors); | |||
| 705 | const int status = ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0); | |||
| 706 | if (status != 0) | |||
| 707 | _ccv_nnc_dynamic_graph_clear_fast_fence_marks(marked_tensors, marked_size); | |||
| 708 | if (status != 0) | |||
| 709 | PRINT(CCV_CLI_INFO, "Invalid Status: %d\n", status)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("Invalid Status: %d\n", status); fflush(stdout); } } while ( 0); | |||
| 710 | if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE)(CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) | |||
| 711 | { | |||
| 712 | for (k = 0; k < per_output_size; k++) | |||
| 713 | { | |||
| 714 | PRINT(CCV_CLI_VERBOSE, "POST: |<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1))do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf("POST: |<- %d. %p (%p:%d)", k + 1, output_tensors [k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? (((output_tensors[k]->info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | |||
| 715 | if (output_tensors[k]) | |||
| 716 | ccv_nnc_print_tensor_info(output_tensors[k]); | |||
| 717 | PRINT(CCV_CLI_VERBOSE, "\n")do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels())) { printf("\n"); fflush(stdout); } } while (0); | |||
| 718 | } | |||
| 719 | } | |||
| 720 | if (stream_context && stream_0) | |||
| 721 | { | |||
| 722 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
| 723 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
| 724 | } | |||
| 725 | streams[i] = stream_0; | |||
| 726 | } | |||
| 727 | if (!stream_context) | |||
| 728 | for (i = 0; i < parallel_count; i++) | |||
| 729 | if (streams[i]) | |||
| 730 | ccv_nnc_stream_context_wait(streams[i]); | |||
| 731 | } else { | |||
| 732 | for (i = 0; i < per_output_size; i++) | |||
| 733 | output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context)ccv_nnc_tensor_from_variable_impl(graph, outputs[i], stream_context ) : 0; | |||
| 734 | PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size , per_output_size); fflush(stdout); } } while (0); | |||
| 735 | for (i = 0; i < per_input_size; i++) | |||
| 736 | { | |||
| 737 | PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors [i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? ( ((input_tensors[i]->info.type) & 0xfff00) >> 8) : -1)); fflush(stdout); } } while (0); | |||
| 738 | if (input_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
| 739 | ccv_nnc_print_tensor_info(input_tensors[i]); | |||
| 740 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
| 741 | } | |||
| 742 | ccv_nnc_tensor_t* marked_tensors[ccv_max(1, per_output_size)({ typeof (1) _a = (1); typeof (per_output_size) _b = (per_output_size ); (_a > _b) ? _a : _b; })]; | |||
| 743 | const int marked_size = _ccv_nnc_dynamic_graph_mark_gpu_to_cpu_transfer(cmd, outputs, input_tensors, per_input_size, output_tensors, per_output_size, stream_context, marked_tensors); | |||
| 744 | const int status = ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context); | |||
| 745 | if (status != 0) | |||
| 746 | _ccv_nnc_dynamic_graph_clear_fast_fence_marks(marked_tensors, marked_size); | |||
| 747 | for (i = 0; i < per_output_size; i++) | |||
| 748 | { | |||
| 749 | PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[i]->info.type) : -1))do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors [i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? (((output_tensors[i]->info.type) & 0xfff00) >> 8 ) : -1)); fflush(stdout); } } while (0); | |||
| 750 | if (output_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO)(CCV_CLI_INFO & ccv_cli_get_output_levels())) | |||
| 751 | ccv_nnc_print_tensor_info(output_tensors[i]); | |||
| 752 | PRINT(CCV_CLI_INFO, "\n")do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { printf ("\n"); fflush(stdout); } } while (0); | |||
| 753 | } | |||
| 754 | } | |||
| 755 | int inputs_are_constants = 1; | |||
| 756 | for (i = 0; inputs_are_constants && i < input_size; i++) | |||
| 757 | if (inputs[i] && inputs[i]->type != CCV_NNC_TENSOR_CONSTANT) | |||
| 758 | inputs_are_constants = 0; | |||
| 759 | if (input_size > 0 && !inputs_are_constants && !graph->no_grad) // No need to record the execution if there is no input or we disabled gradient computation. | |||
| 760 | { | |||
| 761 | ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)({ typeof (1) _a = (1); typeof (output_size) _b = (output_size ); (_a > _b) ? _a : _b; })]; | |||
| 762 | for (i = 0; i < output_size; i++) | |||
| 763 | if (outputs[i]) | |||
| 764 | { | |||
| 765 | assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT)((void) sizeof ((outputs[i]->type != CCV_NNC_TENSOR_CONSTANT ) ? 1 : 0), __extension__ ({ if (outputs[i]->type != CCV_NNC_TENSOR_CONSTANT ) ; else __assert_fail ("outputs[i]->type != CCV_NNC_TENSOR_CONSTANT" , "ccv_nnc_dynamic_graph.c", 765, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 766 | output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]); | |||
| 767 | } else | |||
| 768 | output_symbols[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 769 | int t; | |||
| 770 | for (t = 0; t < parallel_count; t++) | |||
| 771 | { | |||
| 772 | ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0); | |||
| 773 | if (graph_execs) | |||
| 774 | graph_execs[t] = graph_exec; | |||
| 775 | // This needs to be done before we set the new sources on the outputs. | |||
| 776 | for (i = 0; i < per_input_size; i++) | |||
| 777 | { | |||
| 778 | ccv_array_t* const input_source = input_sources[i + t * per_input_size]; | |||
| 779 | if (input_source) | |||
| 780 | for (j = 0; j < input_source->rnum; j++) | |||
| 781 | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ | |||
| 782 | .d = *(int*)ccv_array_get(input_source, j)((void*)(((char*)((input_source)->data)) + (size_t)(input_source )->rsize * (size_t)(j))), | |||
| 783 | .graph = graph->tape | |||
| 784 | }, graph_exec); | |||
| 785 | ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size]; | |||
| 786 | if (input_alias_source) | |||
| 787 | for (j = 0; j < input_alias_source->rnum; j++) | |||
| 788 | ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){ | |||
| 789 | .d = *(int*)ccv_array_get(input_alias_source, j)((void*)(((char*)((input_alias_source)->data)) + (size_t)( input_alias_source)->rsize * (size_t)(j))), | |||
| 790 | .graph = graph->tape | |||
| 791 | }, graph_exec); | |||
| 792 | } | |||
| 793 | for (i = 0; i < per_input_size; i++) | |||
| 794 | { | |||
| 795 | ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size]; | |||
| 796 | if (!input || input_symbols[i + t * per_input_size].d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 797 | continue; | |||
| 798 | // Constant inputs still need lifetime tracking while this exec is alive because | |||
| 799 | // backward may read their concrete tensor buffers even though they do not require | |||
| 800 | // gradients themselves. | |||
| 801 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(input_symbols[i + t * per_input_size ].d))); | |||
| 802 | if (!bind->destinations) | |||
| 803 | bind->destinations = ccv_array_new(sizeof(int), 1, 0); | |||
| 804 | ccv_array_add_unique_int(bind->destinations, graph_exec.d); | |||
| 805 | if (input->alias_index_ref) | |||
| 806 | { | |||
| 807 | const int alias_index = input->alias_index_ref - 1; | |||
| 808 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 808, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 809 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
| 810 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))); | |||
| 811 | if (!root_bind->destinations) | |||
| 812 | root_bind->destinations = ccv_array_new(sizeof(int), 1, 0); | |||
| 813 | ccv_array_add_unique_int(root_bind->destinations, graph_exec.d); | |||
| 814 | } | |||
| 815 | } | |||
| 816 | for (i = 0; i < per_output_size; i++) | |||
| 817 | { | |||
| 818 | ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size]; | |||
| 819 | if (!output) | |||
| 820 | continue; | |||
| 821 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(output_symbols[i + t * per_output_size ].d))); | |||
| 822 | assert(!bind->sources)((void) sizeof ((!bind->sources) ? 1 : 0), __extension__ ( { if (!bind->sources) ; else __assert_fail ("!bind->sources" , "ccv_nnc_dynamic_graph.c", 822, __extension__ __PRETTY_FUNCTION__ ); })); // This is a new symbol, therefore, no binded sources associated yet. | |||
| 823 | bind->sources = ccv_array_new(sizeof(int), 1, 0); | |||
| 824 | ccv_array_add_unique_int(bind->sources, graph_exec.d); | |||
| 825 | if (output->alias_index_ref) | |||
| 826 | { | |||
| 827 | const int alias_index = output->alias_index_ref - 1; | |||
| 828 | assert(alias_index >= 0)((void) sizeof ((alias_index >= 0) ? 1 : 0), __extension__ ({ if (alias_index >= 0) ; else __assert_fail ("alias_index >= 0" , "ccv_nnc_dynamic_graph.c", 828, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 829 | ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(alias_index))); | |||
| 830 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(variable_to->symbol.d))); | |||
| 831 | if (!root_bind->sources) | |||
| 832 | root_bind->sources = ccv_array_new(sizeof(int), 1, 0); | |||
| 833 | ccv_array_add_unique_int(root_bind->sources, graph_exec.d); | |||
| 834 | } | |||
| 835 | } | |||
| 836 | } | |||
| 837 | } | |||
| 838 | // Now, able to free some of the reused outputs. | |||
| 839 | for (i = 0; i < freeable_size; i++) | |||
| 840 | ccv_nnc_tensor_variable_free(graph, freeables[i]); | |||
| 841 | } | |||
| 842 | ||||
| 843 | int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context) | |||
| 844 | { | |||
| 845 | ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0); | |||
| 846 | return CCV_NNC_EXEC_SUCCESS; | |||
| 847 | } | |||
| 848 | ||||
| 849 | static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d) | |||
| 850 | { | |||
| 851 | if (bind->alias_ref) | |||
| 852 | bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(bind->alias_ref - 1))); | |||
| 853 | if (!bind->sources || bind->sources->rnum == 0) | |||
| 854 | return 1; | |||
| 855 | int i; | |||
| 856 | for (i = 0; i < bind->sources->rnum; i++) | |||
| 857 | { | |||
| 858 | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | |||
| 859 | const ccv_nnc_graph_exec_symbol_t exec_symbol = { | |||
| 860 | .d = exec_symbol_d, | |||
| 861 | .graph = graph->tape | |||
| 862 | }; | |||
| 863 | const int* outputs; int output_size; | |||
| 864 | ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size); | |||
| 865 | int j; | |||
| 866 | for (j = 0; j < output_size; j++) | |||
| 867 | if (outputs[j] >= 0 && outputs[j] != symbol_d) // If output is me, it is the only output. | |||
| 868 | { | |||
| 869 | assert(outputs[j] < graph->binds->rnum)((void) sizeof ((outputs[j] < graph->binds->rnum) ? 1 : 0), __extension__ ({ if (outputs[j] < graph->binds-> rnum) ; else __assert_fail ("outputs[j] < graph->binds->rnum" , "ccv_nnc_dynamic_graph.c", 869, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 870 | const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | |||
| 871 | // This is in use and is it not a constant symbol. | |||
| 872 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
| 873 | return 0; | |||
| 874 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
| 875 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
| 876 | // The original is in use and is it not a constant symbol. | |||
| 877 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
| 878 | return 0; | |||
| 879 | if (other_bind->destinations && other_bind->destinations->rnum > 0) | |||
| 880 | return 0; | |||
| 881 | } | |||
| 882 | } | |||
| 883 | return 1; | |||
| 884 | } | |||
| 885 | ||||
| 886 | static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) | |||
| 887 | { | |||
| 888 | int i; | |||
| 889 | if (bind->destinations) | |||
| 890 | { | |||
| 891 | int flag = 0; | |||
| 892 | for (i = 0; !flag && i < bind->destinations->rnum; i++) | |||
| 893 | { | |||
| 894 | const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(i))); | |||
| 895 | if (exec_symbol_d == freed_exec_symbol_d) | |||
| 896 | { | |||
| 897 | if (i < bind->destinations->rnum - 1) | |||
| 898 | *(int*)ccv_array_get(bind->destinations, i)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(i))) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(bind->destinations ->rnum - 1))); | |||
| 899 | --bind->destinations->rnum; | |||
| 900 | flag = 1; | |||
| 901 | } | |||
| 902 | } | |||
| 903 | // This symbol can be freed. | |||
| 904 | if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) | |||
| 905 | { | |||
| 906 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | |||
| 907 | if (bind->alias_ref) | |||
| 908 | { | |||
| 909 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(bind->alias_ref - 1))); | |||
| 910 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
| 911 | root_bind = bind; | |||
| 912 | } | |||
| 913 | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. | |||
| 914 | // It is possible because exec will be freed already, thus, it is safe to remove this alias out. | |||
| 915 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && | |||
| 916 | ((!root_bind->sources || root_bind->sources->rnum == 0) || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | |||
| 917 | root_bind->destinations->rnum == 0) | |||
| 918 | { | |||
| 919 | if (root_bind->sources) | |||
| 920 | for (i = 0; i < root_bind->sources->rnum; i++) | |||
| 921 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | |||
| 922 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 923 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 924 | .d = tensor_index, | |||
| 925 | .graph = graph->tape | |||
| 926 | }); | |||
| 927 | } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations. | |||
| 928 | bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) { | |||
| 929 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 930 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 931 | .d = tensor_index, | |||
| 932 | .graph = graph->tape | |||
| 933 | }); | |||
| 934 | } | |||
| 935 | } | |||
| 936 | } | |||
| 937 | } | |||
| 938 | ||||
| 939 | static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws) | |||
| 940 | { | |||
| 941 | int i; | |||
| 942 | if (bind->sources) | |||
| 943 | { | |||
| 944 | int flag = 0; | |||
| 945 | for (i = 0; !flag && i < bind->sources->rnum; i++) | |||
| 946 | { | |||
| 947 | const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | |||
| 948 | if (exec_symbol_d == freed_exec_symbol_d) | |||
| 949 | { | |||
| 950 | if (i < bind->sources->rnum - 1) | |||
| 951 | *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(bind->sources->rnum - 1))); | |||
| 952 | --bind->sources->rnum; | |||
| 953 | flag = 1; | |||
| 954 | } | |||
| 955 | } | |||
| 956 | if (flag && !bind->alias_ref && bind->index >= 0 && bind->type == CCV_NNC_TENSOR_CONSTANT && // If it is detached (constant but previously has sources). Now can check again. | |||
| 957 | (bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | |||
| 958 | (!bind->destinations || bind->destinations->rnum == 0)) | |||
| 959 | { | |||
| 960 | // If this is constant, set it to be no symbol again. | |||
| 961 | ccv_nnc_tensor_variable_t tv = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, bind->index)((void*)(((char*)((graph->vars)->data)) + (size_t)(graph ->vars)->rsize * (size_t)(bind->index))); | |||
| 962 | tv->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 963 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 964 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 965 | .d = tensor_index, | |||
| 966 | .graph = graph->tape | |||
| 967 | }); | |||
| 968 | } else if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) { | |||
| 969 | // This symbol can be freed. | |||
| 970 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | |||
| 971 | if (bind->alias_ref) | |||
| 972 | { | |||
| 973 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(bind->alias_ref - 1))); | |||
| 974 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
| 975 | root_bind = bind; | |||
| 976 | } | |||
| 977 | // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more. | |||
| 978 | // It is possible because exec will be freed already, thus, it is safe to remove this alias out. | |||
| 979 | if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && | |||
| 980 | (root_bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) && | |||
| 981 | (!root_bind->destinations || root_bind->destinations->rnum == 0)) | |||
| 982 | { | |||
| 983 | for (i = 0; i < root_bind->sources->rnum; i++) | |||
| 984 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | |||
| 985 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 986 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 987 | .d = tensor_index, | |||
| 988 | .graph = graph->tape | |||
| 989 | }); | |||
| 990 | } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations. | |||
| 991 | bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) { | |||
| 992 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 993 | ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 994 | .d = tensor_index, | |||
| 995 | .graph = graph->tape | |||
| 996 | }); | |||
| 997 | } | |||
| 998 | } | |||
| 999 | } | |||
| 1000 | } | |||
| 1001 | ||||
| 1002 | static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws) | |||
| 1003 | { | |||
| 1004 | int i; | |||
| 1005 | for (i = 0; i < input_size; i++) | |||
| 1006 | if (inputs[i] >= 0 && inputs[i] < binds->rnum) | |||
| 1007 | { | |||
| 1008 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i])((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(inputs[i]))); | |||
| 1009 | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
| 1010 | continue; | |||
| 1011 | if (bind->alias_ref) | |||
| 1012 | { | |||
| 1013 | const int alias_to = bind->alias_ref - 1; | |||
| 1014 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(alias_to))); | |||
| 1015 | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) | |||
| 1016 | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); | |||
| 1017 | } | |||
| 1018 | _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws); | |||
| 1019 | } | |||
| 1020 | // Note that this works because there is no overlap of inputs / outputs. (What about alias?). | |||
| 1021 | for (i = 0; i < output_size; i++) | |||
| 1022 | if (outputs[i] >= 0 && outputs[i] < binds->rnum) | |||
| 1023 | { | |||
| 1024 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i])((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(outputs[i]))); | |||
| 1025 | if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE) | |||
| 1026 | continue; | |||
| 1027 | if (bind->alias_ref) | |||
| 1028 | { | |||
| 1029 | const int alias_to = bind->alias_ref - 1; | |||
| 1030 | ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to)((void*)(((char*)((binds)->data)) + (size_t)(binds)->rsize * (size_t)(alias_to))); | |||
| 1031 | if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE) | |||
| 1032 | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws); | |||
| 1033 | } | |||
| 1034 | _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws); | |||
| 1035 | } | |||
| 1036 | } | |||
| 1037 | ||||
| 1038 | static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol) | |||
| 1039 | { | |||
| 1040 | if (!graph->stateful_execs) | |||
| 1041 | return; | |||
| 1042 | assert(symbol.d >= 0)((void) sizeof ((symbol.d >= 0) ? 1 : 0), __extension__ ({ if (symbol.d >= 0) ; else __assert_fail ("symbol.d >= 0" , "ccv_nnc_dynamic_graph.c", 1042, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1043 | ccv_array_t* const stateful_execs = graph->stateful_execs; | |||
| 1044 | ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol); | |||
| 1045 | ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data; | |||
| 1046 | if (!stateful_exec) | |||
| 1047 | return; | |||
| 1048 | // If there is no backward, no need to apply gradients. | |||
| 1049 | // Otherwise, if we applied gradients, we can free it as well. | |||
| 1050 | // We don't free this stateful exec because apply gradients doesn't require any variables alive. | |||
| 1051 | if (!stateful_exec->did_backward_but_not_apply_gradients) | |||
| 1052 | { | |||
| 1053 | const int index = stateful_exec->index; | |||
| 1054 | ccfreefree(stateful_exec); | |||
| 1055 | if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0) | |||
| 1056 | graph->reuse_stateful_exec = index; | |||
| 1057 | *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index)((void*)(((char*)((stateful_execs)->data)) + (size_t)(stateful_execs )->rsize * (size_t)(index))) = 0; | |||
| 1058 | } else | |||
| 1059 | stateful_exec->should_free = 1; | |||
| 1060 | } | |||
| 1061 | ||||
| 1062 | static int _ccv_nnc_tensor_bind_trace_forward_to_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_graph_bind_t* const bind, ccv_nnc_tensor_variable_graph_bind_t* const root_bind, int* const ws_start, const int assuming_no_source) // assuming_no_source means we are going to remove sources if possible, thus, it is irrelevant. | |||
| 1063 | { | |||
| 1064 | int can_free_symbol = 0; | |||
| 1065 | const int sources_and_is_only_output = (root_bind->sources && root_bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d); | |||
| 1066 | if (!root_bind->sources || root_bind->sources->rnum == 0 || sources_and_is_only_output || assuming_no_source) | |||
| 1067 | { | |||
| 1068 | int i, j; | |||
| 1069 | can_free_symbol = 1; // Assume we can free this symbol. | |||
| 1070 | if (!graph->ws) | |||
| 1071 | graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 0, 0); | |||
| 1072 | ccv_array_t* const ws = graph->ws; | |||
| 1073 | ccv_array_clear(ws); | |||
| 1074 | if (root_bind->destinations) | |||
| 1075 | for (i = 0; i < root_bind->destinations->rnum; i++) | |||
| 1076 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i)((void*)(((char*)((root_bind->destinations)->data)) + ( size_t)(root_bind->destinations)->rsize * (size_t)(i)))); | |||
| 1077 | const int ws_init_size = ws->rnum; | |||
| 1078 | *ws_start = ws_init_size; | |||
| 1079 | // Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free. | |||
| 1080 | if (root_bind->sources) | |||
| 1081 | for (i = 0; i < root_bind->sources->rnum; i++) | |||
| 1082 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i)((void*)(((char*)((root_bind->sources)->data)) + (size_t )(root_bind->sources)->rsize * (size_t)(i)))); | |||
| 1083 | // If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want | |||
| 1084 | // to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol. | |||
| 1085 | if (ws_init_size == 0) | |||
| 1086 | can_free_symbol = (!bind->alias_ref || root_bind->index < 0); | |||
| 1087 | // Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources. | |||
| 1088 | for (i = 0; i < ws_init_size; i++) | |||
| 1089 | { | |||
| 1090 | const int exec_symbol_d = *(int*)ccv_array_get(ws, i)((void*)(((char*)((ws)->data)) + (size_t)(ws)->rsize * ( size_t)(i))); | |||
| 1091 | const ccv_nnc_graph_exec_symbol_t symbol = { | |||
| 1092 | .d = exec_symbol_d, | |||
| 1093 | .graph = graph->tape | |||
| 1094 | }; | |||
| 1095 | const int* inputs; int input_size; | |||
| 1096 | const int* outputs; int output_size; | |||
| 1097 | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); | |||
| 1098 | int flag = 0; // flag denotes whether there are cases to keep this exec symbol. | |||
| 1099 | if (!root_bind->sources || root_bind->sources->rnum == 0 || assuming_no_source) | |||
| 1100 | { | |||
| 1101 | // If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this. | |||
| 1102 | for (j = 0; !flag && j < input_size; j++) | |||
| 1103 | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d) | |||
| 1104 | { | |||
| 1105 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(inputs[j]))); | |||
| 1106 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
| 1107 | flag = 1; | |||
| 1108 | else { | |||
| 1109 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
| 1110 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
| 1111 | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT && other_bind->sources && other_bind->sources->rnum > 0); // Constant should have no source, or it is detached. | |||
| 1112 | } | |||
| 1113 | } | |||
| 1114 | } else { | |||
| 1115 | // If there are sources, check whether we have outputs or not. If we do, we cannot free this. | |||
| 1116 | for (j = 0; !flag && j < output_size; j++) | |||
| 1117 | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) | |||
| 1118 | { | |||
| 1119 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | |||
| 1120 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
| 1121 | flag = 1; | |||
| 1122 | else { | |||
| 1123 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
| 1124 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
| 1125 | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->destinations && other_bind->destinations->rnum > 0); | |||
| 1126 | } | |||
| 1127 | } | |||
| 1128 | } | |||
| 1129 | // This exec can be freed if there is no input required or there is no output required. | |||
| 1130 | can_free_symbol = (can_free_symbol && !flag); | |||
| 1131 | if (!flag) | |||
| 1132 | { | |||
| 1133 | // Go over inputs and remove all references from binded destinations. | |||
| 1134 | // and go over outputs remove all references from binded sources. | |||
| 1135 | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); | |||
| 1136 | const int* outgoings; int outgoing_size; | |||
| 1137 | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); | |||
| 1138 | for (j = 0; j < outgoing_size; j++) | |||
| 1139 | ccv_array_add_unique_int(ws, outgoings[j]); | |||
| 1140 | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); | |||
| 1141 | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); | |||
| 1142 | } | |||
| 1143 | } | |||
| 1144 | } | |||
| 1145 | return can_free_symbol; | |||
| 1146 | } | |||
| 1147 | ||||
| 1148 | static void _ccv_nnc_tensor_bind_trace_backward_to_free(ccv_nnc_dynamic_graph_t* const graph, ccv_array_t* const ws, const int ws_start) | |||
| 1149 | { | |||
| 1150 | int i, j; | |||
| 1151 | // Now, go over the outgoings, if it is removed, add more to it. Note that the ws array can grow while iterating over. | |||
| 1152 | for (i = ws_start; i < ws->rnum; i++) | |||
| 1153 | { | |||
| 1154 | const int exec_symbol_d = *(int*)ccv_array_get(ws, i)((void*)(((char*)((ws)->data)) + (size_t)(ws)->rsize * ( size_t)(i))); | |||
| 1155 | const ccv_nnc_graph_exec_symbol_t symbol = { | |||
| 1156 | .d = exec_symbol_d, | |||
| 1157 | .graph = graph->tape | |||
| 1158 | }; | |||
| 1159 | const int* inputs; int input_size; | |||
| 1160 | const int* outputs; int output_size; | |||
| 1161 | ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size); | |||
| 1162 | int flag = 0; | |||
| 1163 | for (j = 0; !flag && j < input_size; j++) | |||
| 1164 | if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum) | |||
| 1165 | { | |||
| 1166 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(inputs[j]))); | |||
| 1167 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
| 1168 | flag = 1; | |||
| 1169 | else { | |||
| 1170 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
| 1171 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
| 1172 | flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT && other_bind->sources && other_bind->sources->rnum > 0); | |||
| 1173 | } | |||
| 1174 | } | |||
| 1175 | if (flag) // If any inputs make free this destination impossible. Check whether all its outputs are done. | |||
| 1176 | { | |||
| 1177 | int output_flag = 0; | |||
| 1178 | for (j = 0; !output_flag && j < output_size; j++) | |||
| 1179 | if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum) | |||
| 1180 | { | |||
| 1181 | ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j])((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(outputs[j]))); | |||
| 1182 | if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) | |||
| 1183 | output_flag = 1; | |||
| 1184 | else { | |||
| 1185 | if (other_bind->alias_ref) // If this is alias, use its original's destinations. | |||
| 1186 | other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(other_bind->alias_ref - 1 ))); | |||
| 1187 | output_flag = (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT) || (other_bind->destinations && other_bind->destinations->rnum > 0); | |||
| 1188 | } | |||
| 1189 | } | |||
| 1190 | if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination). | |||
| 1191 | flag = 0; | |||
| 1192 | } | |||
| 1193 | // Went over all the inputs, it turns out no more inputs has other references, safe to remove. | |||
| 1194 | if (!flag) | |||
| 1195 | { | |||
| 1196 | _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws); | |||
| 1197 | const int* outgoings; int outgoing_size; | |||
| 1198 | ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size); | |||
| 1199 | // It it has outgoings, add that for further inspection. | |||
| 1200 | for (j = 0; j < outgoing_size; j++) | |||
| 1201 | ccv_array_add_unique_int(ws, outgoings[j]); | |||
| 1202 | _ccv_nnc_stateful_exec_free_if_possible(graph, symbol); | |||
| 1203 | ccv_nnc_graph_exec_symbol_free(graph->tape, symbol); | |||
| 1204 | } | |||
| 1205 | } | |||
| 1206 | } | |||
| 1207 | ||||
| 1208 | void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
| 1209 | { | |||
| 1210 | // If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output. | |||
| 1211 | if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1212 | { | |||
| 1213 | // If it is not a free variable, when can we free the symbol and the underlying variable? | |||
| 1214 | // 1. There should be no sources (the command generate this tensor should be freed) or the output of these sources is only the current one; | |||
| 1215 | // 2. The destinations (the commands that uses this tensor) should have no other inputs, or the other inputs has no binded sources as well. | |||
| 1216 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(tensor_variable->symbol.d ))); | |||
| 1217 | // There should be no source associated with it no more. | |||
| 1218 | // I am free if no exec symbol is producing me or the symbol producing me can only producing me (thus, it is not required to | |||
| 1219 | // compute gradient because I am the only variable it can compute gradient for). | |||
| 1220 | ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind; | |||
| 1221 | if (bind->alias_ref) | |||
| 1222 | { | |||
| 1223 | const int alias_to = bind->alias_ref - 1; | |||
| 1224 | root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(alias_to))); | |||
| 1225 | } | |||
| 1226 | int ws_start; | |||
| 1227 | const int can_free_symbol = _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, root_bind, &ws_start, 0); | |||
| 1228 | if (can_free_symbol) | |||
| 1229 | { | |||
| 1230 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 1231 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | |||
| 1232 | _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start); | |||
| 1233 | } else { // If this symbol is not freed, move the tensor view to the bind. | |||
| 1234 | // If current bind is an alias, and it doesn't have any sources or destinations. We cannot find this alias | |||
| 1235 | // through any exec. This is not only safe to delete, but has to be deleted. We don't need to handle this | |||
| 1236 | // if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the | |||
| 1237 | // alias in that process. | |||
| 1238 | if (bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) | |||
| 1239 | { | |||
| 1240 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 1241 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | |||
| 1242 | } else { | |||
| 1243 | bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol extra will continue exists. | |||
| 1244 | bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback. | |||
| 1245 | bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context. | |||
| 1246 | bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind. | |||
| 1247 | tensor_variable->tensor_view = 0; | |||
| 1248 | } | |||
| 1249 | } | |||
| 1250 | } | |||
| 1251 | _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1); | |||
| 1252 | } | |||
| 1253 | ||||
| 1254 | void ccv_nnc_tensor_variable_detach(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable) | |||
| 1255 | { | |||
| 1256 | // This cannot be an alias. | |||
| 1257 | assert(!tensor_variable->alias_index_ref)((void) sizeof ((!tensor_variable->alias_index_ref) ? 1 : 0 ), __extension__ ({ if (!tensor_variable->alias_index_ref) ; else __assert_fail ("!tensor_variable->alias_index_ref" , "ccv_nnc_dynamic_graph.c", 1257, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1258 | // If no computation done yet, mark this as constant. | |||
| 1259 | if (tensor_variable->symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1260 | { | |||
| 1261 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | |||
| 1262 | return; | |||
| 1263 | } | |||
| 1264 | // Otherwise, we need to do some book keeping updates to make sure it doesn't participate gradient computation any more. | |||
| 1265 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(tensor_variable->symbol.d ))); | |||
| 1266 | // Because tensor variable cannot be alias, its bind cannot have alias pointer. | |||
| 1267 | assert(!bind->alias_ref)((void) sizeof ((!bind->alias_ref) ? 1 : 0), __extension__ ({ if (!bind->alias_ref) ; else __assert_fail ("!bind->alias_ref" , "ccv_nnc_dynamic_graph.c", 1267, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1268 | // Go through to break ties between sources and destinations. | |||
| 1269 | int i, j; | |||
| 1270 | if (bind->sources && bind->destinations) | |||
| 1271 | { | |||
| 1272 | for (i = 0; i < bind->sources->rnum; i++) | |||
| 1273 | { | |||
| 1274 | const int s = *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i))); | |||
| 1275 | const int* outputs; int output_size; | |||
| 1276 | const ccv_nnc_graph_exec_symbol_t s_symbol = { | |||
| 1277 | .d = s, | |||
| 1278 | .graph = graph->tape | |||
| 1279 | }; | |||
| 1280 | ccv_nnc_graph_exec_symbol_io(graph->tape, s_symbol, 0, 0, &outputs, &output_size); | |||
| 1281 | for (j = 0; j < bind->destinations->rnum; j++) | |||
| 1282 | { | |||
| 1283 | const int d = *(int*)ccv_array_get(bind->destinations, j)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(j))); | |||
| 1284 | const ccv_nnc_graph_exec_symbol_t d_symbol = { | |||
| 1285 | .d = d, | |||
| 1286 | .graph = graph->tape | |||
| 1287 | }; | |||
| 1288 | const int* inputs; int input_size; | |||
| 1289 | ccv_nnc_graph_exec_symbol_io(graph->tape, d_symbol, &inputs, &input_size, 0, 0); | |||
| 1290 | int x, y; | |||
| 1291 | int flag = 0; // Whether we find a symbol that connects source and destination but not the current one we detach. If found, we cannot break the tie between s_symbol and d_symbol. | |||
| 1292 | for (x = 0; !flag && x < output_size; x++) | |||
| 1293 | { | |||
| 1294 | ccv_nnc_tensor_symbol_t x_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 1295 | .d = outputs[x], | |||
| 1296 | .graph = graph->tape | |||
| 1297 | }); | |||
| 1298 | if (x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1299 | { | |||
| 1300 | x_symbol.d = outputs[x]; | |||
| 1301 | x_symbol.graph = graph->tape; | |||
| 1302 | } | |||
| 1303 | if (x_symbol.d == tensor_variable->symbol.d || x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1304 | continue; | |||
| 1305 | for (y = 0; !flag && y < input_size; y++) | |||
| 1306 | { | |||
| 1307 | ccv_nnc_tensor_symbol_t y_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){ | |||
| 1308 | .d = inputs[y], | |||
| 1309 | .graph = graph->tape | |||
| 1310 | }); | |||
| 1311 | if (y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1312 | { | |||
| 1313 | y_symbol.d = inputs[y]; | |||
| 1314 | y_symbol.graph = graph->tape; | |||
| 1315 | } | |||
| 1316 | if (y_symbol.d == tensor_variable->symbol.d || y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1317 | continue; | |||
| 1318 | flag = (x_symbol.d == y_symbol.d); | |||
| 1319 | } | |||
| 1320 | } | |||
| 1321 | if (!flag) | |||
| 1322 | ccv_nnc_graph_exec_symbol_disjoin(graph->tape, s_symbol, d_symbol); | |||
| 1323 | } | |||
| 1324 | } | |||
| 1325 | } | |||
| 1326 | const int sources_and_is_only_output = (bind->sources && bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d); | |||
| 1327 | if (!bind->sources || bind->sources->rnum == 0 || sources_and_is_only_output) | |||
| 1328 | { | |||
| 1329 | int ws_start = -1; | |||
| 1330 | _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, bind, &ws_start, 1); | |||
| 1331 | // Because we are detaching from the graph, there is no need to forward trace to see if it is not used and | |||
| 1332 | // then to remove the source execs. We can remove them right now, breaking the graph in two. That is why | |||
| 1333 | // we called trace backward to free regardless the outcome of the forward to free. | |||
| 1334 | if (ws_start == -1) | |||
| 1335 | { | |||
| 1336 | if (!graph->ws) | |||
| 1337 | graph->ws = ccv_array_new(sizeof(int), bind->destinations ? bind->destinations->rnum : 0, 0); | |||
| 1338 | ccv_array_t* const ws = graph->ws; | |||
| 1339 | ccv_array_clear(ws); | |||
| 1340 | if (bind->sources) | |||
| 1341 | for (i = 0; i < bind->sources->rnum; i++) | |||
| 1342 | ccv_array_add_unique_int(ws, *(int*)ccv_array_get(bind->sources, i)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(i)))); | |||
| 1343 | ws_start = 0; | |||
| 1344 | } | |||
| 1345 | _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start); | |||
| 1346 | } | |||
| 1347 | // If now bind has no relevant sources or destinations, we can safely free the underlying tensor symbol. | |||
| 1348 | if ((!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) | |||
| 1349 | { | |||
| 1350 | _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1); | |||
| 1351 | ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol); | |||
| 1352 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | |||
| 1353 | tensor_variable->symbol = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 1354 | return; | |||
| 1355 | } | |||
| 1356 | // Mark both as constant, such that even if it cannot be freed now, it can be freed as soon as possible later. | |||
| 1357 | bind->type = CCV_NNC_TENSOR_CONSTANT; | |||
| 1358 | tensor_variable->type = CCV_NNC_TENSOR_CONSTANT; | |||
| 1359 | } | |||
| 1360 | ||||
| 1361 | void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask) | |||
| 1362 | { | |||
| 1363 | int i, j; | |||
| 1364 | ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0); | |||
| 1365 | for (i = 0; i < source_variable_size; i++) | |||
| 1366 | { | |||
| 1367 | if (source_variables[i]->symbol.d < 0) | |||
| 1368 | continue; | |||
| 1369 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(source_variables[i]->symbol .d))); | |||
| 1370 | if (bind->destinations && bind->destinations->rnum > 0) | |||
| 1371 | for (j = 0; j < bind->destinations->rnum; j++) | |||
| 1372 | { | |||
| 1373 | // It is ok to have duplicate symbols. | |||
| 1374 | const int d = *(int*)ccv_array_get(bind->destinations, j)((void*)(((char*)((bind->destinations)->data)) + (size_t )(bind->destinations)->rsize * (size_t)(j))); | |||
| 1375 | ccv_nnc_graph_exec_symbol_t symbol = { | |||
| 1376 | .d = d, | |||
| 1377 | .graph = graph->tape | |||
| 1378 | }; | |||
| 1379 | ccv_array_push(sources_destinations, &symbol); | |||
| 1380 | } | |||
| 1381 | } | |||
| 1382 | const int source_size = sources_destinations->rnum; | |||
| 1383 | for (i = 0; i < destination_variable_size; i++) | |||
| 1384 | { | |||
| 1385 | if (destination_variables[i]->symbol.d < 0) | |||
| 1386 | continue; | |||
| 1387 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(destination_variables[i]-> symbol.d))); | |||
| 1388 | if (bind->sources && bind->sources->rnum > 0) | |||
| 1389 | for (j = 0; j < bind->sources->rnum; j++) | |||
| 1390 | { | |||
| 1391 | // It is ok to have duplicate symbols. | |||
| 1392 | const int d = *(int*)ccv_array_get(bind->sources, j)((void*)(((char*)((bind->sources)->data)) + (size_t)(bind ->sources)->rsize * (size_t)(j))); | |||
| 1393 | ccv_nnc_graph_exec_symbol_t symbol = { | |||
| 1394 | .d = d, | |||
| 1395 | .graph = graph->tape | |||
| 1396 | }; | |||
| 1397 | ccv_array_push(sources_destinations, &symbol); | |||
| 1398 | } | |||
| 1399 | } | |||
| 1400 | const int destination_size = sources_destinations->rnum - source_size; | |||
| 1401 | if (source_size == 0 || destination_size == 0) | |||
| 1402 | { | |||
| 1403 | ccv_array_free(sources_destinations); | |||
| 1404 | return; | |||
| 1405 | } | |||
| 1406 | const int bitmask_size = ((source_size + 63) >> 6); | |||
| 1407 | assert(bitmask_size < 256)((void) sizeof ((bitmask_size < 256) ? 1 : 0), __extension__ ({ if (bitmask_size < 256) ; else __assert_fail ("bitmask_size < 256" , "ccv_nnc_dynamic_graph.c", 1407, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1408 | uint64_t exec_bitmask[bitmask_size]; | |||
| 1409 | ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0)((void*)(((char*)((sources_destinations)->data)) + (size_t )(sources_destinations)->rsize * (size_t)(0))), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size)((void*)(((char*)((sources_destinations)->data)) + (size_t )(sources_destinations)->rsize * (size_t)(source_size))), destination_size, exec_bitmask); | |||
| 1410 | int k = 0; | |||
| 1411 | for (i = 0; i < source_variable_size; i++) | |||
| 1412 | { | |||
| 1413 | if (source_variables[i]->symbol.d < 0) | |||
| 1414 | { | |||
| 1415 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 1416 | continue; | |||
| 1417 | } | |||
| 1418 | ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d)((void*)(((char*)((graph->binds)->data)) + (size_t)(graph ->binds)->rsize * (size_t)(source_variables[i]->symbol .d))); | |||
| 1419 | int flag = 0; | |||
| 1420 | if (bind->destinations && bind->destinations->rnum > 0) | |||
| 1421 | { | |||
| 1422 | assert(k <= source_size - bind->destinations->rnum)((void) sizeof ((k <= source_size - bind->destinations-> rnum) ? 1 : 0), __extension__ ({ if (k <= source_size - bind ->destinations->rnum) ; else __assert_fail ("k <= source_size - bind->destinations->rnum" , "ccv_nnc_dynamic_graph.c", 1422, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1423 | for (j = 0; !flag && j < bind->destinations->rnum; j++) | |||
| 1424 | flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]); | |||
| 1425 | k += bind->destinations->rnum; | |||
| 1426 | } | |||
| 1427 | if (flag) | |||
| 1428 | bitmask[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 1429 | else | |||
| 1430 | bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 1431 | } | |||
| 1432 | ccv_array_free(sources_destinations); | |||
| 1433 | } | |||
| 1434 | ||||
| 1435 | int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type) | |||
| 1436 | { | |||
| 1437 | return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type); | |||
| 1438 | } | |||
| 1439 | ||||
| 1440 | void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out) | |||
| 1441 | { | |||
| 1442 | ccv_nnc_symbolic_graph_dot(graph->tape, flags, out); | |||
| 1443 | } | |||
| 1444 | ||||
| 1445 | void ccv_nnc_dynamic_graph_format(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context) | |||
| 1446 | { | |||
| 1447 | ccv_nnc_symbolic_graph_format(graph->tape, 0, 0, 0, 0, format_fn, context); | |||
| 1448 | } |