| File: | nnc/ccv_cnnp_model.c |
| Warning: | line 2728, column 13 Array access (via field 'vals') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | #include "ccv_nnc.h" | |||
| 2 | #include "ccv_nnc_easy.h" | |||
| 3 | #include "ccv_nnc_internal.h" | |||
| 4 | #include "ccv_internal.h" | |||
| 5 | #include "_ccv_cnnp_model.h" | |||
| 6 | #include "_ccv_nnc_graph.h" | |||
| 7 | #ifdef HAVE_CUDA1 | |||
| 8 | #include "gpu/ccv_nnc_compat.h" | |||
| 9 | #endif | |||
| 10 | ||||
| 11 | // MARK - Level-5 API | |||
| 12 | ||||
| 13 | ccv_cnnp_model_io_t ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size) | |||
| 14 | { | |||
| 15 | if (!model->io) | |||
| 16 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | |||
| 17 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s) + sizeof(ccv_nnc_tensor_symbol_t) * model->output_size); | |||
| 18 | model_io->param_ref = 0; | |||
| 19 | model_io->param_sel = 0; | |||
| 20 | model_io->visit = 0; | |||
| 21 | model_io->model = model; | |||
| 22 | model_io->dependencies = 0; | |||
| 23 | model_io->dependents = 0; | |||
| 24 | model_io->outgoings = 0; | |||
| 25 | model_io->outputs = (ccv_nnc_tensor_symbol_t*)(model_io + 1); | |||
| 26 | ccv_array_push(model->io, &model_io); | |||
| 27 | if (input_size > 0) | |||
| 28 | { | |||
| 29 | model_io->incomings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), input_size, 0); | |||
| 30 | ccv_array_resize(model_io->incomings, input_size); | |||
| 31 | int i; | |||
| 32 | memcpy(ccv_array_get(model_io->incomings, 0)((void*)(((char*)((model_io->incomings)->data)) + (size_t )(model_io->incomings)->rsize * (size_t)(0))), inputs, sizeof(ccv_cnnp_model_io_t) * input_size); | |||
| 33 | for (i = 0; i < input_size; i++) | |||
| 34 | { | |||
| 35 | if (!inputs[i]->outgoings) | |||
| 36 | inputs[i]->outgoings = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | |||
| 37 | ccv_array_push(inputs[i]->outgoings, &model_io); | |||
| 38 | } | |||
| 39 | } else { | |||
| 40 | model_io->incomings = 0; | |||
| 41 | } | |||
| 42 | return model_io; | |||
| 43 | } | |||
| 44 | ||||
| 45 | void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size) | |||
| 46 | { | |||
| 47 | assert(dependency_size > 0)((void) sizeof ((dependency_size > 0) ? 1 : 0), __extension__ ({ if (dependency_size > 0) ; else __assert_fail ("dependency_size > 0" , "ccv_cnnp_model.c", 47, __extension__ __PRETTY_FUNCTION__); })); | |||
| 48 | if (!model_io->dependencies) | |||
| 49 | model_io->dependencies = ccv_array_new(sizeof(ccv_cnnp_model_io_t), dependency_size, 0); | |||
| 50 | int i, j; | |||
| 51 | for (i = 0; i < dependency_size; i++) | |||
| 52 | { | |||
| 53 | int flag = 0; | |||
| 54 | // Check if it is already exist or not. | |||
| 55 | for (j = 0; !flag && j < model_io->dependencies->rnum; j++) | |||
| 56 | if (*(ccv_cnnp_model_io_t*)ccv_array_get(model_io->dependencies, j)((void*)(((char*)((model_io->dependencies)->data)) + (size_t )(model_io->dependencies)->rsize * (size_t)(j))) == dependencies[i]) | |||
| 57 | flag = 1; | |||
| 58 | if (flag) | |||
| 59 | continue; | |||
| 60 | ccv_array_push(model_io->dependencies, dependencies + i); | |||
| 61 | ++dependencies[i]->dependents; | |||
| 62 | } | |||
| 63 | } | |||
| 64 | ||||
| 65 | int ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model) | |||
| 66 | { | |||
| 67 | return model->output_size; | |||
| 68 | } | |||
| 69 | ||||
| 70 | int ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model) | |||
| 71 | { | |||
| 72 | // If the model is compiled, it is default to 1 unless it is not. | |||
| 73 | if (model->compiled_data) | |||
| 74 | return model->is_trainable >= 0 ? model->is_trainable : 1; | |||
| 75 | return model->is_trainable; | |||
| 76 | } | |||
| 77 | ||||
| 78 | ccv_cnnp_model_io_t ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index) | |||
| 79 | { | |||
| 80 | if (!model->io) | |||
| 81 | model->io = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 1, 0); | |||
| 82 | ccv_cnnp_model_io_t model_io = ccmallocmalloc(sizeof(struct ccv_cnnp_model_io_s)); | |||
| 83 | model_io->param_ref = index >= 0 ? index + 1 : ALL_PARAMETERS-1; | |||
| 84 | model_io->param_sel = selector >= 0 ? selector + 1 : ALL_PARAMETERS-1; | |||
| 85 | model_io->visit = 0; | |||
| 86 | model_io->model = model; | |||
| 87 | model_io->outputs = 0; | |||
| 88 | model_io->dependencies = 0; | |||
| 89 | model_io->dependents = 0; | |||
| 90 | model_io->incomings = 0; | |||
| 91 | model_io->outgoings = 0; | |||
| 92 | ccv_array_push(model->io, &model_io); | |||
| 93 | return model_io; | |||
| 94 | } | |||
| 95 | ||||
| 96 | void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context) | |||
| 97 | { | |||
| 98 | model->notify_hook.func = func; | |||
| 99 | model->notify_hook.context = context; | |||
| 100 | } | |||
| 101 | ||||
| 102 | void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload) | |||
| 103 | { | |||
| 104 | if (model->notify_hook.func) | |||
| 105 | model->notify_hook.func(model, tag, payload, model->notify_hook.context); | |||
| 106 | if (model->isa->notify) | |||
| 107 | model->isa->notify(model, tag, payload); | |||
| 108 | } | |||
| 109 | ||||
| 110 | static int _ccv_nnc_array_dedup_graph_exec_symbols(ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols, int graph_exec_symbol_size) | |||
| 111 | { | |||
| 112 | int i, j; | |||
| 113 | for (i = 0; i < graph_exec_symbol_size; i++) | |||
| 114 | { | |||
| 115 | ccv_nnc_graph_exec_symbol_t* const graph_exec_symbol = graph_exec_symbols + i; | |||
| 116 | // Check whether this tensor symbol has any duplicate. | |||
| 117 | for (j = i + 1; j < graph_exec_symbol_size;) | |||
| 118 | { | |||
| 119 | ccv_nnc_graph_exec_symbol_t* const other_symbol = graph_exec_symbols + j; | |||
| 120 | // If there is a same tensor symbol, remove it. | |||
| 121 | if (other_symbol->d == graph_exec_symbol->d && other_symbol->graph == graph_exec_symbol->graph) | |||
| 122 | { | |||
| 123 | if (j + 1 < graph_exec_symbol_size) | |||
| 124 | *other_symbol = graph_exec_symbols[graph_exec_symbol_size - 1]; | |||
| 125 | --graph_exec_symbol_size; | |||
| 126 | continue; | |||
| 127 | } | |||
| 128 | ++j; | |||
| 129 | } | |||
| 130 | } | |||
| 131 | return graph_exec_symbol_size; | |||
| 132 | } | |||
| 133 | ||||
| 134 | void ccv_cnnp_model_add_to_array(void* const context, const ccv_nnc_tensor_symbol_t symbol, const int is_trainable) | |||
| 135 | { | |||
| 136 | ccv_cnnp_model_add_to_array_context_t* const add_to_array_context = (ccv_cnnp_model_add_to_array_context_t*)context; | |||
| 137 | ccv_cnnp_model_t* const model = add_to_array_context->sequence->model; | |||
| 138 | int i; | |||
| 139 | if (add_to_array_context->add_parameter_indices && !model->parameter_indices) | |||
| 140 | model->parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 141 | for (i = 0; i < add_to_array_context->symbols->rnum; i++) | |||
| 142 | { | |||
| 143 | const ccv_nnc_tensor_symbol_t other_symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(add_to_array_context->symbols, i)((void*)(((char*)((add_to_array_context->symbols)->data )) + (size_t)(add_to_array_context->symbols)->rsize * ( size_t)(i))); | |||
| 144 | if (other_symbol.d == symbol.d && other_symbol.graph == symbol.graph) | |||
| 145 | { | |||
| 146 | // Only add to parameter_indices if it is trainable. | |||
| 147 | if (add_to_array_context->add_parameter_indices) | |||
| 148 | ccv_array_add_unique_int(model->parameter_indices, i); | |||
| 149 | // Found it, return, don't add it. | |||
| 150 | return; | |||
| 151 | } | |||
| 152 | } | |||
| 153 | // Only add to parameter_indices if it is trainable. | |||
| 154 | if (add_to_array_context->add_parameter_indices) | |||
| 155 | ccv_array_push(model->parameter_indices, &add_to_array_context->symbols->rnum); | |||
| 156 | // This is a new one, no need to add_unique_int, it is unique. | |||
| 157 | ccv_array_push(add_to_array_context->symbols, &symbol); | |||
| 158 | if (add_to_array_context->trainables) | |||
| 159 | ccv_array_push(add_to_array_context->trainables, &is_trainable); | |||
| 160 | char id[2048]; | |||
| 161 | id[0] = add_to_array_context->prefix; | |||
| 162 | id[1] = '-'; | |||
| 163 | int total_len = 2; | |||
| 164 | for (i = 0; i < add_to_array_context->sequence->sequences->rnum; i++) | |||
| 165 | { | |||
| 166 | const ccv_cnnp_model_name_t* const name = (ccv_cnnp_model_name_t*)ccv_array_get(add_to_array_context->sequence->sequences, i)((void*)(((char*)((add_to_array_context->sequence->sequences )->data)) + (size_t)(add_to_array_context->sequence-> sequences)->rsize * (size_t)(i))); | |||
| 167 | int len; | |||
| 168 | if (name->name && name->name[0] != '\0') | |||
| 169 | len = snprintf(id + total_len, 2048 - total_len, "%s-%d-", name->name, name->sequence); | |||
| 170 | else | |||
| 171 | len = snprintf(id + total_len, 2048 - total_len, "%d-", name->sequence); | |||
| 172 | total_len += len; | |||
| 173 | if (total_len >= 2047) | |||
| 174 | break; | |||
| 175 | } | |||
| 176 | if (total_len < 2047) | |||
| 177 | total_len += snprintf(id + total_len, 2048 - total_len, "%d", add_to_array_context->sequence->it); | |||
| 178 | assert(total_len < 2048)((void) sizeof ((total_len < 2048) ? 1 : 0), __extension__ ({ if (total_len < 2048) ; else __assert_fail ("total_len < 2048" , "ccv_cnnp_model.c", 178, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 179 | char *heap_id = (char*)ccmallocmalloc(total_len + 1); | |||
| 180 | memcpy(heap_id, id, total_len + 1); | |||
| 181 | ccv_array_push(add_to_array_context->ids, &heap_id); | |||
| 182 | ++add_to_array_context->sequence->it; | |||
| 183 | } | |||
| 184 | ||||
| 185 | static void _ccv_cnnp_compiled_data_init(ccv_cnnp_compiled_data_t* const compiled_data, const int output_size, ccv_array_t* const gradient_checkpoints) | |||
| 186 | { | |||
| 187 | compiled_data->f = compiled_data->fits + output_size; | |||
| 188 | compiled_data->xpu_alloc.mp_hdr = -1; | |||
| 189 | compiled_data->xpu_alloc.freed = kh_init(dy_str)kh_init_dy_str(); | |||
| 190 | compiled_data->xpu_alloc.allocd = kh_init(dy_alloc)kh_init_dy_alloc(); | |||
| 191 | compiled_data->gradient_checkpoints = gradient_checkpoints; | |||
| 192 | } | |||
| 193 | ||||
| 194 | typedef struct { | |||
| 195 | void* old_graph_exec_symbol_new_hook_context; | |||
| 196 | ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook; | |||
| 197 | ccv_nnc_symbolic_graph_t* graph; | |||
| 198 | ccv_cnnp_model_build_data_t* build_data; | |||
| 199 | } ccv_cnnp_model_set_exec_flags_context_t; | |||
| 200 | ||||
| 201 | static void _ccv_cnnp_model_set_exec_flags(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) | |||
| 202 | { | |||
| 203 | ccv_cnnp_model_set_exec_flags_context_t* flags_context = (ccv_cnnp_model_set_exec_flags_context_t*)context; | |||
| 204 | if (flags_context->build_data->exec_flags) | |||
| 205 | ccv_nnc_graph_exec_symbol_set_flags(flags_context->graph, symbol, flags_context->build_data->exec_flags); | |||
| 206 | if (flags_context->old_graph_exec_symbol_new_hook) | |||
| 207 | flags_context->old_graph_exec_symbol_new_hook(flags_context->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name); | |||
| 208 | } | |||
| 209 | ||||
| 210 | static void _ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t loss) | |||
| 211 | { | |||
| 212 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 212, __extension__ __PRETTY_FUNCTION__); })); | |||
| 213 | model->inputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * input_size); | |||
| 214 | int i; | |||
| 215 | for (i = 0; i < input_size; i++) | |||
| 216 | model->inputs[i] = ccv_nnc_tensor_symbol_new(model->graph, inputs[i], 0); | |||
| 217 | ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); | |||
| 218 | ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0); | |||
| 219 | ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0); | |||
| 220 | ccv_cnnp_model_sequence_t model_sequence = { | |||
| 221 | .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank() | |||
| 222 | }; | |||
| 223 | ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = { | |||
| 224 | .add_parameter_indices = 1, | |||
| 225 | .prefix = 't', | |||
| 226 | .sequence = &model_sequence, | |||
| 227 | .symbols = parameters, | |||
| 228 | .ids = parameter_ids, | |||
| 229 | .trainables = parameter_trainables, | |||
| 230 | }; | |||
| 231 | ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); | |||
| 232 | ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0); | |||
| 233 | ccv_cnnp_model_add_to_array_context_t add_to_output_context = { | |||
| 234 | .add_parameter_indices = 0, | |||
| 235 | .prefix = 'r', | |||
| 236 | .sequence = &model_sequence, | |||
| 237 | .symbols = internals, | |||
| 238 | .ids = internal_ids, | |||
| 239 | .trainables = 0, | |||
| 240 | }; | |||
| 241 | ccv_cnnp_model_build_data_t build_data = { | |||
| 242 | .exec_flags = 0, | |||
| 243 | .is_trainable = model->is_trainable >= 0 ? model->is_trainable : 1, | |||
| 244 | .model_sequence = &model_sequence, | |||
| 245 | .add_to_array = ccv_cnnp_model_add_to_array, | |||
| 246 | .parameters = parameters, | |||
| 247 | .context = { | |||
| 248 | .add_to_parameter = &add_to_parameter_context, | |||
| 249 | .add_to_output = &add_to_output_context, | |||
| 250 | }, | |||
| 251 | .gradient_checkpoints = 0, | |||
| 252 | }; | |||
| 253 | model->data = &build_data; | |||
| 254 | ccv_cnnp_model_set_exec_flags_context_t flags_context = { | |||
| 255 | .graph = model->graph, | |||
| 256 | .build_data = &build_data, | |||
| 257 | .old_graph_exec_symbol_new_hook = 0, | |||
| 258 | .old_graph_exec_symbol_new_hook_context = 0 | |||
| 259 | }; | |||
| 260 | flags_context.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_set_exec_flags, &flags_context, &flags_context.old_graph_exec_symbol_new_hook); | |||
| 261 | ccv_cnnp_model_build(model, model->graph, model->inputs, input_size, 0, 0); | |||
| 262 | // Reset back to previous hook. | |||
| 263 | ccv_nnc_graph_exec_symbol_new_hook(model->graph, flags_context.old_graph_exec_symbol_new_hook, flags_context.old_graph_exec_symbol_new_hook_context, 0); | |||
| 264 | for (i = 0; i < model->output_size; i++) | |||
| 265 | { | |||
| 266 | const ccv_nnc_tensor_symbol_t output = model->outputs[i]; | |||
| 267 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, output); | |||
| 268 | if (alias_to.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 269 | continue; | |||
| 270 | // If output is an alias, insert data transform regardless for result correctness (we cannot bind an alias). You can check ccv_nnc_tensor_bind_symbol method | |||
| 271 | // to see that we can correctly bind a tensor which from it, has aliases, but we cannot bind an alias tensor correctly (this is expected, sort of, to be | |||
| 272 | // honest, because we cannot handle cases of alias is part of the original tensor but bind differently). | |||
| 273 | const ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(model->graph, output); | |||
| 274 | model->outputs[i] = ccv_nnc_tensor_symbol_new(model->graph, output_params, 0); | |||
| 275 | ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(model->graph, CMD_FORMAT_TRANSFORM_FORWARD()ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto , 0), &output, 1, model->outputs + i, 1, "contiguous"); | |||
| 276 | ccv_nnc_graph_exec_symbol_set_flags(model->graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT); | |||
| 277 | } | |||
| 278 | model->data = 0; | |||
| 279 | kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank); | |||
| 280 | if (model_sequence.sequences) | |||
| 281 | ccv_array_free(model_sequence.sequences); | |||
| 282 | // Check if there are parameters that are not trainables. If there are, we will allocate uint64 bitmap to record that. | |||
| 283 | int not_trainables = 0; | |||
| 284 | // Assert no parameter is alias. | |||
| 285 | for (i = 0; i < parameters->rnum; i++) | |||
| 286 | { | |||
| 287 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(i))); | |||
| 288 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(parameter.graph, parameter); | |||
| 289 | assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__ ({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0" , "ccv_cnnp_model.c", 289, __extension__ __PRETTY_FUNCTION__) ; })); // Cannot find the one alias to. | |||
| 290 | if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t )(parameter_trainables)->rsize * (size_t)(i))) == 0) | |||
| 291 | not_trainables = 1; | |||
| 292 | } | |||
| 293 | assert(parameters->rnum == parameter_trainables->rnum)((void) sizeof ((parameters->rnum == parameter_trainables-> rnum) ? 1 : 0), __extension__ ({ if (parameters->rnum == parameter_trainables ->rnum) ; else __assert_fail ("parameters->rnum == parameter_trainables->rnum" , "ccv_cnnp_model.c", 293, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 294 | uint64_t* parameter_flags = 0; | |||
| 295 | if (not_trainables) | |||
| 296 | { | |||
| 297 | parameter_flags = (uint64_t*)cccalloccalloc(((parameters->rnum + 63) >> 6), sizeof(uint64_t)); | |||
| 298 | for (i = 0; i < parameter_trainables->rnum; i++) | |||
| 299 | if (*(int*)ccv_array_get(parameter_trainables, i)((void*)(((char*)((parameter_trainables)->data)) + (size_t )(parameter_trainables)->rsize * (size_t)(i)))) | |||
| 300 | parameter_flags[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 301 | } | |||
| 302 | ccv_array_free(parameter_trainables); | |||
| 303 | // Assert no internal is alias. | |||
| 304 | for (i = 0; i < internals->rnum; i++) | |||
| 305 | { | |||
| 306 | const ccv_nnc_tensor_symbol_t internal = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(internals, i)((void*)(((char*)((internals)->data)) + (size_t)(internals )->rsize * (size_t)(i))); | |||
| 307 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(internal.graph, internal); | |||
| 308 | assert(alias_to.graph == 0)((void) sizeof ((alias_to.graph == 0) ? 1 : 0), __extension__ ({ if (alias_to.graph == 0) ; else __assert_fail ("alias_to.graph == 0" , "ccv_cnnp_model.c", 308, __extension__ __PRETTY_FUNCTION__) ; })); // Cannot find the one alias to. | |||
| 309 | } | |||
| 310 | const int output_size = model->output_size; | |||
| 311 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 312 | const int parameters_rnum = parameters->rnum; | |||
| 313 | if (input_size > 0) | |||
| 314 | { | |||
| 315 | ccv_array_resize(parameters, parameters_rnum + input_size); | |||
| 316 | memcpy(ccv_array_get(parameters, parameters_rnum)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(parameters_rnum))), model->inputs, input_size * sizeof(ccv_nnc_tensor_symbol_t)); | |||
| 317 | } | |||
| 318 | ccv_nnc_symbolic_graph_simplify(model->graph, | |||
| 319 | SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) | |||
| 320 | CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) | |||
| 321 | CCV_NNC_SIMPLIFY_OPS_FUSION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) | |||
| 322 | CCV_NNC_SIMPLIFY_GRAPH_PRUNING)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION , CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION , CCV_NNC_SIMPLIFY_GRAPH_PRUNING}, (1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), | |||
| 323 | ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(0))), parameters_rnum + input_size, | |||
| 324 | model->outputs, output_size, | |||
| 325 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
| 326 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 327 | // Size it down. | |||
| 328 | parameters->rnum = parameters_rnum; | |||
| 329 | ccv_cnnp_compiled_data_t* compiled_data = model->compiled_data = cccalloccalloc(1, sizeof(ccv_cnnp_compiled_data_t) + sizeof(ccv_nnc_tensor_symbol_t) * (output_size * 2 - 1)); | |||
| 330 | _ccv_cnnp_compiled_data_init(compiled_data, output_size, build_data.gradient_checkpoints); | |||
| 331 | const int evaluate_to_size = compiled_data->evaluate.to_size = ccv_nnc_symbolic_graph_destination_size(model->graph); | |||
| 332 | assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__ ({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0" , "ccv_cnnp_model.c", 332, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 333 | compiled_data->evaluate.tos = ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size); | |||
| 334 | memcpy(compiled_data->evaluate.tos, ccv_nnc_symbolic_graph_destinations(model->graph), sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size); | |||
| 335 | compiled_data->loss = loss; | |||
| 336 | if (loss.cmd == CCV_NNC_NOOP) | |||
| 337 | { | |||
| 338 | // If no loss function provided, there is no fits. | |||
| 339 | for (i = 0; i < output_size; i++) | |||
| 340 | { | |||
| 341 | compiled_data->fits[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 342 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(model->graph, model->outputs[i]); | |||
| 343 | if (alias_to.d < 0) | |||
| 344 | compiled_data->f[i] = model->outputs[i]; | |||
| 345 | else { // We cannot differentiate against an alias, therefore, we have to verify this output is full, and we can diff against the original. | |||
| 346 | int ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 347 | int inc[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 348 | ccv_nnc_tensor_symbol_alias_params(model->graph, model->outputs[i], ofs, inc); | |||
| 349 | int j; | |||
| 350 | for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++) | |||
| 351 | { assert(ofs[j] == 0)((void) sizeof ((ofs[j] == 0) ? 1 : 0), __extension__ ({ if ( ofs[j] == 0) ; else __assert_fail ("ofs[j] == 0", "ccv_cnnp_model.c" , 351, __extension__ __PRETTY_FUNCTION__); })); } // There is no ofs. | |||
| 352 | compiled_data->f[i] = alias_to; // Unfortunately, I cannot assert the size yet. | |||
| 353 | } | |||
| 354 | } | |||
| 355 | } else { | |||
| 356 | for (i = 0; i < output_size; i++) | |||
| 357 | { | |||
| 358 | const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(model->graph, model->outputs[i]); | |||
| 359 | const ccv_nnc_tensor_symbol_t fit = compiled_data->fits[i] = ccv_nnc_tensor_symbol_new(model->graph, info, 0); | |||
| 360 | compiled_data->f[i] = ccv_nnc_tensor_symbol_new(model->graph, ccv_nnc_tensor_auto, 0); | |||
| 361 | ccv_nnc_graph_exec_symbol_new(model->graph, loss, TENSOR_SYMBOL_LIST(model->outputs[i], fit)(const ccv_nnc_tensor_symbol_t []){model->outputs[i], fit} , (1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_SYMBOL_LIST(compiled_data->f[i])(const ccv_nnc_tensor_symbol_t []){compiled_data->f[i]}, ( 1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
| 362 | } | |||
| 363 | } | |||
| 364 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 365 | ccv_nnc_symbolic_graph_simplify(model->graph, | |||
| 366 | SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_OPS_FUSION}, (1 +1 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), // Only do Ops fusion, in this way, we can fuse the loss function. | |||
| 367 | 0, 0, // No need to provide binds at this point. | |||
| 368 | compiled_data->f, model->output_size, | |||
| 369 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
| 370 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 371 | // If inputs are from GPU, stream type is GPU. | |||
| 372 | compiled_data->parameters = parameters; | |||
| 373 | compiled_data->parameter_flags = parameter_flags; | |||
| 374 | compiled_data->internals = internals; | |||
| 375 | compiled_data->ids.parameters = parameter_ids; | |||
| 376 | compiled_data->ids.internals = internal_ids; | |||
| 377 | ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(compiled_data, model->graph); | |||
| 378 | } | |||
| 379 | ||||
| 380 | static void _ccv_cnnp_graph_push_graph_exec_symbol(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) | |||
| 381 | { | |||
| 382 | ccv_array_t* const stack = (ccv_array_t*)context; | |||
| 383 | ccv_array_push(stack, &symbol.d); | |||
| 384 | } | |||
| 385 | ||||
| 386 | static void _ccv_nnc_tensor_symbol_reinit(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) | |||
| 387 | { | |||
| 388 | const ccv_nnc_tensor_symbol_t src_symbol = { | |||
| 389 | .d = src_index, | |||
| 390 | .graph = src_graph | |||
| 391 | }; | |||
| 392 | const ccv_nnc_tensor_symbol_t dest_symbol = { | |||
| 393 | .d = dest_index, | |||
| 394 | .graph = dest_graph | |||
| 395 | }; | |||
| 396 | const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); | |||
| 397 | ccv_nnc_tensor_symbol_set(dest_graph, dest_symbol, params); | |||
| 398 | int ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 399 | int inc[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 400 | if (0 == ccv_nnc_tensor_symbol_alias_params(src_graph, src_symbol, ofs, inc)) | |||
| 401 | ccv_nnc_tensor_symbol_alias_set(dest_graph, dest_symbol, ofs, inc); | |||
| 402 | } | |||
| 403 | ||||
| 404 | static int _ccv_nnc_tensor_symbol_check_dim(const ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const int src_index, const int dest_index) | |||
| 405 | { | |||
| 406 | const ccv_nnc_tensor_symbol_t src_symbol = { | |||
| 407 | .d = src_index, | |||
| 408 | .graph = src_graph | |||
| 409 | }; | |||
| 410 | const ccv_nnc_tensor_param_t src_params = ccv_nnc_tensor_symbol_params(src_graph, src_symbol); | |||
| 411 | const ccv_nnc_tensor_symbol_t dest_symbol = { | |||
| 412 | .d = dest_index, | |||
| 413 | .graph = dest_graph | |||
| 414 | }; | |||
| 415 | const ccv_nnc_tensor_param_t dest_params = ccv_nnc_tensor_symbol_params(dest_graph, dest_symbol); | |||
| 416 | return memcmp(src_params.dim, dest_params.dim, sizeof(src_params.dim)) == 0; | |||
| 417 | } | |||
| 418 | ||||
| 419 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size); | |||
| 420 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data); | |||
| 421 | ||||
| 422 | typedef struct { | |||
| 423 | int parallel_count; | |||
| 424 | ccv_nnc_symbolic_graph_t* graph; | |||
| 425 | ccv_nnc_graph_exec_arena_t* graph_exec_arena; | |||
| 426 | } ccv_nnc_graph_exec_update_t; | |||
| 427 | ||||
| 428 | static void _ccv_cnnp_cmd_update_for_execs(void* const context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint) | |||
| 429 | { | |||
| 430 | ccv_nnc_graph_exec_update_t* const graph_exec_update = (ccv_nnc_graph_exec_update_t*)context; | |||
| 431 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = graph_exec_update->graph_exec_arena; | |||
| 432 | ccv_nnc_graph_exec_t graph_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, symbol); | |||
| 433 | ccv_nnc_graph_exec_set(graph_exec.graph, graph_exec, cmd); | |||
| 434 | ccv_nnc_graph_exec_set_hint(graph_exec.graph, graph_exec, hint); | |||
| 435 | const ccv_nnc_symbolic_graph_t* const graph = graph_exec_update->graph; | |||
| 436 | const int parallel_count = graph_exec_update->parallel_count; | |||
| 437 | int i; | |||
| 438 | for (i = 1; i < parallel_count; i++) | |||
| 439 | { | |||
| 440 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, ccv_nnc_graph_exec_symbol_copy(graph, symbol, i)); | |||
| 441 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) | |||
| 442 | { | |||
| 443 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); | |||
| 444 | ccv_nnc_graph_exec_set_hint(copy.graph, copy, hint); | |||
| 445 | } | |||
| 446 | } | |||
| 447 | } | |||
| 448 | ||||
| 449 | void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size) | |||
| 450 | { | |||
| 451 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 451, __extension__ __PRETTY_FUNCTION__); })); | |||
| 452 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 452, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 453 | assert(!init->graph)((void) sizeof ((!init->graph) ? 1 : 0), __extension__ ({ if (!init->graph) ; else __assert_fail ("!init->graph", "ccv_cnnp_model.c" , 453, __extension__ __PRETTY_FUNCTION__); })); | |||
| 454 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 455 | init->graph = ccv_nnc_symbolic_graph_new(); | |||
| 456 | ccv_array_t* const stack = ccv_array_new(sizeof(int), 0, 0); | |||
| 457 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, _ccv_cnnp_graph_push_graph_exec_symbol, stack, 0); | |||
| 458 | _ccv_cnnp_model_compile(init, inputs, input_size, compiled_data->loss); | |||
| 459 | init->parallel_count = model->parallel_count; | |||
| 460 | init->memory_compression = model->memory_compression; | |||
| 461 | init->memory_reduction = model->memory_reduction; | |||
| 462 | init->gradient_checkpointing = model->gradient_checkpointing; | |||
| 463 | init->compiled_data->stream_type = model->compiled_data->stream_type; | |||
| 464 | init->compiled_data->minimize.minimizer = model->compiled_data->minimize.minimizer; | |||
| 465 | init->compiled_data->minimize.max_saved_aux_size = model->compiled_data->minimize.max_saved_aux_size; | |||
| 466 | if (model->compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
| 467 | _ccv_cnnp_model_gradient_init(init, model->compiled_data->gradient_mode, model->compiled_data->disable_outgrad, 0, 0); | |||
| 468 | ccv_nnc_graph_exec_symbol_new_hook(init->graph, 0, 0, 0); | |||
| 469 | ccv_nnc_symbolic_graph_tensor_auto(init->graph, TRAVERSE_FULL0,0,0,0); | |||
| 470 | int i, j; | |||
| 471 | // Verify parameters, internals and saved_aux in both graph has the same dimensionality. | |||
| 472 | for (i = 0; i < compiled_data->parameters->rnum; i++) | |||
| 473 | { | |||
| 474 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | |||
| 475 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 475, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 476 | } | |||
| 477 | for (i = 0; i < compiled_data->internals->rnum; i++) | |||
| 478 | { | |||
| 479 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; | |||
| 480 | assert(_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d))((void) sizeof ((_ccv_nnc_tensor_symbol_check_dim(model->graph , init->graph, d, d)) ? 1 : 0), __extension__ ({ if (_ccv_nnc_tensor_symbol_check_dim (model->graph, init->graph, d, d)) ; else __assert_fail ("_ccv_nnc_tensor_symbol_check_dim(model->graph, init->graph, d, d)" , "ccv_cnnp_model.c", 480, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 481 | } | |||
| 482 | // Update inputs. | |||
| 483 | assert(model->input_size == init->input_size)((void) sizeof ((model->input_size == init->input_size) ? 1 : 0), __extension__ ({ if (model->input_size == init-> input_size) ; else __assert_fail ("model->input_size == init->input_size" , "ccv_cnnp_model.c", 483, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 484 | for (i = 0; i < model->input_size; i++) | |||
| 485 | if (model->inputs[i].d >= 0) | |||
| 486 | { | |||
| 487 | assert(init->inputs[i].d >= 0)((void) sizeof ((init->inputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->inputs[i].d >= 0) ; else __assert_fail ("init->inputs[i].d >= 0" , "ccv_cnnp_model.c", 487, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 488 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->inputs[i].d, model->inputs[i].d); | |||
| 489 | } | |||
| 490 | // Update outputs. | |||
| 491 | assert(model->output_size == init->output_size)((void) sizeof ((model->output_size == init->output_size ) ? 1 : 0), __extension__ ({ if (model->output_size == init ->output_size) ; else __assert_fail ("model->output_size == init->output_size" , "ccv_cnnp_model.c", 491, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 492 | for (i = 0; i < model->output_size; i++) | |||
| 493 | { | |||
| 494 | if (model->outputs[i].d >= 0) | |||
| 495 | { | |||
| 496 | assert(init->outputs[i].d >= 0)((void) sizeof ((init->outputs[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->outputs[i].d >= 0) ; else __assert_fail ( "init->outputs[i].d >= 0", "ccv_cnnp_model.c", 496, __extension__ __PRETTY_FUNCTION__); })); | |||
| 497 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->outputs[i].d, model->outputs[i].d); | |||
| 498 | } | |||
| 499 | if (model->outputs[i].d != model->compiled_data->f[i].d) | |||
| 500 | { | |||
| 501 | assert(init->outputs[i].d != init->compiled_data->f[i].d)((void) sizeof ((init->outputs[i].d != init->compiled_data ->f[i].d) ? 1 : 0), __extension__ ({ if (init->outputs[ i].d != init->compiled_data->f[i].d) ; else __assert_fail ("init->outputs[i].d != init->compiled_data->f[i].d" , "ccv_cnnp_model.c", 501, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 502 | if (model->compiled_data->f[i].d >= 0) | |||
| 503 | { | |||
| 504 | assert(init->compiled_data->f[i].d >= 0)((void) sizeof ((init->compiled_data->f[i].d >= 0) ? 1 : 0), __extension__ ({ if (init->compiled_data->f[i] .d >= 0) ; else __assert_fail ("init->compiled_data->f[i].d >= 0" , "ccv_cnnp_model.c", 504, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 505 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, init->compiled_data->f[i].d, model->compiled_data->f[i].d); | |||
| 506 | } | |||
| 507 | } | |||
| 508 | } | |||
| 509 | // Go through the graph to set tensor on matching symbols | |||
| 510 | for (i = 0; i < stack->rnum; i++) | |||
| 511 | { | |||
| 512 | const int d = *(int*)ccv_array_get(stack, i)((void*)(((char*)((stack)->data)) + (size_t)(stack)->rsize * (size_t)(i))); | |||
| 513 | // If exceed range, skip. | |||
| 514 | if (d >= ccv_nnc_graph_exec_symbol_count(init->graph) || | |||
| 515 | d >= ccv_nnc_graph_exec_symbol_count(model->graph)) | |||
| 516 | continue; | |||
| 517 | const ccv_nnc_graph_exec_symbol_t src_symbol = { | |||
| 518 | .d = d, | |||
| 519 | .graph = init->graph | |||
| 520 | }; | |||
| 521 | const ccv_nnc_graph_exec_symbol_t dest_symbol = { | |||
| 522 | .d = d, | |||
| 523 | .graph = model->graph | |||
| 524 | }; | |||
| 525 | const ccv_nnc_cmd_t src_cmd = ccv_nnc_graph_exec_symbol_cmd(init->graph, src_symbol); | |||
| 526 | const ccv_nnc_cmd_t dest_cmd = ccv_nnc_graph_exec_symbol_cmd(model->graph, dest_symbol); | |||
| 527 | // If the name doesn't match, skip. | |||
| 528 | if (dest_cmd.cmd != src_cmd.cmd && src_cmd.cmd != CCV_NNC_NOOP) | |||
| 529 | continue; | |||
| 530 | // Now get all the inputs and outputs, if matches, set them. | |||
| 531 | const int* src_inputs; | |||
| 532 | int src_input_size; | |||
| 533 | const int* src_outputs; | |||
| 534 | int src_output_size; | |||
| 535 | ccv_nnc_graph_exec_symbol_io(init->graph, src_symbol, &src_inputs, &src_input_size, &src_outputs, &src_output_size); | |||
| 536 | const int* dest_inputs; | |||
| 537 | int dest_input_size; | |||
| 538 | const int* dest_outputs; | |||
| 539 | int dest_output_size; | |||
| 540 | ccv_nnc_graph_exec_symbol_io(model->graph, dest_symbol, &dest_inputs, &dest_input_size, &dest_outputs, &dest_output_size); | |||
| 541 | // We may have unmatched input / output size because this is the minimizer and it has | |||
| 542 | // different saved_aux (for example, when we shrunk with CMD_NOOP). | |||
| 543 | if (src_input_size != dest_input_size) | |||
| 544 | continue; | |||
| 545 | if (src_output_size != dest_output_size) | |||
| 546 | continue; | |||
| 547 | ccv_nnc_graph_exec_symbol_set(model->graph, dest_symbol, src_cmd); | |||
| 548 | // There may be mismatches of the source tensor symbols and destination tensor symbols. The reason is because | |||
| 549 | // we may later passed-in the minimizer, therefore, we may allocate tensors for minimizer later in the original | |||
| 550 | // graph whereas in the newly created graph, it is streamlined (the minimizer exists from the beginning). That | |||
| 551 | // will make the order of tensor symbols creation different, therefore, exact which tensor is which wrong as | |||
| 552 | // well. However, set a new minimizer won't change the exec symbol ordering, because we never create new exec | |||
| 553 | // symbols after gradient init step. Changing a new minimizer just updated that exec symbols setting, it is not | |||
| 554 | // a new exec symbol. | |||
| 555 | for (j = 0; j < src_input_size; j++) | |||
| 556 | if (src_inputs[j] >= 0) | |||
| 557 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_inputs[j], dest_inputs[j]); | |||
| 558 | for (j = 0; j < src_output_size; j++) | |||
| 559 | if (src_outputs[j] >= 0) | |||
| 560 | _ccv_nnc_tensor_symbol_reinit(init->graph, model->graph, src_outputs[j], dest_outputs[j]); | |||
| 561 | } | |||
| 562 | ccv_array_free(stack); | |||
| 563 | // After this, we get all tensors in the model graph resolved through tensor_auto. | |||
| 564 | ccv_nnc_symbolic_graph_tensor_auto(model->graph, TRAVERSE_FULL0,0,0,0); | |||
| 565 | // Verify symbols we get matches. | |||
| 566 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 567 | for (i = 0; i < parameter_size; i++) | |||
| 568 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->parameters)->data)) + (size_t)(compiled_data ->parameters)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if ( ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((compiled_data-> parameters)->data)) + (size_t)(compiled_data->parameters )->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->parameters)-> data)) + (size_t)(init->compiled_data->parameters)-> rsize * (size_t)(i))))->d) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->parameters, i))->d" , "ccv_cnnp_model.c", 568, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
| 569 | const int internal_size = compiled_data->internals->rnum; | |||
| 570 | for (i = 0; i < internal_size; i++) | |||
| 571 | { assert(((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d)((void) sizeof ((((ccv_nnc_tensor_symbol_t*)((void*)(((char*) ((compiled_data->internals)->data)) + (size_t)(compiled_data ->internals)->rsize * (size_t)(i))))->d == ((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((init->compiled_data->internals)-> data)) + (size_t)(init->compiled_data->internals)->rsize * (size_t)(i))))->d) ? 1 : 0), __extension__ ({ if (((ccv_nnc_tensor_symbol_t *)((void*)(((char*)((compiled_data->internals)->data)) + (size_t)(compiled_data->internals)->rsize * (size_t)(i ))))->d == ((ccv_nnc_tensor_symbol_t*)((void*)(((char*)((init ->compiled_data->internals)->data)) + (size_t)(init-> compiled_data->internals)->rsize * (size_t)(i))))->d ) ; else __assert_fail ("((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d == ((ccv_nnc_tensor_symbol_t*)ccv_array_get(init->compiled_data->internals, i))->d" , "ccv_cnnp_model.c", 571, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
| 572 | // Go through compiled data. | |||
| 573 | if (compiled_data->tensor_arena) | |||
| 574 | { | |||
| 575 | const int flag = ccv_nnc_tensor_arena_reinit(compiled_data->tensor_arena, model->graph); | |||
| 576 | if (flag == 0 && compiled_data->graph_exec_arena) | |||
| 577 | { | |||
| 578 | ccv_nnc_graph_exec_reinit(compiled_data->graph_exec_arena, compiled_data->graph, model->graph); | |||
| 579 | // Since we will reinit, if we previously set is_test, we need to set it again. | |||
| 580 | if (compiled_data->is_test) | |||
| 581 | { | |||
| 582 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 583 | ccv_nnc_graph_exec_update_t update = { | |||
| 584 | .parallel_count = parallel_count, | |||
| 585 | .graph = model->graph, | |||
| 586 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
| 587 | }; | |||
| 588 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); | |||
| 589 | } | |||
| 590 | } else | |||
| 591 | // Free-up tensor arena & graph exec arena. | |||
| 592 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
| 593 | } | |||
| 594 | // There are other compiled graphs, for accum and apply gradients. | |||
| 595 | // However, the main conclusion is, these absorb operations shouldn't impact parameters. | |||
| 596 | // Thus, it won't impact the shape of gradients (only outgrad). Since for outgrad, we | |||
| 597 | // don't allocate ourselves, it is not a concern. For normal gradients, the shape cannot | |||
| 598 | // be changed otherwise parameters' shape will be meaningless. The same goes to internals. | |||
| 599 | // That is why we don't update these compiled graphs at all this point. | |||
| 600 | // Free the model, we've already "absorbed" it. | |||
| 601 | ccv_cnnp_model_free(init); | |||
| 602 | } | |||
| 603 | ||||
| 604 | void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss) | |||
| 605 | { | |||
| 606 | assert(input_size == model->input_size || model->input_size == 0)((void) sizeof ((input_size == model->input_size || model-> input_size == 0) ? 1 : 0), __extension__ ({ if (input_size == model->input_size || model->input_size == 0) ; else __assert_fail ("input_size == model->input_size || model->input_size == 0" , "ccv_cnnp_model.c", 606, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 607 | if (model->input_size == 0) | |||
| 608 | model->input_size = input_size; | |||
| 609 | if (!model->graph) // The graph is not compiled yet. | |||
| 610 | { | |||
| 611 | model->graph = ccv_nnc_symbolic_graph_new(); | |||
| 612 | _ccv_cnnp_model_compile(model, inputs, input_size, loss); | |||
| 613 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 613, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 614 | int i, flag = 0; | |||
| 615 | for (i = 0; !flag && i < input_size; i++) | |||
| 616 | flag = (CCV_TENSOR_GET_MEMORY(inputs[i].type)((inputs[i].type) & 0x3) == CCV_TENSOR_GPU_MEMORY); | |||
| 617 | // If inputs are from GPU, stream type is GPU. | |||
| 618 | model->compiled_data->stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
| 619 | model->compiled_data->minimize.minimizer = minimizer; | |||
| 620 | model->compiled_data->minimize.max_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer); | |||
| 621 | } else { | |||
| 622 | // Now, finally fill in this part. If the graph is already compiled, we make a copy of the model. | |||
| 623 | // And then absorb the "new model" to the old one. | |||
| 624 | ccv_cnnp_model_t* const init = ccv_cnnp_model_copy(model, model->is_trainable); | |||
| 625 | ccv_cnnp_model_absorb(model, init, inputs, input_size); | |||
| 626 | // Reset minimizer. | |||
| 627 | ccv_cnnp_model_set_minimizer(model, minimizer, 1, 0, 0); | |||
| 628 | } | |||
| 629 | } | |||
| 630 | ||||
| 631 | ccv_cnnp_model_t* ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable) | |||
| 632 | { | |||
| 633 | ccv_cnnp_model_t* const new_model = _ccv_cnnp_model_copy(model, 0); | |||
| 634 | new_model->is_trainable = is_trainable; | |||
| 635 | return new_model; | |||
| 636 | } | |||
| 637 | ||||
| 638 | void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size) | |||
| 639 | { | |||
| 640 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 640, __extension__ __PRETTY_FUNCTION__); })); | |||
| 641 | assert(output_size == model->output_size)((void) sizeof ((output_size == model->output_size) ? 1 : 0 ), __extension__ ({ if (output_size == model->output_size) ; else __assert_fail ("output_size == model->output_size" , "ccv_cnnp_model.c", 641, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 642 | ccv_nnc_symbolic_graph_t* const graph = model->graph; | |||
| 643 | ccv_nnc_symbolic_graph_tensor_auto(graph, TRAVERSE_FULL0,0,0,0); | |||
| 644 | int i; | |||
| 645 | for (i = 0; i < output_size; i++) | |||
| 646 | { | |||
| 647 | assert(model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL)((void) sizeof ((model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ? 1 : 0), __extension__ ({ if (model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL ) ; else __assert_fail ("model->outputs[i].d != CCV_NNC_NO_TENSOR_SYMBOL" , "ccv_cnnp_model.c", 647, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 648 | outputs[i] = ccv_nnc_tensor_symbol_params(graph, model->outputs[i]); | |||
| 649 | } | |||
| 650 | } | |||
| 651 | ||||
| 652 | void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size) | |||
| 653 | { | |||
| 654 | if (workspace_size == model->workspace_size) | |||
| 655 | return; | |||
| 656 | model->workspace_size = workspace_size; | |||
| 657 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 658 | if (compiled_data && compiled_data->graph) | |||
| 659 | ccv_nnc_graph_autotune(compiled_data->graph, workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
| 660 | } | |||
| 661 | ||||
| 662 | size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model) | |||
| 663 | { | |||
| 664 | return model->workspace_size; | |||
| 665 | } | |||
| 666 | ||||
| 667 | void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel) | |||
| 668 | { | |||
| 669 | if (parallel == 0) | |||
| 670 | model->parallel_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); | |||
| 671 | else | |||
| 672 | model->parallel_count = parallel; | |||
| 673 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 674 | if (compiled_data) | |||
| 675 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 675, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
| 676 | } | |||
| 677 | ||||
| 678 | void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count) | |||
| 679 | { | |||
| 680 | model->max_stream_count = max_stream_count; | |||
| 681 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 682 | if (compiled_data) | |||
| 683 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 683, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
| 684 | } | |||
| 685 | ||||
| 686 | void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression) | |||
| 687 | { | |||
| 688 | model->memory_compression = memory_compression; | |||
| 689 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 690 | if (compiled_data) | |||
| 691 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 691, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
| 692 | } | |||
| 693 | ||||
| 694 | void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction) | |||
| 695 | { | |||
| 696 | model->memory_reduction = memory_reduction; | |||
| 697 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 698 | if (compiled_data) | |||
| 699 | { assert(!compiled_data->graph)((void) sizeof ((!compiled_data->graph) ? 1 : 0), __extension__ ({ if (!compiled_data->graph) ; else __assert_fail ("!compiled_data->graph" , "ccv_cnnp_model.c", 699, __extension__ __PRETTY_FUNCTION__) ; })); } | |||
| 700 | } | |||
| 701 | ||||
| 702 | void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing) | |||
| 703 | { | |||
| 704 | model->gradient_checkpointing = gradient_checkpointing; | |||
| 705 | } | |||
| 706 | ||||
| 707 | int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model) | |||
| 708 | { | |||
| 709 | return model->gradient_checkpointing; | |||
| 710 | } | |||
| 711 | ||||
| 712 | typedef struct { | |||
| 713 | int parallel_count; | |||
| 714 | ccv_nnc_symbolic_graph_t* graph; | |||
| 715 | ccv_cnnp_compiled_data_t* compiled_data; | |||
| 716 | ccv_nnc_tensor_arena_t* tensor_arena; | |||
| 717 | } ccv_nnc_tensor_init_states_t; | |||
| 718 | ||||
| 719 | static int _ccv_cnnp_any_to_init(const ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 720 | { | |||
| 721 | int i; | |||
| 722 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 723 | for (i = 0; i < compiled_data->parameters->rnum; i++) | |||
| 724 | { | |||
| 725 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | |||
| 726 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) | |||
| 727 | return 1; | |||
| 728 | } | |||
| 729 | for (i = 0; i < compiled_data->internals->rnum; i++) | |||
| 730 | { | |||
| 731 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ))->d; | |||
| 732 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) | |||
| 733 | return 1; | |||
| 734 | } | |||
| 735 | return 0; | |||
| 736 | } | |||
| 737 | ||||
| 738 | static void _ccv_cnnp_init_states_for_tensors(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol) | |||
| 739 | { | |||
| 740 | ccv_nnc_tensor_init_states_t* const tensor_init_states = (ccv_nnc_tensor_init_states_t*)context; | |||
| 741 | ccv_nnc_tensor_arena_t* const tensor_arena = tensor_init_states->tensor_arena; | |||
| 742 | ccv_nnc_tensor_t* const output_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, output_symbol); | |||
| 743 | if (!output_tensor) | |||
| 744 | return; | |||
| 745 | const int d = output_symbol.d; | |||
| 746 | assert(d < tensor_init_states->compiled_data->tensors_init.size)((void) sizeof ((d < tensor_init_states->compiled_data-> tensors_init.size) ? 1 : 0), __extension__ ({ if (d < tensor_init_states ->compiled_data->tensors_init.size) ; else __assert_fail ("d < tensor_init_states->compiled_data->tensors_init.size" , "ccv_cnnp_model.c", 746, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 747 | uint32_t* const init_v = CCV_NNC_INIT_V(tensor_init_states->compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(tensor_init_states->compiled_data ->tensors_init.v) & ~(uintptr_t)1)); | |||
| 748 | if (init_v[d >> 5] & (1u << (d & 0x1f))) | |||
| 749 | return; | |||
| 750 | init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
| 751 | ccv_nnc_cmd_exec(cmd, hint, flags, &input, input ? 1 : 0, &output_tensor, 1, 0); | |||
| 752 | const ccv_nnc_symbolic_graph_t* const graph = tensor_init_states->graph; | |||
| 753 | const int parallel_count = tensor_init_states->parallel_count; | |||
| 754 | int i; | |||
| 755 | for (i = 1; i < parallel_count; i++) | |||
| 756 | { | |||
| 757 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(graph, output_symbol, i)); | |||
| 758 | if (copy) | |||
| 759 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &output_tensor, 1, ©, 1, 0); | |||
| 760 | } | |||
| 761 | } | |||
| 762 | ||||
| 763 | // This method can only handle cases we added new tensors and exec, never delete. This invariant is true because | |||
| 764 | // we setup everything (including calling simplify method) in ccv_cnnp_model_compile method, before this rewind setup. | |||
| 765 | static void _ccv_cnnp_model_rewind_graph(ccv_cnnp_model_t* const model) | |||
| 766 | { | |||
| 767 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 767, __extension__ __PRETTY_FUNCTION__); })); | |||
| 768 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 768, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 769 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 770 | assert(compiled_data->rewindables)((void) sizeof ((compiled_data->rewindables) ? 1 : 0), __extension__ ({ if (compiled_data->rewindables) ; else __assert_fail ( "compiled_data->rewindables", "ccv_cnnp_model.c", 770, __extension__ __PRETTY_FUNCTION__); })); | |||
| 771 | int i; | |||
| 772 | for (i = 0; i < compiled_data->rewindables->rnum; i++) | |||
| 773 | { | |||
| 774 | const ccv_cnnp_rewind_symbol_t* const rewind_symbol = (ccv_cnnp_rewind_symbol_t*)ccv_array_get(compiled_data->rewindables, i)((void*)(((char*)((compiled_data->rewindables)->data)) + (size_t)(compiled_data->rewindables)->rsize * (size_t) (i))); | |||
| 775 | if (rewind_symbol->type == CCV_CNNP_REWIND_GRAPH_EXEC) | |||
| 776 | ccv_nnc_graph_exec_symbol_free(model->graph, rewind_symbol->graph_exec); | |||
| 777 | else if (rewind_symbol->type == CCV_CNNP_REWIND_TENSOR) | |||
| 778 | ccv_nnc_tensor_symbol_free(model->graph, rewind_symbol->tensor); | |||
| 779 | } | |||
| 780 | ccv_array_clear(compiled_data->rewindables); | |||
| 781 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 782 | } | |||
| 783 | ||||
| 784 | static void _ccv_cnnp_model_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name) | |||
| 785 | { | |||
| 786 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | |||
| 787 | .type = CCV_CNNP_REWIND_TENSOR, | |||
| 788 | .tensor = symbol | |||
| 789 | }; | |||
| 790 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | |||
| 791 | ccv_array_push(rewind_symbols, &rewind_symbol); | |||
| 792 | } | |||
| 793 | ||||
| 794 | static void _ccv_cnnp_model_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name) | |||
| 795 | { | |||
| 796 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | |||
| 797 | .type = CCV_CNNP_REWIND_TENSOR, | |||
| 798 | .tensor = symbol | |||
| 799 | }; | |||
| 800 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | |||
| 801 | ccv_array_push(rewind_symbols, &rewind_symbol); | |||
| 802 | } | |||
| 803 | ||||
| 804 | static void _ccv_cnnp_model_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name) | |||
| 805 | { | |||
| 806 | const ccv_cnnp_rewind_symbol_t rewind_symbol = { | |||
| 807 | .type = CCV_CNNP_REWIND_GRAPH_EXEC, | |||
| 808 | .graph_exec = symbol | |||
| 809 | }; | |||
| 810 | ccv_array_t* const rewind_symbols = (ccv_array_t*)context; | |||
| 811 | ccv_array_push(rewind_symbols, &rewind_symbol); | |||
| 812 | } | |||
| 813 | ||||
| 814 | static void _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd, ccv_nnc_symbolic_graph_t* const symbolic_graph) | |||
| 815 | { | |||
| 816 | ccv_nnc_graph_exec_t const update_exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, exec_symbol); | |||
| 817 | if (!CCV_NO_GRAPH_EXEC(update_exec)((update_exec).graph == 0)) | |||
| 818 | ccv_nnc_graph_exec_set(update_exec.graph, update_exec, cmd); | |||
| 819 | int i; | |||
| 820 | for (i = 1; i < parallel_count; i++) | |||
| 821 | { | |||
| 822 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); | |||
| 823 | const ccv_nnc_graph_exec_t copy = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, copy_symbol); | |||
| 824 | if (!CCV_NO_GRAPH_EXEC(copy)((copy).graph == 0)) | |||
| 825 | ccv_nnc_graph_exec_set(copy.graph, copy, cmd); | |||
| 826 | } | |||
| 827 | } | |||
| 828 | ||||
| 829 | static void _ccv_cnnp_model_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_cnnp_compiled_data_t* const compiled_data, const int parallel_count, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_cmd_t cmd) | |||
| 830 | { | |||
| 831 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 831, __extension__ __PRETTY_FUNCTION__); })); | |||
| 832 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 832, __extension__ __PRETTY_FUNCTION__); })); | |||
| 833 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, exec_symbol, cmd); | |||
| 834 | int i; | |||
| 835 | for (i = 1; i < parallel_count; i++) | |||
| 836 | { | |||
| 837 | ccv_nnc_graph_exec_symbol_t copy_symbol = ccv_nnc_graph_exec_symbol_copy(symbolic_graph, exec_symbol, i); | |||
| 838 | if (copy_symbol.graph) | |||
| 839 | ccv_nnc_graph_exec_symbol_set(symbolic_graph, copy_symbol, cmd); | |||
| 840 | } | |||
| 841 | ccv_nnc_graph_exec_arena_t* const graph_exec_arena = compiled_data->graph_exec_arena; | |||
| 842 | if (graph_exec_arena) | |||
| 843 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); | |||
| 844 | // Skip backward graph exec arena because it is for a specific accum symbolic graph, not the main graph (model->graph) | |||
| 845 | ccv_nnc_graph_exec_arena_t* const gradient_graph_exec_arena = compiled_data->apply_gradients.graph_exec_arena; | |||
| 846 | if (gradient_graph_exec_arena) | |||
| 847 | _ccv_cnnp_model_graph_symbol_exec_set_for_graph_exec_arena(gradient_graph_exec_arena, parallel_count, exec_symbol, cmd, symbolic_graph); | |||
| 848 | } | |||
| 849 | ||||
| 850 | static int _ccv_cnnp_set_minimizer_for_parameter(ccv_nnc_symbolic_graph_t* const graph, ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_graph_exec_symbol_t* const update_nodes, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parallel_count, const ccv_nnc_cmd_t minimizer, const int saved_aux_size, const int max_saved_aux_size, const int parameter_indice) | |||
| 851 | { | |||
| 852 | int this_parameter_flag = 0; | |||
| 853 | if (update_nodes[parameter_indice].d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 854 | return this_parameter_flag; | |||
| 855 | const ccv_nnc_cmd_t old_minimizer = ccv_nnc_graph_exec_symbol_cmd(graph, update_nodes[parameter_indice]); | |||
| 856 | int j, k; | |||
| 857 | // For no-op, we can preserve previous saved_aux_size. | |||
| 858 | if (old_minimizer.cmd != minimizer.cmd && minimizer.cmd != CCV_NNC_NOOP) | |||
| 859 | { | |||
| 860 | // If the old minimizer is a noop, then the old_saved_aux_size should be whatever its previous | |||
| 861 | // saved_aux_size is, otherwise we will reinit the saved_aux repeatedly if you switch between | |||
| 862 | // noop and a minimizer. We don't want that because we do that in high-level frameworks to | |||
| 863 | // make sure some model parameters don't update if we don't want them to. | |||
| 864 | int old_saved_aux_size; | |||
| 865 | if (old_minimizer.cmd == CCV_NNC_NOOP) | |||
| 866 | { | |||
| 867 | int input_size; | |||
| 868 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], 0, &input_size, 0, 0); | |||
| 869 | if (input_size < 2) // This is not legit. | |||
| 870 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); | |||
| 871 | else // See ccv_nnc_minimizer_saved_aux_size, the saved_aux is inputs excluding gradients and parameters. | |||
| 872 | old_saved_aux_size = input_size - 2; | |||
| 873 | } else | |||
| 874 | old_saved_aux_size = ccv_nnc_minimizer_saved_aux_size(old_minimizer); | |||
| 875 | if (old_saved_aux_size != saved_aux_size) | |||
| 876 | { | |||
| 877 | this_parameter_flag = 1; | |||
| 878 | if (saved_aux_size > old_saved_aux_size) | |||
| 879 | { | |||
| 880 | // Allocate new tensor symbols. | |||
| 881 | const ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(graph, updated_parameters[parameter_indice]); | |||
| 882 | for (j = old_saved_aux_size; j < saved_aux_size; j++) | |||
| 883 | { | |||
| 884 | saved_aux[parameter_indice * max_saved_aux_size + j].source = ccv_nnc_tensor_symbol_new(graph, info, 0); | |||
| 885 | saved_aux[parameter_indice * max_saved_aux_size + j].destination = ccv_nnc_tensor_symbol_new(graph, info, 0); | |||
| 886 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
| 887 | for (k = 1; k < parallel_count; k++) | |||
| 888 | { | |||
| 889 | ccv_nnc_tensor_param_t dev_info = info; | |||
| 890 | if (k != device_id) | |||
| 891 | CCV_TENSOR_SET_DEVICE_ID(dev_info.type, k)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((k) & 0xfff) << 8)); | |||
| 892 | else | |||
| 893 | CCV_TENSOR_SET_DEVICE_ID(dev_info.type, 0)(dev_info.type) = (((dev_info.type) & ~0xfff00) | (((0) & 0xfff) << 8)); | |||
| 894 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); | |||
| 895 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_new(graph, dev_info, 0); | |||
| 896 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, src_copy); | |||
| 897 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, dest_copy); | |||
| 898 | } | |||
| 899 | } | |||
| 900 | } else { | |||
| 901 | for (j = saved_aux_size; j < old_saved_aux_size; j++) | |||
| 902 | { | |||
| 903 | for (k = 1; k < parallel_count; k++) | |||
| 904 | { | |||
| 905 | const ccv_nnc_tensor_symbol_t src_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); | |||
| 906 | if (src_copy.d >= 0) | |||
| 907 | { | |||
| 908 | ccv_nnc_tensor_symbol_free(graph, src_copy); | |||
| 909 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); | |||
| 910 | } | |||
| 911 | const ccv_nnc_tensor_symbol_t dest_copy = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); | |||
| 912 | if (dest_copy.d >= 0) | |||
| 913 | { | |||
| 914 | ccv_nnc_tensor_symbol_free(graph, dest_copy); | |||
| 915 | ccv_nnc_tensor_symbol_set_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k, NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }); | |||
| 916 | } | |||
| 917 | } | |||
| 918 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source); | |||
| 919 | ccv_nnc_tensor_symbol_free(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination); | |||
| 920 | saved_aux[parameter_indice * max_saved_aux_size + j].source = saved_aux[parameter_indice * max_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 921 | } | |||
| 922 | } | |||
| 923 | } | |||
| 924 | } | |||
| 925 | _ccv_cnnp_model_graph_exec_symbol_set(graph, compiled_data, parallel_count, update_nodes[parameter_indice], minimizer); | |||
| 926 | if (this_parameter_flag) | |||
| 927 | { | |||
| 928 | ccv_nnc_tensor_symbol_t update_inputs[saved_aux_size + 2]; | |||
| 929 | ccv_nnc_tensor_symbol_t update_outputs[saved_aux_size + 1]; | |||
| 930 | const int* inputs = 0; | |||
| 931 | int input_size = 0; | |||
| 932 | ccv_nnc_graph_exec_symbol_io(graph, update_nodes[parameter_indice], &inputs, &input_size, 0, 0); | |||
| 933 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 933, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 934 | update_inputs[0].d = inputs[0]; | |||
| 935 | update_inputs[0].graph = graph; | |||
| 936 | update_inputs[1].d = inputs[1]; | |||
| 937 | update_inputs[1].graph = graph; | |||
| 938 | update_outputs[0] = updated_parameters[parameter_indice]; | |||
| 939 | for (j = 0; j < saved_aux_size; j++) | |||
| 940 | { | |||
| 941 | update_inputs[j + 2] = saved_aux[parameter_indice * max_saved_aux_size + j].source; | |||
| 942 | update_outputs[j + 1] = saved_aux[parameter_indice * max_saved_aux_size + j].destination; | |||
| 943 | } | |||
| 944 | ccv_nnc_graph_exec_symbol_set_io(graph, update_nodes[parameter_indice], update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); | |||
| 945 | for (k = 1; k < parallel_count; k++) | |||
| 946 | { | |||
| 947 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(graph, update_nodes[parameter_indice], k); | |||
| 948 | assert(copy.d >= 0)((void) sizeof ((copy.d >= 0) ? 1 : 0), __extension__ ({ if (copy.d >= 0) ; else __assert_fail ("copy.d >= 0", "ccv_cnnp_model.c" , 948, __extension__ __PRETTY_FUNCTION__); })); | |||
| 949 | ccv_nnc_graph_exec_symbol_io(graph, copy, &inputs, &input_size, 0, 0); | |||
| 950 | assert(input_size >= 1)((void) sizeof ((input_size >= 1) ? 1 : 0), __extension__ ( { if (input_size >= 1) ; else __assert_fail ("input_size >= 1" , "ccv_cnnp_model.c", 950, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 951 | update_inputs[0].d = inputs[0]; | |||
| 952 | update_inputs[0].graph = graph; | |||
| 953 | update_inputs[1].d = inputs[1]; | |||
| 954 | update_inputs[1].graph = graph; | |||
| 955 | update_outputs[0] = ccv_nnc_tensor_symbol_copy(graph, updated_parameters[parameter_indice], k); | |||
| 956 | for (j = 0; j < saved_aux_size; j++) | |||
| 957 | { | |||
| 958 | update_inputs[j + 2] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].source, k); | |||
| 959 | update_outputs[j + 1] = ccv_nnc_tensor_symbol_copy(graph, saved_aux[parameter_indice * max_saved_aux_size + j].destination, k); | |||
| 960 | } | |||
| 961 | ccv_nnc_graph_exec_symbol_set_io(graph, copy, update_inputs, saved_aux_size + 2, update_outputs, saved_aux_size + 1); | |||
| 962 | } | |||
| 963 | } | |||
| 964 | return this_parameter_flag; | |||
| 965 | } | |||
| 966 | ||||
| 967 | typedef struct { | |||
| 968 | int parameter_size; | |||
| 969 | ccv_nnc_cmd_t minimizer; | |||
| 970 | ccv_cnnp_model_io_t parameters[1]; | |||
| 971 | } ccv_cnnp_set_minimizer_for_parameter_t; | |||
| 972 | ||||
| 973 | static int _ccv_cnnp_apply_parameters_with_minimizer(ccv_cnnp_model_t* const model) | |||
| 974 | { | |||
| 975 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 976 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 976, __extension__ __PRETTY_FUNCTION__); })); | |||
| 977 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
| 978 | // We update all parameters, at this point, we have one minimizer. | |||
| 979 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 980 | ccv_nnc_graph_exec_symbol_t* const update_nodes = compiled_data->update_nodes; | |||
| 981 | ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph; | |||
| 982 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 982, __extension__ __PRETTY_FUNCTION__); })); | |||
| 983 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 984 | ccv_array_t* const parameters = compiled_data->minimize.parameters; | |||
| 985 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 986 | int i, j, flag = 0; | |||
| 987 | for (i = 0; i < parameters->rnum; i++) | |||
| 988 | { | |||
| 989 | ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = *(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(parameters, i)((void*)(((char*)((parameters)->data)) + (size_t)(parameters )->rsize * (size_t)(i))); | |||
| 990 | for (j = 0; j < set_minimizer_for_parameter->parameter_size; j++) | |||
| 991 | { | |||
| 992 | const int param_sel = set_minimizer_for_parameter->parameters[j]->param_sel > 0 ? set_minimizer_for_parameter->parameters[j]->param_sel - 1 : set_minimizer_for_parameter->parameters[j]->param_sel; | |||
| 993 | assert(set_minimizer_for_parameter->parameters[j]->param_sel != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_sel != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_sel != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_sel != 0" , "ccv_cnnp_model.c", 993, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 994 | const int old_rnum = parameter_indices->rnum; | |||
| 995 | ccv_cnnp_model_add_to_parameter_indices(set_minimizer_for_parameter->parameters[j]->model, param_sel, parameter_indices); | |||
| 996 | const int param_ref = set_minimizer_for_parameter->parameters[j]->param_ref > 0 ? set_minimizer_for_parameter->parameters[j]->param_ref - 1 : set_minimizer_for_parameter->parameters[j]->param_ref; | |||
| 997 | assert(set_minimizer_for_parameter->parameters[j]->param_ref != 0)((void) sizeof ((set_minimizer_for_parameter->parameters[j ]->param_ref != 0) ? 1 : 0), __extension__ ({ if (set_minimizer_for_parameter ->parameters[j]->param_ref != 0) ; else __assert_fail ( "set_minimizer_for_parameter->parameters[j]->param_ref != 0" , "ccv_cnnp_model.c", 997, __extension__ __PRETTY_FUNCTION__) ; })); | |||
| 998 | if (param_ref >= 0) | |||
| 999 | { | |||
| 1000 | assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices-> rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum < parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum" , "ccv_cnnp_model.c", 1000, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1001 | *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(old_rnum))) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref + old_rnum))); | |||
| 1002 | parameter_indices->rnum = old_rnum + 1; | |||
| 1003 | } | |||
| 1004 | } | |||
| 1005 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(set_minimizer_for_parameter->minimizer); | |||
| 1006 | // We may have duplicated indices, but that is OK, we will set it twice. | |||
| 1007 | for (j = 0; j < parameter_indices->rnum; j++) | |||
| 1008 | { | |||
| 1009 | const int d = *(int*)ccv_array_get(parameter_indices, j)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(j))); | |||
| 1010 | assert(d <= parameter_size)((void) sizeof ((d <= parameter_size) ? 1 : 0), __extension__ ({ if (d <= parameter_size) ; else __assert_fail ("d <= parameter_size" , "ccv_cnnp_model.c", 1010, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1011 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, set_minimizer_for_parameter->minimizer, saved_aux_size, max_saved_aux_size, d)) | |||
| 1012 | flag = 1; | |||
| 1013 | } | |||
| 1014 | ccv_array_clear(parameter_indices); | |||
| 1015 | } | |||
| 1016 | ccv_array_free(parameter_indices); | |||
| 1017 | return flag; | |||
| 1018 | } | |||
| 1019 | ||||
| 1020 | static void _ccv_cnnp_scatter_saved_aux(ccv_nnc_tensor_symbol_map_t* const saved_aux, const int parameter_size, const int old_saved_aux_size, const int new_saved_aux_size) | |||
| 1021 | { | |||
| 1022 | if (new_saved_aux_size == old_saved_aux_size) | |||
| 1023 | return; | |||
| 1024 | assert(new_saved_aux_size > old_saved_aux_size)((void) sizeof ((new_saved_aux_size > old_saved_aux_size) ? 1 : 0), __extension__ ({ if (new_saved_aux_size > old_saved_aux_size ) ; else __assert_fail ("new_saved_aux_size > old_saved_aux_size" , "ccv_cnnp_model.c", 1024, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1025 | int i, j; | |||
| 1026 | for (i = parameter_size - 1; i >= 0; i--) | |||
| 1027 | { | |||
| 1028 | for (j = new_saved_aux_size - 1; j >= old_saved_aux_size; j--) | |||
| 1029 | saved_aux[i * new_saved_aux_size + j].source = saved_aux[i * new_saved_aux_size + j].destination = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 1030 | for (j = old_saved_aux_size - 1; j >= 0; j--) | |||
| 1031 | saved_aux[i * new_saved_aux_size + j] = saved_aux[i * old_saved_aux_size + j]; | |||
| 1032 | } | |||
| 1033 | } | |||
| 1034 | ||||
| 1035 | static void _ccv_cnnp_model_set_rewindables(ccv_cnnp_model_t* const model) | |||
| 1036 | { | |||
| 1037 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1038 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1038, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1039 | if (!compiled_data->rewindables) | |||
| 1040 | compiled_data->rewindables = ccv_array_new(sizeof(ccv_cnnp_rewind_symbol_t), 0, 0); | |||
| 1041 | ccv_nnc_tensor_symbol_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_new_hook, compiled_data->rewindables, 0); | |||
| 1042 | ccv_nnc_tensor_symbol_alias_new_hook(model->graph, _ccv_cnnp_model_tensor_symbol_alias_new_hook, compiled_data->rewindables, 0); | |||
| 1043 | ccv_nnc_graph_exec_symbol_new_hook(model->graph, _ccv_cnnp_model_graph_exec_symbol_new_hook, compiled_data->rewindables, 0); | |||
| 1044 | } | |||
| 1045 | ||||
| 1046 | static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const int gradient_mode, const uint64_t disable_outgrad, ccv_nnc_tensor_t* const* const fits, const int fit_size) | |||
| 1047 | { | |||
| 1048 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1049 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 1049, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1050 | assert(gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE)((void) sizeof ((gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ? 1 : 0), __extension__ ({ if (gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE ) ; else __assert_fail ("gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_NONE" , "ccv_cnnp_model.c", 1050, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1051 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
| 1052 | assert(evaluate_to_size > 0)((void) sizeof ((evaluate_to_size > 0) ? 1 : 0), __extension__ ({ if (evaluate_to_size > 0) ; else __assert_fail ("evaluate_to_size > 0" , "ccv_cnnp_model.c", 1052, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1053 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1054 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); | |||
| 1055 | compiled_data->evaluate.to_ops = (ccv_nnc_graph_exec_t*)(compiled_data->evaluate.tos + evaluate_to_size * parallel_count); | |||
| 1056 | int i, j; | |||
| 1057 | const int output_size = model->output_size; | |||
| 1058 | assert(!fits || fit_size == output_size * parallel_count)((void) sizeof ((!fits || fit_size == output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size * parallel_count) ; else __assert_fail ("!fits || fit_size == output_size * parallel_count" , "ccv_cnnp_model.c", 1058, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1059 | if (fits) | |||
| 1060 | for (i = 0; i < output_size; i++) | |||
| 1061 | ccv_nnc_tensor_symbol_set(model->graph, compiled_data->fits[i], fits[i]->info); | |||
| 1062 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
| 1063 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1064 | compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * max_saved_aux_size * parameter_size); | |||
| 1065 | compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size); | |||
| 1066 | compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size); | |||
| 1067 | int parameter_size_maybe_more = parameter_size; | |||
| 1068 | compiled_data->disable_outgrad = disable_outgrad; | |||
| 1069 | int outgrad_size; | |||
| 1070 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0) | |||
| 1071 | outgrad_size = 0; | |||
| 1072 | else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs. | |||
| 1073 | outgrad_size = model->input_size; | |||
| 1074 | else { | |||
| 1075 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 1075, __extension__ __PRETTY_FUNCTION__ ); })); // If it is disable all, gradient mode won't be this. | |||
| 1076 | outgrad_size = 0; | |||
| 1077 | for (i = 0; i < model->input_size; i++) | |||
| 1078 | if (!(disable_outgrad & ((uint64_t)1 << i))) | |||
| 1079 | ++outgrad_size; | |||
| 1080 | } | |||
| 1081 | compiled_data->outgrad_size = outgrad_size; | |||
| 1082 | parameter_size_maybe_more += outgrad_size; | |||
| 1083 | compiled_data->gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size_maybe_more + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size_maybe_more * parallel_count); | |||
| 1084 | compiled_data->outgrads = parameter_size_maybe_more > parameter_size ? compiled_data->gradients + parameter_size : 0; | |||
| 1085 | compiled_data->backward.tos = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->gradients + parameter_size_maybe_more); | |||
| 1086 | compiled_data->backward.to_size = parameter_size_maybe_more; | |||
| 1087 | ccv_nnc_tensor_symbol_t* parameters = (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))); | |||
| 1088 | if (compiled_data->parameter_flags) | |||
| 1089 | { | |||
| 1090 | parameters = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size); | |||
| 1091 | for (i = 0; i < parameter_size; i++) | |||
| 1092 | if (compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 1093 | parameters[i] = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | |||
| 1094 | else | |||
| 1095 | parameters[i] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 1096 | } | |||
| 1097 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || model->input_size == 0) | |||
| 1098 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | |||
| 1099 | else if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) // Compute minimize with gradients including inputs. | |||
| 1100 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, model->inputs, model->input_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | |||
| 1101 | else { // Compute minimize with gradients including selected inputs. | |||
| 1102 | assert(model->input_size > 0)((void) sizeof ((model->input_size > 0) ? 1 : 0), __extension__ ({ if (model->input_size > 0) ; else __assert_fail ("model->input_size > 0" , "ccv_cnnp_model.c", 1102, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1103 | assert(disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL)((void) sizeof ((disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ? 1 : 0), __extension__ ({ if (disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL ) ; else __assert_fail ("disable_outgrad != CCV_CNNP_DISABLE_OUTGRAD_ALL" , "ccv_cnnp_model.c", 1103, __extension__ __PRETTY_FUNCTION__ ); })); // If it is disable all, gradient mode won't be this. | |||
| 1104 | assert(outgrad_size > 0)((void) sizeof ((outgrad_size > 0) ? 1 : 0), __extension__ ({ if (outgrad_size > 0) ; else __assert_fail ("outgrad_size > 0" , "ccv_cnnp_model.c", 1104, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1105 | ccv_nnc_tensor_symbol_t outgrads[outgrad_size]; | |||
| 1106 | j = 0; | |||
| 1107 | for (i = 0; i < model->input_size; i++) | |||
| 1108 | if (!(disable_outgrad & ((uint64_t)1 << i))) | |||
| 1109 | outgrads[j++] = model->inputs[i]; | |||
| 1110 | ccv_nnc_symbolic_graph_minimize(model->graph, compiled_data->minimize.minimizer, compiled_data->f, output_size, parameters, parameter_size, outgrads, outgrad_size, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), compiled_data->gradients, compiled_data->updated_parameters, compiled_data->saved_aux, compiled_data->update_nodes); | |||
| 1111 | } | |||
| 1112 | if (compiled_data->parameter_flags) | |||
| 1113 | ccfreefree(parameters); | |||
| 1114 | _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer), compiled_data->minimize.max_saved_aux_size); | |||
| 1115 | if (compiled_data->minimize.parameters) | |||
| 1116 | _ccv_cnnp_apply_parameters_with_minimizer(model); | |||
| 1117 | // Go through gradient checkpoints to generate tensor inputs for backward pass just before executing the backward pass. | |||
| 1118 | ccv_cnnp_model_apply_gradient_checkpoints(compiled_data, model->graph); | |||
| 1119 | for (i = 0; i < output_size; i++) | |||
| 1120 | { | |||
| 1121 | const ccv_nnc_tensor_symbol_t df = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); | |||
| 1122 | // Init this to 1 so we can backprop. | |||
| 1123 | ccv_nnc_tensor_symbol_set_flags(model->graph, df, CCV_NNC_TENSOR_SYMBOL_INIT_ONES); | |||
| 1124 | } | |||
| 1125 | compiled_data->backward.to_size = 0; | |||
| 1126 | for (i = 0; i < parameter_size_maybe_more; i++) | |||
| 1127 | if (compiled_data->gradients[i].d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1128 | compiled_data->backward.tos[compiled_data->backward.to_size++] = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->gradients[i]); | |||
| 1129 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS); | |||
| 1130 | ccv_nnc_symbolic_graph_set_destinations(model->graph, compiled_data->update_nodes, parameter_size); | |||
| 1131 | for (i = 0; i < parameter_size_maybe_more - parameter_size; i++) | |||
| 1132 | { | |||
| 1133 | if (compiled_data->outgrads[i].d < 0) // When we go through input, we might find zero-length inputs, and for these, we cannot have any outgrads. | |||
| 1134 | continue; | |||
| 1135 | const ccv_nnc_graph_exec_symbol_t outgrad = ccv_nnc_graph_exec_symbol_for_backward(model->graph, compiled_data->outgrads[i]); | |||
| 1136 | const int* tos; | |||
| 1137 | int to_size; | |||
| 1138 | ccv_nnc_graph_exec_symbol_to(model->graph, outgrad, &tos, &to_size); | |||
| 1139 | if (to_size == 0) // If this is the end (no minimizers afterwards). We need to attach this as a destination. Otherwise this is covered in update_nodes. | |||
| 1140 | { | |||
| 1141 | const ccv_nnc_graph_exec_symbol_t* destinations = ccv_nnc_symbolic_graph_destinations(model->graph); | |||
| 1142 | const int destination_count = ccv_nnc_symbolic_graph_destination_size(model->graph); | |||
| 1143 | int flag = 0; | |||
| 1144 | const int outgrad_destination_start = ccv_max(0, destination_count - i)({ typeof (0) _a = (0); typeof (destination_count - i) _b = ( destination_count - i); (_a > _b) ? _a : _b; }); | |||
| 1145 | for (j = i - 1; !flag && j >= 0; j--) | |||
| 1146 | if (j + outgrad_destination_start < destination_count) | |||
| 1147 | flag = (destinations[j + outgrad_destination_start].d == outgrad.d); | |||
| 1148 | if (!flag) // Only if we cannot find it, we add it. | |||
| 1149 | ccv_nnc_symbolic_graph_add_destination(model->graph, outgrad); | |||
| 1150 | } | |||
| 1151 | } | |||
| 1152 | if (parallel_count > 1) | |||
| 1153 | { | |||
| 1154 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, | |||
| 1155 | 0, 0, | |||
| 1156 | compiled_data->gradients, parameter_size /* No need to deal with outgrads, we don't allreduce outgrads */, | |||
| 1157 | compiled_data->gradients /* We only care about gradients before allreduce, thus, update our current pointers */, | |||
| 1158 | 0, 0, 0, | |||
| 1159 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, | |||
| 1160 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
| 1161 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 1162 | for (i = 0; i < evaluate_to_size; i++) | |||
| 1163 | for (j = 1; j < parallel_count; j++) | |||
| 1164 | { | |||
| 1165 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); | |||
| 1166 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | |||
| 1167 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; | |||
| 1168 | } | |||
| 1169 | const int backward_to_size = compiled_data->backward.to_size; | |||
| 1170 | for (i = 0; i < backward_to_size; i++) | |||
| 1171 | for (j = 1; j < parallel_count; j++) | |||
| 1172 | { | |||
| 1173 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->backward.tos[i], j); | |||
| 1174 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | |||
| 1175 | compiled_data->backward.tos[compiled_data->backward.to_size++] = copy; | |||
| 1176 | } | |||
| 1177 | } | |||
| 1178 | // Only use memory compression if we are in gradient parameter mode. | |||
| 1179 | if (gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS) | |||
| 1180 | { | |||
| 1181 | if (model->memory_compression) | |||
| 1182 | ccv_nnc_symbolic_graph_memory_compression(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
| 1183 | if (model->memory_reduction) | |||
| 1184 | ccv_nnc_symbolic_graph_memory_reduction(model->graph, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
| 1185 | } | |||
| 1186 | compiled_data->backward.to_size = _ccv_nnc_array_dedup_graph_exec_symbols(compiled_data->backward.tos, compiled_data->backward.to_size); | |||
| 1187 | compiled_data->gradient_mode = gradient_mode; | |||
| 1188 | } | |||
| 1189 | ||||
| 1190 | void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1191 | { | |||
| 1192 | assert(!compiled_data->tensors.parameters)((void) sizeof ((!compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (!compiled_data->tensors.parameters ) ; else __assert_fail ("!compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 1192, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1193 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1194 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1195 | const int internal_size = compiled_data->internals->rnum; | |||
| 1196 | compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph); | |||
| 1197 | compiled_data->tensors_init.v = cccalloccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t)); | |||
| 1198 | compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*)); | |||
| 1199 | compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count; | |||
| 1200 | } | |||
| 1201 | ||||
| 1202 | int ccv_cnnp_model_tensors_any_to_alloc(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1203 | { | |||
| 1204 | int i, j; | |||
| 1205 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1206 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1207 | const int internal_size = compiled_data->internals->rnum; | |||
| 1208 | for (i = 0; i < parameter_size; i++) | |||
| 1209 | { | |||
| 1210 | // parameters has to be allocated all together. | |||
| 1211 | if (compiled_data->tensors.parameters[i]) | |||
| 1212 | { | |||
| 1213 | for (j = 1; j < parallel_count; j++) | |||
| 1214 | { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j * parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data ->tensors.parameters[i + j * parameter_size]) ; else __assert_fail ("compiled_data->tensors.parameters[i + j * parameter_size]" , "ccv_cnnp_model.c", 1214, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 1215 | continue; | |||
| 1216 | } | |||
| 1217 | return 1; | |||
| 1218 | } | |||
| 1219 | for (i = 0; i < internal_size; i++) | |||
| 1220 | { | |||
| 1221 | if (!compiled_data->tensors.internals[i]) | |||
| 1222 | return 1; | |||
| 1223 | for (j = 1; j < parallel_count; j++) | |||
| 1224 | if (!compiled_data->tensors.internals[i + j * internal_size]) | |||
| 1225 | return 1; | |||
| 1226 | } | |||
| 1227 | return 0; | |||
| 1228 | } | |||
| 1229 | ||||
| 1230 | void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1231 | { | |||
| 1232 | int i, j; | |||
| 1233 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1234 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1235 | const int internal_size = compiled_data->internals->rnum; | |||
| 1236 | for (i = 0; i < parameter_size; i++) | |||
| 1237 | { | |||
| 1238 | // parameters has to be allocated all together. | |||
| 1239 | if (compiled_data->tensors.parameters[i]) | |||
| 1240 | { | |||
| 1241 | for (j = 1; j < parallel_count; j++) | |||
| 1242 | { assert(compiled_data->tensors.parameters[i + j * parameter_size])((void) sizeof ((compiled_data->tensors.parameters[i + j * parameter_size]) ? 1 : 0), __extension__ ({ if (compiled_data ->tensors.parameters[i + j * parameter_size]) ; else __assert_fail ("compiled_data->tensors.parameters[i + j * parameter_size]" , "ccv_cnnp_model.c", 1242, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 1243 | continue; | |||
| 1244 | } | |||
| 1245 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | |||
| 1246 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); | |||
| 1247 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) | |||
| 1248 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
| 1249 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
| 1250 | compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0); | |||
| 1251 | for (j = 1; j < parallel_count; j++) | |||
| 1252 | { | |||
| 1253 | if (j != device_id) | |||
| 1254 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | |||
| 1255 | else | |||
| 1256 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
| 1257 | compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | |||
| 1258 | } | |||
| 1259 | } | |||
| 1260 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 1261 | for (i = 0; i < internal_size; i++) | |||
| 1262 | { | |||
| 1263 | const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(i)) ); | |||
| 1264 | const int d = retained.d; | |||
| 1265 | if (init_v[d >> 5] & (1u << (d & 0x1f))) | |||
| 1266 | continue; | |||
| 1267 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained); | |||
| 1268 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) | |||
| 1269 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
| 1270 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
| 1271 | if (!compiled_data->tensors.internals[i]) | |||
| 1272 | compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0); | |||
| 1273 | for (j = 1; j < parallel_count; j++) | |||
| 1274 | { | |||
| 1275 | if (j != device_id) | |||
| 1276 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | |||
| 1277 | else | |||
| 1278 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
| 1279 | if (!compiled_data->tensors.internals[i + j * internal_size]) | |||
| 1280 | compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0); | |||
| 1281 | } | |||
| 1282 | } | |||
| 1283 | compiled_data->tensors_init.v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); // Remove 1 if any. | |||
| 1284 | } | |||
| 1285 | ||||
| 1286 | static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1287 | { | |||
| 1288 | ccv_cnnp_model_tensors_init_0(model, compiled_data); | |||
| 1289 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
| 1290 | } | |||
| 1291 | ||||
| 1292 | static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) | |||
| 1293 | { | |||
| 1294 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1294, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1295 | int i, j; | |||
| 1296 | for (i = 0; i < tensor_size; i++) | |||
| 1297 | { | |||
| 1298 | if (!tensors[i]) | |||
| 1299 | continue; | |||
| 1300 | const int d = tensor_symbols[i].d; | |||
| 1301 | if (!(tensors_init[d >> 5] & (1u << (d & 0x1f)))) | |||
| 1302 | continue; | |||
| 1303 | for (j = 1; j < parallel_count; j++) | |||
| 1304 | if (tensors[i + j * tensor_size]) | |||
| 1305 | { | |||
| 1306 | ccv_nnc_tensor_t* const input = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t )1)); | |||
| 1307 | ccv_nnc_tensor_t* const output = CCV_NNC_TENSOR(tensors[i + j * tensor_size])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i + j * tensor_size] ) & ~(uintptr_t)1)); | |||
| 1308 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &input, 1, &output, 1, 0); | |||
| 1309 | } | |||
| 1310 | } | |||
| 1311 | } | |||
| 1312 | ||||
| 1313 | static void _ccv_cnnp_model_remove_nocopies(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t** const tensors, const int tensor_size, const int parallel_count) | |||
| 1314 | { | |||
| 1315 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1315, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1316 | int i, j; | |||
| 1317 | for (i = 0; i < tensor_size; i++) | |||
| 1318 | { | |||
| 1319 | const ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | |||
| 1320 | for (j = 1; j < parallel_count; j++) | |||
| 1321 | { | |||
| 1322 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | |||
| 1323 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; | |||
| 1324 | if (copy_tensor && copy.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1325 | { // We shouldn't allocate this, free it up. | |||
| 1326 | ccv_nnc_tensor_free(tensors[i + j * tensor_size]); | |||
| 1327 | tensors[i + j * tensor_size] = 0; | |||
| 1328 | } | |||
| 1329 | } | |||
| 1330 | } | |||
| 1331 | } | |||
| 1332 | ||||
| 1333 | static void _ccv_cnnp_model_bind_tensors(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count, ccv_array_t* const tensor_binds) | |||
| 1334 | { | |||
| 1335 | assert(parallel_count > 0)((void) sizeof ((parallel_count > 0) ? 1 : 0), __extension__ ({ if (parallel_count > 0) ; else __assert_fail ("parallel_count > 0" , "ccv_cnnp_model.c", 1335, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1336 | int i, j; | |||
| 1337 | for (i = 0; i < tensor_size; i++) | |||
| 1338 | { | |||
| 1339 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | |||
| 1340 | if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1341 | continue; | |||
| 1342 | if (graph) | |||
| 1343 | { | |||
| 1344 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); | |||
| 1345 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1346 | tensor_symbol = alias_to; | |||
| 1347 | } | |||
| 1348 | ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(tensors[i])((ccv_nnc_tensor_t*)((uintptr_t)(tensors[i]) & ~(uintptr_t )1)); | |||
| 1349 | if (tensor && tensor_symbol.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1350 | { | |||
| 1351 | const ccv_nnc_tensor_bind_t retained_bind = { | |||
| 1352 | .symbol = tensor_symbol, | |||
| 1353 | .tensor = tensor | |||
| 1354 | }; | |||
| 1355 | ccv_array_push(tensor_binds, &retained_bind); | |||
| 1356 | } | |||
| 1357 | for (j = 1; j < parallel_count; j++) | |||
| 1358 | { | |||
| 1359 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | |||
| 1360 | ccv_nnc_tensor_t* copy_tensor = tensors[i + j * tensor_size]; | |||
| 1361 | if (copy_tensor && copy.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1362 | { | |||
| 1363 | const ccv_nnc_tensor_bind_t bind = { | |||
| 1364 | .symbol = copy, | |||
| 1365 | .tensor = tensors[i + j * tensor_size] | |||
| 1366 | }; | |||
| 1367 | ccv_array_push(tensor_binds, &bind); | |||
| 1368 | } | |||
| 1369 | } | |||
| 1370 | } | |||
| 1371 | } | |||
| 1372 | ||||
| 1373 | static void _ccv_cnnp_compiled_data_graph_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1374 | { | |||
| 1375 | if (compiled_data->graph) | |||
| 1376 | ccv_nnc_graph_free(compiled_data->graph); | |||
| 1377 | compiled_data->graph = 0; | |||
| 1378 | compiled_data->is_test = 0; | |||
| 1379 | if (compiled_data->tensor_arena) | |||
| 1380 | ccv_nnc_tensor_arena_free(compiled_data->tensor_arena); | |||
| 1381 | compiled_data->tensor_arena = 0; | |||
| 1382 | if (compiled_data->graph_exec_arena) | |||
| 1383 | ccv_nnc_graph_exec_arena_free(compiled_data->graph_exec_arena); | |||
| 1384 | compiled_data->graph_exec_arena = 0; | |||
| 1385 | if (compiled_data->backward.from_ops) | |||
| 1386 | ccfreefree(compiled_data->backward.from_ops); | |||
| 1387 | compiled_data->backward.from_ops = 0; | |||
| 1388 | if (compiled_data->evaluate.schedule) | |||
| 1389 | ccv_nnc_graph_static_schedule_free(compiled_data->evaluate.schedule); | |||
| 1390 | compiled_data->evaluate.schedule = 0; | |||
| 1391 | if (compiled_data->backward.schedule) | |||
| 1392 | ccv_nnc_graph_static_schedule_free(compiled_data->backward.schedule); | |||
| 1393 | compiled_data->backward.schedule = 0; | |||
| 1394 | } | |||
| 1395 | ||||
| 1396 | static void _ccv_cnnp_compiled_data_gradient_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1397 | { | |||
| 1398 | if (compiled_data->gradients) | |||
| 1399 | ccfreefree(compiled_data->gradients); | |||
| 1400 | compiled_data->gradients = 0; | |||
| 1401 | if (compiled_data->updated_parameters) | |||
| 1402 | ccfreefree(compiled_data->updated_parameters); | |||
| 1403 | compiled_data->updated_parameters = 0; | |||
| 1404 | compiled_data->update_nodes = 0; | |||
| 1405 | compiled_data->saved_aux = 0; | |||
| 1406 | } | |||
| 1407 | ||||
| 1408 | static void _ccv_cnnp_compiled_data_backward_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1409 | { | |||
| 1410 | if (compiled_data->backward.gradients) | |||
| 1411 | ccfreefree(compiled_data->backward.gradients); | |||
| 1412 | compiled_data->backward.gradients = 0; | |||
| 1413 | if (compiled_data->backward.accum) | |||
| 1414 | ccv_nnc_graph_free(compiled_data->backward.accum); | |||
| 1415 | compiled_data->backward.accum = 0; | |||
| 1416 | if (compiled_data->backward.tensor_arena) | |||
| 1417 | ccv_nnc_tensor_arena_free(compiled_data->backward.tensor_arena); | |||
| 1418 | compiled_data->backward.tensor_arena = 0; | |||
| 1419 | if (compiled_data->backward.graph_exec_arena) | |||
| 1420 | ccv_nnc_graph_exec_arena_free(compiled_data->backward.graph_exec_arena); | |||
| 1421 | compiled_data->backward.graph_exec_arena = 0; | |||
| 1422 | } | |||
| 1423 | ||||
| 1424 | static void _ccv_cnnp_compiled_data_apply_gradients_free(ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1425 | { | |||
| 1426 | if (compiled_data->apply_gradients.graph) | |||
| 1427 | ccv_nnc_graph_free(compiled_data->apply_gradients.graph); | |||
| 1428 | compiled_data->apply_gradients.graph = 0; | |||
| 1429 | if (compiled_data->apply_gradients.tensor_arena) | |||
| 1430 | ccv_nnc_tensor_arena_free(compiled_data->apply_gradients.tensor_arena); | |||
| 1431 | compiled_data->apply_gradients.tensor_arena = 0; | |||
| 1432 | if (compiled_data->apply_gradients.graph_exec_arena) | |||
| 1433 | ccv_nnc_graph_exec_arena_free(compiled_data->apply_gradients.graph_exec_arena); | |||
| 1434 | compiled_data->apply_gradients.graph_exec_arena = 0; | |||
| 1435 | } | |||
| 1436 | ||||
| 1437 | // Compile the graph to run ccv_cnnp_model_fit | |||
| 1438 | static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
| 1439 | { | |||
| 1440 | int i, j; | |||
| 1441 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1442 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) ; else __assert_fail ("!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE" , "ccv_cnnp_model.c", 1442, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1443 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_FIT_MODE; | |||
| 1444 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1445 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1445, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1446 | assert(!fits || output_size == fit_size)((void) sizeof ((!fits || output_size == fit_size) ? 1 : 0), __extension__ ({ if (!fits || output_size == fit_size) ; else __assert_fail ("!fits || output_size == fit_size", "ccv_cnnp_model.c", 1446 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 1447 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1447, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1448 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
| 1449 | { | |||
| 1450 | _ccv_cnnp_model_set_rewindables(model); | |||
| 1451 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); | |||
| 1452 | } else if (compiled_data->gradient_mode != CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES) { | |||
| 1453 | _ccv_cnnp_model_rewind_graph(model); | |||
| 1454 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | |||
| 1455 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; | |||
| 1456 | _ccv_cnnp_model_gradient_init(model, CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES, CCV_CNNP_DISABLE_OUTGRAD_ALL, fits, fit_size); | |||
| 1457 | } | |||
| 1458 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
| 1459 | if (!tensors_init) | |||
| 1460 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
| 1461 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
| 1462 | // Check if it is not fully allocated, if it is not, init_1. | |||
| 1463 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
| 1464 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
| 1465 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1465, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1466 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1466, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1467 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1467 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 1468 | const int input_size_per_p = input_size / parallel_count; | |||
| 1469 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | |||
| 1470 | const int output_size_per_p = output_size / parallel_count; | |||
| 1471 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | |||
| 1472 | const int fit_size_per_p = fit_size / parallel_count; | |||
| 1473 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count, tensor_binds); | |||
| 1474 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1475 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
| 1476 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
| 1477 | const int internal_size = compiled_data->internals->rnum; | |||
| 1478 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | |||
| 1479 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | |||
| 1480 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph), &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
| 1481 | ccv_array_free(tensor_binds); | |||
| 1482 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 1483 | if (tensors_init && parallel_count > 1) | |||
| 1484 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | |||
| 1485 | // If tensor is not init'ed, we need to init states first. | |||
| 1486 | if (_ccv_cnnp_any_to_init(compiled_data)) | |||
| 1487 | { | |||
| 1488 | ccv_nnc_tensor_init_states_t tensor_init_states = { | |||
| 1489 | .parallel_count = parallel_count, | |||
| 1490 | .graph = model->graph, | |||
| 1491 | .compiled_data = compiled_data, | |||
| 1492 | .tensor_arena = compiled_data->tensor_arena | |||
| 1493 | }; | |||
| 1494 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | |||
| 1495 | } | |||
| 1496 | compiled_data->is_test = 0; | |||
| 1497 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(compiled_data->minimize.minimizer); | |||
| 1498 | // No need to set because it is default to training mode. | |||
| 1499 | // ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); | |||
| 1500 | for (i = 0; i < saved_aux_size * parameter_size; i++) | |||
| 1501 | { | |||
| 1502 | if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1503 | continue; | |||
| 1504 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, compiled_data->saved_aux[i].source); | |||
| 1505 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); | |||
| 1506 | for (j = 1; j < parallel_count; j++) | |||
| 1507 | { | |||
| 1508 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); | |||
| 1509 | if (copy) | |||
| 1510 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, ©, 1, 0); | |||
| 1511 | } | |||
| 1512 | } | |||
| 1513 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
| 1514 | compiled_data->evaluate.to_op_size = 0; | |||
| 1515 | for (i = 0; i < evaluate_to_size; i++) | |||
| 1516 | { | |||
| 1517 | ccv_nnc_graph_exec_t const to = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); | |||
| 1518 | if (to.graph) | |||
| 1519 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to; | |||
| 1520 | } | |||
| 1521 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); | |||
| 1522 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
| 1523 | } | |||
| 1524 | ||||
| 1525 | ccv_nnc_stream_context_t* ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model) | |||
| 1526 | { | |||
| 1527 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1528 | if (!compiled_data || !compiled_data->graph) | |||
| 1529 | return 0; | |||
| 1530 | return ccv_nnc_graph_default_stream(compiled_data->graph); | |||
| 1531 | } | |||
| 1532 | ||||
| 1533 | uint64_t ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model) | |||
| 1534 | { | |||
| 1535 | const ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1536 | if (!compiled_data || !compiled_data->tensor_arena) | |||
| 1537 | return 0; | |||
| 1538 | return ccv_nnc_tensor_arena_size(compiled_data->tensor_arena); | |||
| 1539 | } | |||
| 1540 | ||||
| 1541 | static void _ccv_cnnp_bind_tensors_to_arena(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count) | |||
| 1542 | { | |||
| 1543 | int i, j; | |||
| 1544 | for (i = 0; i < tensor_size; i++) | |||
| 1545 | { | |||
| 1546 | ccv_nnc_tensor_symbol_t tensor_symbol = tensor_symbols[i]; | |||
| 1547 | if (tensor_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1548 | continue; | |||
| 1549 | if (graph) | |||
| 1550 | { | |||
| 1551 | const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph, tensor_symbol); | |||
| 1552 | if (alias_to.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1553 | tensor_symbol = alias_to; | |||
| 1554 | } | |||
| 1555 | ccv_nnc_tensor_bind_symbol(tensor_arena, tensor_symbol, tensors[i]); | |||
| 1556 | for (j = 1; j < parallel_count; j++) | |||
| 1557 | { | |||
| 1558 | const ccv_nnc_tensor_symbol_t copy = ccv_nnc_tensor_symbol_copy(graph, tensor_symbol, j); | |||
| 1559 | if (copy.d != CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 1560 | ccv_nnc_tensor_bind_symbol(tensor_arena, copy, tensors[i + tensor_size * j]); | |||
| 1561 | } | |||
| 1562 | } | |||
| 1563 | } | |||
| 1564 | ||||
| 1565 | void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | |||
| 1566 | { | |||
| 1567 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1568 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1568, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1569 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1570 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1570, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1571 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1571, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1572 | assert(!fits || fit_size == output_size)((void) sizeof ((!fits || fit_size == output_size) ? 1 : 0), __extension__ ({ if (!fits || fit_size == output_size) ; else __assert_fail ("!fits || fit_size == output_size", "ccv_cnnp_model.c", 1572 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 1573 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1573, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1574 | if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_FIT_MODE) | |||
| 1575 | { | |||
| 1576 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
| 1577 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | |||
| 1578 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | |||
| 1579 | // Compile the symbolic graph down only when needed. | |||
| 1580 | _ccv_cnnp_model_fit_jit(model, inputs, input_size, fits, fit_size, outputs, output_size); | |||
| 1581 | } else { | |||
| 1582 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1582, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1583 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1583, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1584 | assert((fit_size % parallel_count) == 0)((void) sizeof (((fit_size % parallel_count) == 0) ? 1 : 0), __extension__ ({ if ((fit_size % parallel_count) == 0) ; else __assert_fail ("(fit_size % parallel_count) == 0", "ccv_cnnp_model.c", 1584 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 1585 | const int input_size_per_p = input_size / parallel_count; | |||
| 1586 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); | |||
| 1587 | const int output_size_per_p = output_size / parallel_count; | |||
| 1588 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); | |||
| 1589 | const int fit_size_per_p = fit_size / parallel_count; | |||
| 1590 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->fits, fits, fit_size_per_p, parallel_count); | |||
| 1591 | } | |||
| 1592 | if (compiled_data->is_test) | |||
| 1593 | { | |||
| 1594 | compiled_data->is_test = 0; | |||
| 1595 | ccv_nnc_graph_exec_update_t update = { | |||
| 1596 | .parallel_count = parallel_count, | |||
| 1597 | .graph = model->graph, | |||
| 1598 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
| 1599 | }; | |||
| 1600 | ccv_cnnp_model_set_is_test(model, 0, _ccv_cnnp_cmd_update_for_execs, &update); | |||
| 1601 | } | |||
| 1602 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); | |||
| 1603 | } | |||
| 1604 | ||||
| 1605 | // Compile the graph to run ccv_cnnp_model_evaluate with require_grad = false (MULTISTAGE_MODE_NO_GRAD). | |||
| 1606 | static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
| 1607 | { | |||
| 1608 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1609 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD; | |||
| 1610 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1611 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1611, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1612 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1612, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1613 | // If the gradient is not initialized, continue to setup parallel process. We don't init gradient here, but rather, | |||
| 1614 | // we setup proper rewindables so the graph can be rewinded to previous state before we run data parallel. | |||
| 1615 | if (parallel_count > 1 && compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
| 1616 | { | |||
| 1617 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
| 1618 | compiled_data->evaluate.tos = ccreallocrealloc(compiled_data->evaluate.tos, sizeof(ccv_nnc_graph_exec_symbol_t) * evaluate_to_size * parallel_count + sizeof(ccv_nnc_graph_exec_t) * evaluate_to_size * parallel_count); | |||
| 1619 | _ccv_cnnp_model_set_rewindables(model); | |||
| 1620 | ccv_nnc_symbolic_graph_data_parallel(model->graph, parallel_count, | |||
| 1621 | 0, 0, | |||
| 1622 | 0, 0, 0, | |||
| 1623 | 0, 0, 0, | |||
| 1624 | CCV_NNC_PARALLEL_REDUCE_OP_SUM, | |||
| 1625 | SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), SYMBOLIC_GRAPH_DESTINATIONS(model->graph)ccv_nnc_symbolic_graph_destinations(model->graph), ccv_nnc_symbolic_graph_destination_size (model->graph)); | |||
| 1626 | ccv_nnc_graph_exec_symbol_autogen(model->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 1627 | int i, j; | |||
| 1628 | for (i = 0; i < evaluate_to_size; i++) | |||
| 1629 | for (j = 1; j < parallel_count; j++) | |||
| 1630 | { | |||
| 1631 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->evaluate.tos[i], j); | |||
| 1632 | if (copy.d != CCV_NNC_NO_GRAPH_EXEC_SYMBOL) | |||
| 1633 | compiled_data->evaluate.tos[compiled_data->evaluate.to_size++] = copy; | |||
| 1634 | } | |||
| 1635 | } | |||
| 1636 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
| 1637 | if (!tensors_init) | |||
| 1638 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
| 1639 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
| 1640 | // Check if it is not fully allocated, if it is not, init_1. | |||
| 1641 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
| 1642 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
| 1643 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1643, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1644 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1644, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1645 | const int input_size_per_p = input_size / parallel_count; | |||
| 1646 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | |||
| 1647 | const int output_size_per_p = output_size / parallel_count; | |||
| 1648 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | |||
| 1649 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1650 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
| 1651 | const int internal_size = compiled_data->internals->rnum; | |||
| 1652 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | |||
| 1653 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | |||
| 1654 | // If we generated gradient for the graph, only compile part of the graph because the rest is irrelevant for evaluation. | |||
| 1655 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
| 1656 | ccv_array_free(tensor_binds); | |||
| 1657 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 1658 | // If tensor is not init'ed, we need to init states first. | |||
| 1659 | if (tensors_init && parallel_count > 1) | |||
| 1660 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | |||
| 1661 | if (_ccv_cnnp_any_to_init(compiled_data)) | |||
| 1662 | { | |||
| 1663 | ccv_nnc_tensor_init_states_t tensor_init_states = { | |||
| 1664 | .parallel_count = parallel_count, | |||
| 1665 | .graph = model->graph, | |||
| 1666 | .compiled_data = compiled_data, | |||
| 1667 | .tensor_arena = compiled_data->tensor_arena | |||
| 1668 | }; | |||
| 1669 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | |||
| 1670 | } | |||
| 1671 | compiled_data->is_test = 1; | |||
| 1672 | ccv_nnc_graph_exec_update_t update = { | |||
| 1673 | .parallel_count = parallel_count, | |||
| 1674 | .graph = model->graph, | |||
| 1675 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
| 1676 | }; | |||
| 1677 | ccv_cnnp_model_set_is_test(model, 1, _ccv_cnnp_cmd_update_for_execs, &update); | |||
| 1678 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); | |||
| 1679 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
| 1680 | } | |||
| 1681 | ||||
| 1682 | static void _ccv_cnnp_model_gradient_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 1683 | { | |||
| 1684 | assert(!compiled_data->tensors.gradients)((void) sizeof ((!compiled_data->tensors.gradients) ? 1 : 0 ), __extension__ ({ if (!compiled_data->tensors.gradients) ; else __assert_fail ("!compiled_data->tensors.gradients" , "ccv_cnnp_model.c", 1684, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1685 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1686 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1687 | compiled_data->tensors.gradients = (ccv_nnc_tensor_t**)ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * parameter_size * 2 * parallel_count); | |||
| 1688 | compiled_data->tensors.accum_gradients = compiled_data->tensors.gradients + parameter_size * parallel_count; | |||
| 1689 | int i, j; | |||
| 1690 | for (i = 0; i < parameter_size; i++) | |||
| 1691 | { | |||
| 1692 | if (compiled_data->parameter_flags && !(compiled_data->parameter_flags[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
| 1693 | { | |||
| 1694 | compiled_data->tensors.gradients[i] = 0; | |||
| 1695 | compiled_data->tensors.accum_gradients[i] = 0; | |||
| 1696 | for (j = 1; j < parallel_count; j++) | |||
| 1697 | { | |||
| 1698 | compiled_data->tensors.gradients[i + j * parameter_size] = 0; | |||
| 1699 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0; | |||
| 1700 | } | |||
| 1701 | continue; | |||
| 1702 | } | |||
| 1703 | const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))); | |||
| 1704 | ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter); | |||
| 1705 | if (CCV_TENSOR_GET_DEVICE(info.type)((info.type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) | |||
| 1706 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
| 1707 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type)(((info.type) & 0xfff00) >> 8); | |||
| 1708 | compiled_data->tensors.gradients[i] = ccv_nnc_tensor_new(0, info, 0); | |||
| 1709 | compiled_data->tensors.accum_gradients[i] = 0; // delay the accumulated gradient allocation until when we need it. | |||
| 1710 | for (j = 1; j < parallel_count; j++) | |||
| 1711 | { | |||
| 1712 | if (j != device_id) | |||
| 1713 | CCV_TENSOR_SET_DEVICE_ID(info.type, j)(info.type) = (((info.type) & ~0xfff00) | (((j) & 0xfff ) << 8)); | |||
| 1714 | else | |||
| 1715 | CCV_TENSOR_SET_DEVICE_ID(info.type, 0)(info.type) = (((info.type) & ~0xfff00) | (((0) & 0xfff ) << 8)); | |||
| 1716 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | |||
| 1717 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = 0; | |||
| 1718 | } | |||
| 1719 | } | |||
| 1720 | } | |||
| 1721 | ||||
| 1722 | static int _ccv_cnnp_is_disable_outgrad_all(const uint64_t disable_outgrad, const int input_size) | |||
| 1723 | { | |||
| 1724 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL) | |||
| 1725 | return 1; | |||
| 1726 | if (disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE) | |||
| 1727 | return 0; | |||
| 1728 | int i; | |||
| 1729 | for (i = 0; i < input_size; i++) | |||
| 1730 | if (!(disable_outgrad & ((uint64_t)1 << i))) | |||
| 1731 | return 0; | |||
| 1732 | return 1; | |||
| 1733 | } | |||
| 1734 | ||||
| 1735 | // Compile the graph to run ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). | |||
| 1736 | // Particularly, this method compiles the evaluation and backprop graph (the main graph). | |||
| 1737 | static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, const uint64_t disable_outgrad, const int is_test, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
| 1738 | { | |||
| 1739 | int i, j; | |||
| 1740 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1741 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; | |||
| 1742 | assert(!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode)((void) sizeof ((!compiled_data->graph || compiled_data-> graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data ->gradient_mode != target_gradient_mode) ? 1 : 0), __extension__ ({ if (!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data-> gradient_mode != target_gradient_mode) ; else __assert_fail ( "!compiled_data->graph || compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode" , "ccv_cnnp_model.c", 1742, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1743 | compiled_data->graph_mode = CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE; | |||
| 1744 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1745 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1745, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1746 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_model.c", 1746, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1747 | // There shouldn't be a loss function if we evaluate with multistage jit. | |||
| 1748 | assert(compiled_data->loss.cmd == CCV_NNC_NOOP)((void) sizeof ((compiled_data->loss.cmd == CCV_NNC_NOOP) ? 1 : 0), __extension__ ({ if (compiled_data->loss.cmd == CCV_NNC_NOOP ) ; else __assert_fail ("compiled_data->loss.cmd == CCV_NNC_NOOP" , "ccv_cnnp_model.c", 1748, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1749 | if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_NONE) | |||
| 1750 | { | |||
| 1751 | _ccv_cnnp_model_set_rewindables(model); | |||
| 1752 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. | |||
| 1753 | } else if (compiled_data->gradient_mode != target_gradient_mode) { | |||
| 1754 | _ccv_cnnp_model_rewind_graph(model); | |||
| 1755 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | |||
| 1756 | compiled_data->gradient_mode = CCV_CNNP_COMPILED_DATA_GRADIENT_NONE; | |||
| 1757 | _ccv_cnnp_model_gradient_init(model, target_gradient_mode, disable_outgrad, 0, 0); // The type of outputs and fits should be the same. We only use type here. | |||
| 1758 | } | |||
| 1759 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
| 1760 | if (!tensors_init) | |||
| 1761 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
| 1762 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
| 1763 | // Check if it is not fully allocated, if it is not, init_1. | |||
| 1764 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
| 1765 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
| 1766 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1766, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1767 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1767, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1768 | const int input_size_per_p = input_size / parallel_count; | |||
| 1769 | _ccv_cnnp_model_bind_tensors(model->graph, model->inputs, inputs, input_size_per_p, parallel_count, tensor_binds); | |||
| 1770 | const int output_size_per_p = output_size / parallel_count; | |||
| 1771 | _ccv_cnnp_model_bind_tensors(model->graph, model->outputs, outputs, output_size_per_p, parallel_count, tensor_binds); | |||
| 1772 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1773 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
| 1774 | const int internal_size = compiled_data->internals->rnum; | |||
| 1775 | _ccv_cnnp_model_remove_nocopies(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count); | |||
| 1776 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, 0)((void*)(((char*)((compiled_data->internals)->data)) + ( size_t)(compiled_data->internals)->rsize * (size_t)(0)) ), compiled_data->tensors.internals, internal_size, parallel_count, tensor_binds); | |||
| 1777 | if (!compiled_data->tensors.gradients) | |||
| 1778 | _ccv_cnnp_model_gradient_tensors_init(model, compiled_data); | |||
| 1779 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); | |||
| 1780 | if (compiled_data->backward.to_size > 0) | |||
| 1781 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->backward.tos, compiled_data->backward.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
| 1782 | else | |||
| 1783 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(model->graph)ccv_nnc_symbolic_graph_sources(model->graph), ccv_nnc_symbolic_graph_source_size (model->graph), compiled_data->evaluate.tos, compiled_data->evaluate.to_size, &compiled_data->graph, &compiled_data->tensor_arena, &compiled_data->graph_exec_arena); | |||
| 1784 | ccv_array_free(tensor_binds); | |||
| 1785 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 1786 | if (tensors_init && parallel_count > 1) | |||
| 1787 | _ccv_cnnp_model_copy_tensors(init_v, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, compiled_data->parameters->rnum, parallel_count); | |||
| 1788 | // If tensor is not init'ed, we need to init states first. | |||
| 1789 | if (_ccv_cnnp_any_to_init(compiled_data)) | |||
| 1790 | { | |||
| 1791 | ccv_nnc_tensor_init_states_t tensor_init_states = { | |||
| 1792 | .parallel_count = parallel_count, | |||
| 1793 | .graph = model->graph, | |||
| 1794 | .compiled_data = compiled_data, | |||
| 1795 | .tensor_arena = compiled_data->tensor_arena | |||
| 1796 | }; | |||
| 1797 | ccv_cnnp_model_init_states(model, model->graph, _ccv_cnnp_init_states_for_tensors, &tensor_init_states); | |||
| 1798 | } | |||
| 1799 | compiled_data->is_test = is_test; | |||
| 1800 | ccv_nnc_graph_exec_update_t update = { | |||
| 1801 | .parallel_count = parallel_count, | |||
| 1802 | .graph = model->graph, | |||
| 1803 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
| 1804 | }; | |||
| 1805 | ccv_cnnp_model_set_is_test(model, is_test, _ccv_cnnp_cmd_update_for_execs, &update); | |||
| 1806 | const int evaluate_to_size = compiled_data->evaluate.to_size; | |||
| 1807 | compiled_data->evaluate.to_op_size = 0; | |||
| 1808 | ccv_array_t* const backward_from = ccv_array_new(sizeof(int), 0, 0); | |||
| 1809 | for (i = 0; i < evaluate_to_size; i++) | |||
| 1810 | { | |||
| 1811 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, compiled_data->evaluate.tos[i]); | |||
| 1812 | if (to_op.graph) | |||
| 1813 | compiled_data->evaluate.to_ops[compiled_data->evaluate.to_op_size++] = to_op; | |||
| 1814 | const int* tos; | |||
| 1815 | int to_size; | |||
| 1816 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->evaluate.tos[i], &tos, &to_size); | |||
| 1817 | for (j = 0; j < to_size; j++) | |||
| 1818 | { | |||
| 1819 | ccv_nnc_graph_exec_t const to_op = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ | |||
| 1820 | .d = tos[j], | |||
| 1821 | .graph = model->graph | |||
| 1822 | }); | |||
| 1823 | if (to_op.graph) | |||
| 1824 | ccv_array_add_unique_int(backward_from, to_op.d); | |||
| 1825 | } | |||
| 1826 | } | |||
| 1827 | assert(backward_from->rnum > 0)((void) sizeof ((backward_from->rnum > 0) ? 1 : 0), __extension__ ({ if (backward_from->rnum > 0) ; else __assert_fail ( "backward_from->rnum > 0", "ccv_cnnp_model.c", 1827, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1828 | compiled_data->backward.from_op_size = backward_from->rnum; | |||
| 1829 | compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum); | |||
| 1830 | for (i = 0; i < backward_from->rnum; i++) | |||
| 1831 | compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){ | |||
| 1832 | .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from )->rsize * (size_t)(i))), | |||
| 1833 | .graph = compiled_data->graph, | |||
| 1834 | }; | |||
| 1835 | // If there are any set node (to set some tensors to 0) inserted through backward pass, these won't be executed if we just do sources -> evaluate.to_ops, backward.from_ops -> destinations. We need this logic to find out these nodes and explicitly adding them to backward.from_ops. | |||
| 1836 | ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(compiled_data->graph->exec_info, 0)((void*)(((char*)((compiled_data->graph->exec_info)-> data)) + (size_t)(compiled_data->graph->exec_info)-> rsize * (size_t)(0))); | |||
| 1837 | const int exec_info_size = compiled_data->graph->exec_info->rnum; | |||
| 1838 | uint32_t* const visited = cccalloccalloc((exec_info_size + 31) >> 5, sizeof(uint32_t)); | |||
| 1839 | const ccv_nnc_graph_exec_t* const sources = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->sources, 0)((void*)(((char*)((compiled_data->graph->sources)->data )) + (size_t)(compiled_data->graph->sources)->rsize * (size_t)(0))); | |||
| 1840 | const int source_size = compiled_data->graph->sources->rnum; | |||
| 1841 | ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info )[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)(( void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + ( size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_ ))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_ ++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_ ].graph == compiled_data->graph) ? 1 : 0), __extension__ ( { if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_]. 
d].r = 5; _exists_[0][_i_] = (compiled_data->evaluate.to_ops )[_i_].d; } _exist_size_[0] = (compiled_data->evaluate.to_op_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size); _i_ ++) { ((void) sizeof (((compiled_data->evaluate.to_ops)[_i_ ].graph == compiled_data->graph) ? 1 : 0), __extension__ ( { if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->evaluate.to_ops)[_i_]. d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { (( void) sizeof (((sources)[_i_].graph == compiled_data->graph ) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_ ] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size ].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_ [_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info )[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[ _idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings )->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size)) { _exists_[_p_][_i_ ] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[ _idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void *)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t )((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); -- _incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_ [d].r == 6 && _d_ < (compiled_data->evaluate.to_op_size )) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (compiled_data->evaluate.to_op_size ); _i_++) { ((void) sizeof (((compiled_data->evaluate.to_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->evaluate.to_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->evaluate.to_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(compiled_data->evaluate.to_ops)[_i_ ].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_ [(compiled_data->evaluate.to_ops)[_i_].d].c == 0) ? 1 : 0) , __extension__ ({ if (_incomings_[(compiled_data->evaluate .to_ops)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(compiled_data->evaluate.to_ops)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1841, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(compiled_data->evaluate.to_ops )[_i_].d].c > 0) continue; _visit_->node[_visit_->size ].index = (((compiled_data->evaluate.to_ops)[_i_].d)); _visit_ ->node[_visit_->size].term = ((_incomings_[(compiled_data ->evaluate.to_ops)[_i_].d].d)); ++_visit_->size;; } if ( _heap_mem_) free(_incomings_); } while (0);; ((void) sizeof ( (_visit_->size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)", "ccv_cnnp_model.c" , 1841, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); | |||
| 1842 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1843 | visited[(idx >> 5)] |= (1u << (idx & 31)); | |||
| 1844 | } ccv_nnc_graph_visit_endfor} } | |||
| 1845 | ccv_nnc_graph_visit_free(visit); | |||
| 1846 | const ccv_nnc_graph_exec_t* const destinations = (ccv_nnc_graph_exec_t*)ccv_array_get(compiled_data->graph->destinations, 0)((void*)(((char*)((compiled_data->graph->destinations)-> data)) + (size_t)(compiled_data->graph->destinations)-> rsize * (size_t)(0))); | |||
| 1847 | const int destination_size = compiled_data->graph->destinations->rnum; | |||
| 1848 | visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size) ; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->backward.from_ops)[_i_ ].d].r = 1; _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } int _exist_size_[2] = { (compiled_data->backward .from_op_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[ _idx_].r = 2; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size) ; _i_++) { ((void) sizeof (((compiled_data->backward.from_ops )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(compiled_data->backward.from_ops)[_i_ ].d].r = 3; _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } _exist_size_[0] = (compiled_data->backward.from_op_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue ; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info )[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_]. outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_ [d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_ [d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof (( _exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)", "ccv_cnnp_model.c" , 1848, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_ ][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = ( _p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations) [_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0] [_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (compiled_data->backward.from_op_size); _i_++ ) { ((void) sizeof (((compiled_data->backward.from_ops)[_i_ ].graph == compiled_data->graph) ? 
1 : 0), __extension__ ( { if ((compiled_data->backward.from_ops)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(compiled_data->backward.from_ops)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (compiled_data->backward.from_ops )[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (compiled_data ->backward.from_op_size); _exist_size_[1] = 0; int _d_ = 0 ; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_ [_idx_].d)); ++_visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info)[_idx_].outgoings ) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size) ) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings)-> data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * ( size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size )) { ((void) sizeof ((_exist_size_[_q_] < (exec_info_size) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_info_size )) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == compiled_data-> graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_ ].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations )[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)" , "ccv_cnnp_model.c", 1848, __extension__ __PRETTY_FUNCTION__ ); })); _visit_; }); | |||
| 1849 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1850 | visited[(idx >> 5)] |= (1u << (idx & 31)); | |||
| 1851 | } ccv_nnc_graph_visit_endfor} } | |||
| 1852 | ccv_nnc_graph_visit_free(visit); | |||
| 1853 | visit = ccv_nnc_graph_visit_new(compiled_data->graph, exec_info, exec_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((exec_info_size) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t ; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (exec_info_size); _i_++) _incoming_edges_ += ((exec_info)[_i_ ].outgoings) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_ = ((exec_info_size) + _incoming_edges_ > 1024 ); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * ( exec_info_size) + sizeof(int32_t) * ((exec_info_size) * 2 + _incoming_edges_ )); else _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca ( sizeof(ccv_nnc_incoming_t) * (exec_info_size) + sizeof(int32_t ) * ((exec_info_size) * 2 + _incoming_edges_)); memset(_incomings_ , 0, sizeof(ccv_nnc_incoming_t) * (exec_info_size)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_info_size)), ( int32_t*)(_incomings_ + (exec_info_size)) + (exec_info_size), }; int32_t* const _edges_ = _exists_[1] + (exec_info_size); for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_ [_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if ( _incomings_[_idx_].r != 1) continue; _incomings_[_idx_].r = 2 ; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info )[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)(( void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + ( size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_ ))); ++_incomings_[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if ((exec_info)[_idx_].outgoings) for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info)[_idx_].outgoings )->data)) + (size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges == 0) { _incomings_ [d].edges = _bump_; _bump_ += _incomings_[d].c; _incomings_[d ].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d]. c] = _idx_; ++_incomings_[d].c; if (_incomings_[d].r != 2) continue ; _incomings_[d].r = 3; ((void) sizeof ((_exist_size_[_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].r = 5; _exists_[0] [_i_] = (destinations)[_i_].d; } _exist_size_[0] = (destination_size ); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[ _idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_ [_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == compiled_data->graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data ->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources )[_i_].graph == compiled_data->graph) ? 
1 : 0), __extension__ ({ if ((sources)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(sources)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_ ] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size ].index = ((_idx_)); _visit_->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_ [_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if ((exec_info )[_idx_].outgoings) { if ((exec_info)[_idx_].outgoings->rnum == 1) { const int d = *(int*)((void*)(((char*)(((exec_info)[ _idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].outgoings )->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_ [d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum ; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info )[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_]. outgoings)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { ((void) sizeof ((_exist_size_ [_q_] < (exec_info_size)) ? 1 : 0), __extension__ ({ if (_exist_size_ [_q_] < (exec_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_info_size)" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == compiled_data-> graph) ? 
1 : 0), __extension__ ({ if ((destinations)[_i_].graph == compiled_data->graph) ; else __assert_fail ("(destinations)[_i_].graph == compiled_data->graph" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); if (_incomings_[(destinations)[_i_].d].r == 7) continue ; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_ ].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations )[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (exec_info_size)) ? 1 : 0), __extension__ ({ if (_visit_ ->size <= (exec_info_size)) ; else __assert_fail ("_visit_->size <= (exec_info_size)" , "ccv_cnnp_model.c", 1853, __extension__ __PRETTY_FUNCTION__ ); })); _visit_; }); | |||
| 1854 | // Find any missing nodes to be added as source. Right now, these are only set nodes. | |||
| 1855 | ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const int idx __attribute__((unused)) = (visit)->node[_i_].index ; const int _node_unused_ __attribute__((unused)) = (visit)-> node[_i_].term; typeof ((exec_info)) const node __attribute__ ((unused)) = (exec_info) + idx; { | |||
| 1856 | if (!(visited[(idx >> 5)] & (1u << (idx & 31)))) | |||
| 1857 | { | |||
| 1858 | assert(exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD)((void) sizeof ((exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD ) ? 1 : 0), __extension__ ({ if (exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD ) ; else __assert_fail ("exec_info[idx].cmd.cmd == CCV_NNC_SET_FORWARD" , "ccv_cnnp_model.c", 1858, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1859 | if (exec_info[idx].cmd.info.blas.a[0] == 0) // Special-casing for empty out the tensor set function, not for the set grad to 1 one. | |||
| 1860 | ccv_array_add_unique_int(backward_from, idx); | |||
| 1861 | } | |||
| 1862 | } ccv_nnc_graph_visit_endfor} } | |||
| 1863 | ccv_nnc_graph_visit_free(visit); | |||
| 1864 | ccfreefree(visited); | |||
| 1865 | if (backward_from->rnum != compiled_data->backward.from_op_size) // If it doesn't match, need to redo this. | |||
| 1866 | { | |||
| 1867 | compiled_data->backward.from_op_size = backward_from->rnum; | |||
| 1868 | compiled_data->backward.from_ops = (ccv_nnc_graph_exec_t*)ccreallocrealloc(compiled_data->backward.from_ops, sizeof(ccv_nnc_graph_exec_t) * backward_from->rnum); | |||
| 1869 | for (i = 0; i < backward_from->rnum; i++) | |||
| 1870 | compiled_data->backward.from_ops[i] = (ccv_nnc_graph_exec_t){ | |||
| 1871 | .d = *(int*)ccv_array_get(backward_from, i)((void*)(((char*)((backward_from)->data)) + (size_t)(backward_from )->rsize * (size_t)(i))), | |||
| 1872 | .graph = compiled_data->graph, | |||
| 1873 | }; | |||
| 1874 | } | |||
| 1875 | ccv_array_free(backward_from); | |||
| 1876 | ccv_nnc_graph_set_default_static_schedule(compiled_data->graph, compiled_data->stream_type, model->max_stream_count); | |||
| 1877 | ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL0,0,0,0); | |||
| 1878 | } | |||
| 1879 | ||||
// Prepare the model's compiled graph for an evaluation with the given tensors.
// This function either (re)builds the compiled graph or rebinds the caller's tensors to the
// existing tensor arena, and then synchronizes the is_test flag. It does not call a graph-run
// routine itself; ccv_cnnp_model_evaluate calls this first and then runs the graph.
//
// model: must already have compiled_data and a symbolic graph (both are asserted below).
// params: requires_grad, disable_outgrad and is_test are the fields read here.
// inputs / input_size: input_size must equal model->input_size * parallel_count (asserted).
// outputs / output_size: output_size must equal model->output_size * parallel_count (asserted).
| 1880 | void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size) | |||
| 1881 | { | |||
| 1882 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1883 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1883, __extension__ __PRETTY_FUNCTION__); })); | |||
// A parallel_count below 1 is treated as 1, so the size checks below always use a factor of at least 1.
| 1884 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1885 | assert(output_size == model->output_size * parallel_count)((void) sizeof ((output_size == model->output_size * parallel_count ) ? 1 : 0), __extension__ ({ if (output_size == model->output_size * parallel_count) ; else __assert_fail ("output_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1885, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1886 | assert(input_size == model->input_size * parallel_count)((void) sizeof ((input_size == model->input_size * parallel_count ) ? 1 : 0), __extension__ ({ if (input_size == model->input_size * parallel_count) ; else __assert_fail ("input_size == model->input_size * parallel_count" , "ccv_cnnp_model.c", 1886, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1887 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1887, __extension__ __PRETTY_FUNCTION__); })); | |||
// Gradient mode this call wants: TRAINABLES when _ccv_cnnp_is_disable_outgrad_all() returns non-zero
// for params.disable_outgrad, otherwise TRAINABLES_AND_INPUTS.
| 1888 | const int target_gradient_mode = _ccv_cnnp_is_disable_outgrad_all(params.disable_outgrad, model->input_size) ? CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES : CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS; | |||
// A mismatch can only be reported when gradients are requested: the compiled graph must be in
// MULTISTAGE_MODE with the same gradient_mode and the same disable_outgrad value as this call.
| 1889 | const int mode_mismatch = (params.requires_grad && (compiled_data->graph_mode != CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE || compiled_data->gradient_mode != target_gradient_mode || compiled_data->disable_outgrad != params.disable_outgrad)); | |||
// Rebuild path: taken when there is no compiled graph yet or the existing one has the wrong mode.
| 1890 | if (!compiled_data->graph || mode_mismatch) | |||
| 1891 | { | |||
| 1892 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
| 1893 | if (mode_mismatch) // If mode mismatch, we need to redo the backward as well (no need to redo apply_gradients, it doesn't require target_gradient_mode or disable_outgrad). | |||
| 1894 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | |||
// NOTE(review): the two helpers below presumably compile the graph with / without gradient support
// and bind the given tensors; their bodies are not visible here - confirm.
| 1895 | if (params.requires_grad) | |||
| 1896 | _ccv_cnnp_model_multistage_jit_0(model, params.disable_outgrad, params.is_test, inputs, input_size, outputs, output_size); | |||
| 1897 | else | |||
| 1898 | _ccv_cnnp_model_multistage_no_grad_jit(model, inputs, input_size, outputs, output_size); | |||
| 1899 | } else { | |||
// Reuse path: keep the compiled graph, drop the previous tensor bindings and bind this call's
// inputs and outputs to the arena, passing the per-parallel-copy tensor counts.
| 1900 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->tensor_arena); | |||
| 1901 | assert((input_size % parallel_count) == 0)((void) sizeof (((input_size % parallel_count) == 0) ? 1 : 0) , __extension__ ({ if ((input_size % parallel_count) == 0) ; else __assert_fail ("(input_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1901, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1902 | const int input_size_per_p = input_size / parallel_count; | |||
| 1903 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->inputs, inputs, input_size_per_p, parallel_count); | |||
| 1904 | assert((output_size % parallel_count) == 0)((void) sizeof (((output_size % parallel_count) == 0) ? 1 : 0 ), __extension__ ({ if ((output_size % parallel_count) == 0) ; else __assert_fail ("(output_size % parallel_count) == 0", "ccv_cnnp_model.c" , 1904, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1905 | const int output_size_per_p = output_size / parallel_count; | |||
| 1906 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, model->outputs, outputs, output_size_per_p, parallel_count); | |||
| 1907 | } | |||
// Propagate a changed is_test flag: record it on compiled_data, then hand the model an update
// context (parallel count, symbolic graph, exec arena) together with the
// _ccv_cnnp_cmd_update_for_execs callback.
| 1908 | if (compiled_data->is_test != params.is_test) | |||
| 1909 | { | |||
| 1910 | compiled_data->is_test = params.is_test; | |||
| 1911 | ccv_nnc_graph_exec_update_t update = { | |||
| 1912 | .parallel_count = parallel_count, | |||
| 1913 | .graph = model->graph, | |||
| 1914 | .graph_exec_arena = compiled_data->graph_exec_arena, | |||
| 1915 | }; | |||
| 1916 | ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update); | |||
| 1917 | } | |||
| 1918 | } | |||
| 1919 | ||||
// Evaluate the model: prepare the compiled graph via ccv_cnnp_model_dry_run, then run it.
//
// model: must already have compiled_data (asserted below).
// params, inputs, input_size, outputs, output_size: forwarded unchanged to ccv_cnnp_model_dry_run.
// tensor_tape, stream_context: forwarded unchanged to ccv_nnc_graph_run_with_schedule.
| 1920 | void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | |||
| 1921 | { | |||
| 1922 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1923 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1923, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1924 | ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size); | |||
// In the no-grad graph mode the graph is run with a null (0) schedule argument.
| 1925 | if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD) | |||
| 1926 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context); | |||
| 1927 | else { | |||
// Otherwise a schedule targeting evaluate.to_ops is created on first use, cached in
// compiled_data->evaluate.schedule, and used for this and later runs.
| 1928 | if (!compiled_data->evaluate.schedule) | |||
| 1929 | compiled_data->evaluate.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, 0, 0, compiled_data->evaluate.to_ops, compiled_data->evaluate.to_op_size); | |||
| 1930 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->evaluate.schedule, tensor_tape, stream_context); | |||
| 1931 | } | |||
| 1932 | } | |||
| 1933 | ||||
| 1934 | // Compile the graph to run ccv_cnnp_model_backward after ccv_cnnp_model_evaluate with requires_grad = true (MULTISTAGE_MODE). | |||
| 1935 | // Particularly, this method compiles the accumulator graph. | |||
| 1936 | static void _ccv_cnnp_model_multistage_jit_1(ccv_cnnp_model_t* const model) | |||
| 1937 | { | |||
| 1938 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1939 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1939, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1940 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1940, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1941 | ccv_nnc_symbolic_graph_t* accum = ccv_nnc_symbolic_graph_new(); | |||
| 1942 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1943 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1944 | int i, j; | |||
| 1945 | compiled_data->backward.gradients = (ccv_nnc_tensor_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * parameter_size * parallel_count * 3); | |||
| 1946 | compiled_data->backward.accum_gradients = compiled_data->backward.gradients + parameter_size * parallel_count; | |||
| 1947 | compiled_data->backward.updated_accum_gradients = compiled_data->backward.accum_gradients + parameter_size * parallel_count; | |||
| 1948 | for (i = 0; i < parameter_size; i++) | |||
| 1949 | for (j = 0; j < parallel_count; j++) | |||
| 1950 | if (compiled_data->tensors.gradients[i + j * parameter_size]) | |||
| 1951 | { | |||
| 1952 | const ccv_nnc_tensor_param_t info = compiled_data->tensors.gradients[i + j * parameter_size]->info; | |||
| 1953 | // Now, the old gradient is the accumulated gradient, getting new gradient tensor setup so we can collect them. | |||
| 1954 | compiled_data->tensors.accum_gradients[i + j * parameter_size] = compiled_data->tensors.gradients[i + j * parameter_size]; | |||
| 1955 | compiled_data->tensors.gradients[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0); | |||
| 1956 | ccv_nnc_tensor_symbol_t inputs[2]; | |||
| 1957 | inputs[0] = compiled_data->backward.accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | |||
| 1958 | inputs[1] = compiled_data->backward.gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | |||
| 1959 | ccv_nnc_tensor_symbol_t output = compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = ccv_nnc_tensor_symbol_new(accum, info, 0); | |||
| 1960 | ccv_nnc_graph_exec_symbol_new(accum, CMD_EWSUM_FORWARD()ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0), inputs, 2, &output, 1, 0); | |||
| 1961 | } else { | |||
| 1962 | compiled_data->backward.accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 1963 | compiled_data->backward.gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 1964 | compiled_data->backward.updated_accum_gradients[i + j * parameter_size] = NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL }; | |||
| 1965 | } | |||
| 1966 | ccv_nnc_graph_exec_symbol_autogen(accum, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| 1967 | if (ccv_nnc_symbolic_graph_source_size(accum) == 0) | |||
| 1968 | { | |||
| 1969 | ccv_nnc_symbolic_graph_free(accum); | |||
| 1970 | // Create empty graph. | |||
| 1971 | compiled_data->backward.accum = ccv_nnc_graph_new(); | |||
| 1972 | ccv_nnc_graph_topsort(compiled_data->backward.accum, 0, 0); | |||
| 1973 | return; | |||
| 1974 | } | |||
| 1975 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
| 1976 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); | |||
| 1977 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1, tensor_binds); | |||
| 1978 | _ccv_cnnp_model_bind_tensors(accum, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1, tensor_binds); | |||
| 1979 | ccv_nnc_symbolic_graph_compile(accum, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, SYMBOLIC_GRAPH_SOURCES(accum)ccv_nnc_symbolic_graph_sources(accum), ccv_nnc_symbolic_graph_source_size (accum), SYMBOLIC_GRAPH_DESTINATIONS(accum)ccv_nnc_symbolic_graph_destinations(accum), ccv_nnc_symbolic_graph_destination_size (accum), &compiled_data->backward.accum, &compiled_data->backward.tensor_arena, &compiled_data->backward.graph_exec_arena); | |||
| 1980 | ccv_nnc_symbolic_graph_free(accum); | |||
| 1981 | ccv_array_free(tensor_binds); | |||
| 1982 | ccv_nnc_graph_set_default_static_schedule(compiled_data->backward.accum, compiled_data->stream_type, model->max_stream_count); | |||
| 1983 | } | |||
| 1984 | ||||
| 1985 | void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context) | |||
| 1986 | { | |||
| 1987 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 1988 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 1988, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1989 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 1989, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1990 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 1991 | assert(ingrad_size == 0 || ingrad_size == model->output_size * parallel_count)((void) sizeof ((ingrad_size == 0 || ingrad_size == model-> output_size * parallel_count) ? 1 : 0), __extension__ ({ if ( ingrad_size == 0 || ingrad_size == model->output_size * parallel_count ) ; else __assert_fail ("ingrad_size == 0 || ingrad_size == model->output_size * parallel_count" , "ccv_cnnp_model.c", 1991, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1992 | if (outgrad_size > 0) | |||
| 1993 | { assert(outgrad_size == compiled_data->outgrad_size * parallel_count)((void) sizeof ((outgrad_size == compiled_data->outgrad_size * parallel_count) ? 1 : 0), __extension__ ({ if (outgrad_size == compiled_data->outgrad_size * parallel_count) ; else __assert_fail ("outgrad_size == compiled_data->outgrad_size * parallel_count" , "ccv_cnnp_model.c", 1993, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 1994 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 1994, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1995 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 1995, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 1996 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 1997 | // If we need to accumulate the gradients now, do jit on accumulator. | |||
| 1998 | if (compiled_data->backward.count > 0) | |||
| 1999 | { | |||
| 2000 | if (!compiled_data->backward.accum) | |||
| 2001 | _ccv_cnnp_model_multistage_jit_1(model); | |||
| 2002 | else if (compiled_data->backward.count == 1) { | |||
| 2003 | // On this round, we need to switch accumulated gradients with gradients (so we can do accumulation properly). | |||
| 2004 | int i; | |||
| 2005 | for (i = 0; i < parameter_size * parallel_count; i++) | |||
| 2006 | { | |||
| 2007 | ccv_nnc_tensor_t* tensor; | |||
| 2008 | CCV_SWAP(compiled_data->tensors.accum_gradients[i], compiled_data->tensors.gradients[i], tensor)((tensor) = (compiled_data->tensors.accum_gradients[i]), ( compiled_data->tensors.accum_gradients[i]) = (compiled_data ->tensors.gradients[i]), (compiled_data->tensors.gradients [i]) = (tensor)); | |||
| 2009 | } | |||
| 2010 | if (compiled_data->backward.tensor_arena) | |||
| 2011 | { | |||
| 2012 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->backward.tensor_arena); | |||
| 2013 | // Do rebind in case we messed up the binding (we switch accum_gradients and gradients). | |||
| 2014 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.gradients, compiled_data->tensors.gradients, parameter_size * parallel_count, 1); | |||
| 2015 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); | |||
| 2016 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->backward.tensor_arena, 0, compiled_data->backward.updated_accum_gradients, compiled_data->tensors.accum_gradients, parameter_size * parallel_count, 1); | |||
| 2017 | } | |||
| 2018 | } | |||
| 2019 | } | |||
| 2020 | const int ingrad_size_per_p = model->output_size; | |||
| 2021 | const int outgrad_size_per_p = compiled_data->outgrad_size; | |||
| 2022 | int i, j; | |||
| 2023 | for (i = 0; i < ingrad_size_per_p; i++) | |||
| 2024 | { | |||
| 2025 | const ccv_nnc_tensor_symbol_t ingrad = ccv_nnc_tensor_symbol_for_backward(model->graph, compiled_data->f[i]); | |||
| 2026 | if (!ingrad_size || !ingrads || ingrads[i] == 0) | |||
| 2027 | { | |||
| 2028 | // Set it to 1 if it is not specified. | |||
| 2029 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ingrad); | |||
| 2030 | if (ingrad_tensor) | |||
| 2031 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); | |||
| 2032 | for (j = 1; j < parallel_count; j++) | |||
| 2033 | { | |||
| 2034 | ccv_nnc_tensor_t* const ingrad_tensor = ccv_nnc_tensor_from_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j)); | |||
| 2035 | if (ingrad_tensor) | |||
| 2036 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={1,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(ingrad_tensor)(ccv_nnc_tensor_t* []){ingrad_tensor}, (1 +1 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), stream_context); | |||
| 2037 | } | |||
| 2038 | } else { | |||
| 2039 | // Make sure the length matches, in case it is an alias. | |||
| 2040 | assert(ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad)))((void) sizeof ((ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model-> graph, ingrad))) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count (ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params (model->graph, ingrad))) ; else __assert_fail ("ccv_nnc_tensor_count(ingrads[i]->info) == ccv_nnc_tensor_count(ccv_nnc_tensor_symbol_params(model->graph, ingrad))" , "ccv_cnnp_model.c", 2040, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2041 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ingrad, ingrads[i]); | |||
| 2042 | for (j = 1; j < parallel_count; j++) | |||
| 2043 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, ingrad, j), ingrads[i + ingrad_size_per_p * j]); | |||
| 2044 | } | |||
| 2045 | } | |||
| 2046 | if (outgrad_size > 0) | |||
| 2047 | { | |||
| 2048 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad")((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && "shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad" ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS && \"shouldn't pass disable_outgrad to ccv_cnnp_model_evaluate before if you plan to compute outgrad\"" , "ccv_cnnp_model.c", 2048, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2049 | for (i = 0; i < outgrad_size_per_p; i++) | |||
| 2050 | if (outgrads[i]) | |||
| 2051 | { | |||
| 2052 | const ccv_nnc_tensor_symbol_t outgrad = compiled_data->outgrads[i]; | |||
| 2053 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, outgrad, outgrads[i]); | |||
| 2054 | for (j = 1; j < parallel_count; j++) | |||
| 2055 | ccv_nnc_tensor_bind_symbol(compiled_data->tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, outgrad, j), outgrads[i + outgrad_size_per_p * j]); | |||
| 2056 | } | |||
| 2057 | } else { | |||
| 2058 | assert(compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES ||((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 2059, __extension__ __PRETTY_FUNCTION__ ); })) | |||
| 2059 | compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS)((void) sizeof ((compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ? 1 : 0), __extension__ ({ if (compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data ->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS ) ; else __assert_fail ("compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES || compiled_data->gradient_mode == CCV_CNNP_COMPILED_DATA_GRADIENT_TRAINABLES_AND_INPUTS" , "ccv_cnnp_model.c", 2059, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2060 | } | |||
| 2061 | // We need to rebind here because in ccv_cnnp_evaluate, we clear bindings, that will reset all bindings for the gradients. | |||
| 2062 | // For parameters and internals these are fine because when we clear bindings, it restores to original bindings, which are these | |||
| 2063 | // parameters and internals. The same cannot be said for gradients due to the accum_gradients switching. | |||
| 2064 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); | |||
| 2065 | if (!compiled_data->backward.schedule) | |||
| 2066 | compiled_data->backward.schedule = ccv_nnc_graph_static_schedule_new(compiled_data->graph, compiled_data->stream_type, model->max_stream_count, compiled_data->backward.from_ops, compiled_data->backward.from_op_size, 0, 0); | |||
| 2067 | // Run the backward pass. | |||
| 2068 | ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, compiled_data->backward.schedule, tensor_tape, stream_context); | |||
| 2069 | // If we need to run accumulation round, do that now. | |||
| 2070 | if (compiled_data->backward.count > 0) | |||
| 2071 | ccv_nnc_graph_run_with_schedule(compiled_data->backward.accum, 0, 0, 0, stream_context); | |||
| 2072 | // Update the count, this determines whether we need to accumulate or not. | |||
| 2073 | ++compiled_data->backward.count; | |||
| 2074 | } | |||
| 2075 | ||||
| 2076 | // Compile the graph to run ccv_cnnp_model_apply_gradients after ccv_cnnp_model_backward (MULTISTAGE_MODE). | |||
| 2077 | // Particularly, this method compiles the parameter update graph. | |||
| 2078 | static void _ccv_cnnp_model_multistage_jit_2(ccv_cnnp_model_t* const model) | |||
| 2079 | { | |||
| 2080 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2081 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 2081, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2082 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2083 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2084 | ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0); | |||
| 2085 | _ccv_cnnp_model_bind_tensors(model->graph, (ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, 0)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( 0))), compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
| 2086 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->updated_parameters, compiled_data->tensors.parameters, parameter_size, parallel_count, tensor_binds); | |||
| 2087 | // Bind accumulated gradients. | |||
| 2088 | if (compiled_data->backward.count > 1) | |||
| 2089 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count, tensor_binds); | |||
| 2090 | else | |||
| 2091 | _ccv_cnnp_model_bind_tensors(model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count, tensor_binds); | |||
| 2092 | ccv_array_t* const apply_gradients_from = ccv_array_new(sizeof(int), 0, 0); | |||
| 2093 | int i, j; | |||
| 2094 | for (i = 0; i < compiled_data->backward.to_size; i++) | |||
| 2095 | { | |||
| 2096 | const int* tos; | |||
| 2097 | int to_size; | |||
| 2098 | ccv_nnc_graph_exec_symbol_to(model->graph, compiled_data->backward.tos[i], &tos, &to_size); | |||
| 2099 | for (j = 0; j < to_size; j++) | |||
| 2100 | { | |||
| 2101 | // Check if this is already show up in the backward graph, if that is the case, it won't be in the apply | |||
| 2102 | // gradients graph. | |||
| 2103 | const ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(compiled_data->graph_exec_arena, (ccv_nnc_graph_exec_symbol_t){ | |||
| 2104 | .d = tos[j], | |||
| 2105 | .graph = model->graph, | |||
| 2106 | }); | |||
| 2107 | if (!exec.graph) | |||
| 2108 | ccv_array_add_unique_int(apply_gradients_from, tos[j]); | |||
| 2109 | } | |||
| 2110 | } | |||
| 2111 | const int from_size = apply_gradients_from->rnum; | |||
| 2112 | if (from_size == 0) | |||
| 2113 | { | |||
| 2114 | ccv_array_free(apply_gradients_from); | |||
| 2115 | ccv_array_free(tensor_binds); | |||
| 2116 | return; | |||
| 2117 | } | |||
| 2118 | ccv_nnc_graph_exec_symbol_t* const froms = (ccv_nnc_graph_exec_symbol_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * from_size); | |||
| 2119 | for (i = 0; i < from_size; i++) | |||
| 2120 | froms[i] = (ccv_nnc_graph_exec_symbol_t){ | |||
| 2121 | .d = *(int*)ccv_array_get(apply_gradients_from, i)((void*)(((char*)((apply_gradients_from)->data)) + (size_t )(apply_gradients_from)->rsize * (size_t)(i))), | |||
| 2122 | .graph = model->graph | |||
| 2123 | }; | |||
| 2124 | ccv_array_free(apply_gradients_from); | |||
| 2125 | // It can only ends with updates on the parameters. | |||
| 2126 | ccv_array_t* const tos = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), parameter_size * parallel_count, 0); | |||
| 2127 | for (i = 0; i < parameter_size; i++) | |||
| 2128 | { | |||
| 2129 | if (compiled_data->update_nodes[i].d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 2130 | continue; | |||
| 2131 | ccv_array_push(tos, &compiled_data->update_nodes[i]); | |||
| 2132 | for (j = 1; j < parallel_count; j++) | |||
| 2133 | { | |||
| 2134 | const ccv_nnc_graph_exec_symbol_t copy = ccv_nnc_graph_exec_symbol_copy(model->graph, compiled_data->update_nodes[i], j); | |||
| 2135 | ccv_array_push(tos, &copy); | |||
| 2136 | } | |||
| 2137 | } | |||
| 2138 | ccv_nnc_symbolic_graph_compile(model->graph, compiled_data->compile_params, (ccv_nnc_tensor_bind_t*)ccv_array_get(tensor_binds, 0)((void*)(((char*)((tensor_binds)->data)) + (size_t)(tensor_binds )->rsize * (size_t)(0))), tensor_binds->rnum, 0, 0, froms, from_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(tos, 0)((void*)(((char*)((tos)->data)) + (size_t)(tos)->rsize * (size_t)(0))), tos->rnum, &compiled_data->apply_gradients.graph, &compiled_data->apply_gradients.tensor_arena, &compiled_data->apply_gradients.graph_exec_arena); | |||
| 2139 | ccv_array_free(tos); | |||
| 2140 | ccv_array_free(tensor_binds); | |||
| 2141 | ccfreefree(froms); | |||
| 2142 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
| 2143 | for (i = 0; i < max_saved_aux_size * parameter_size; i++) | |||
| 2144 | { | |||
| 2145 | // Skip on no tensor. | |||
| 2146 | if (compiled_data->saved_aux[i].source.d == CCV_NNC_NO_TENSOR_SYMBOL) | |||
| 2147 | continue; | |||
| 2148 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, compiled_data->saved_aux[i].source); | |||
| 2149 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &tensor, 1, 0); | |||
| 2150 | for (j = 1; j < parallel_count; j++) | |||
| 2151 | { | |||
| 2152 | ccv_nnc_tensor_t* const copy = ccv_nnc_tensor_from_symbol(compiled_data->apply_gradients.tensor_arena, ccv_nnc_tensor_symbol_copy(model->graph, compiled_data->saved_aux[i].source, j)); | |||
| 2153 | if (copy) | |||
| 2154 | ccv_nnc_cmd_exec(CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={0,}}}, 0), ccv_nnc_no_hint, 0, 0, 0, &copy, 1, 0); | |||
| 2155 | } | |||
| 2156 | } | |||
| 2157 | ccv_nnc_graph_set_default_static_schedule(compiled_data->apply_gradients.graph, compiled_data->stream_type, model->max_stream_count); | |||
| 2158 | } | |||
| 2159 | ||||
| 2160 | void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context) | |||
| 2161 | { | |||
| 2162 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2163 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2163, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2164 | assert(compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE)((void) sizeof ((compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE ) ? 1 : 0), __extension__ ({ if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE) ; else __assert_fail ("compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE" , "ccv_cnnp_model.c", 2164, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2165 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2166 | assert(model->graph)((void) sizeof ((model->graph) ? 1 : 0), __extension__ ({ if (model->graph) ; else __assert_fail ("model->graph", "ccv_cnnp_model.c" , 2166, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2167 | assert(compiled_data->graph)((void) sizeof ((compiled_data->graph) ? 1 : 0), __extension__ ({ if (compiled_data->graph) ; else __assert_fail ("compiled_data->graph" , "ccv_cnnp_model.c", 2167, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2168 | // Skip if there is no backward pass. | |||
| 2169 | if (compiled_data->backward.count <= 0) | |||
| 2170 | return; | |||
| 2171 | // Skip if there is no parameters. | |||
| 2172 | if (compiled_data->parameters->rnum == 0) | |||
| 2173 | { | |||
| 2174 | compiled_data->backward.count = 0; | |||
| 2175 | return; | |||
| 2176 | } | |||
| 2177 | if (!compiled_data->apply_gradients.graph) | |||
| 2178 | _ccv_cnnp_model_multistage_jit_2(model); | |||
| 2179 | else { | |||
| 2180 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2181 | ccv_nnc_tensor_arena_clear_bindings(compiled_data->apply_gradients.tensor_arena); | |||
| 2182 | // Change to bind accum_gradients if we do gradient accumulation (run backward more than once). | |||
| 2183 | if (compiled_data->backward.count > 1) | |||
| 2184 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.accum_gradients, parameter_size, parallel_count); | |||
| 2185 | else | |||
| 2186 | _ccv_cnnp_bind_tensors_to_arena(compiled_data->apply_gradients.tensor_arena, model->graph, compiled_data->gradients, compiled_data->tensors.gradients, parameter_size, parallel_count); | |||
| 2187 | } | |||
| 2188 | if (compiled_data->apply_gradients.graph) | |||
| 2189 | ccv_nnc_graph_run_with_schedule(compiled_data->apply_gradients.graph, 0, 0, 0, stream_context); | |||
| 2190 | // Reset backward count to 0. | |||
| 2191 | compiled_data->backward.count = 0; | |||
| 2192 | } | |||
| 2193 | ||||
| 2194 | void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor) | |||
| 2195 | { | |||
| 2196 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2197 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
| 2198 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2198, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2199 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
| 2200 | if (!tensors_init) | |||
| 2201 | _ccv_cnnp_model_tensors_init(model, compiled_data); | |||
| 2202 | else if ((uintptr_t)compiled_data->tensors_init.v & (uintptr_t)1) | |||
| 2203 | // Check if it is not fully allocated, if it is not, init_1. | |||
| 2204 | ccv_cnnp_model_tensors_init_1(model, compiled_data); | |||
| 2205 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 2206 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
| 2207 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
| 2208 | if (param_ref < 0) | |||
| 2209 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2209 , __extension__ __PRETTY_FUNCTION__); })); } | |||
| 2210 | else | |||
| 2211 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2211, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2212 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
| 2213 | ccv_array_free(parameter_indices); | |||
| 2214 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2215 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2215 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 2216 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2216, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2217 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2218 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); | |||
| 2219 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2219, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2220 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)tensor)(ccv_nnc_tensor_t* []){(ccv_nnc_tensor_t*)tensor}, (1 +1 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1 ), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
| 2221 | int i; | |||
| 2222 | for (i = 1; i < parallel_count; i++) | |||
| 2223 | { | |||
| 2224 | ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d + i * parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d + i * parameter_size]) & ~(uintptr_t)1)); | |||
| 2225 | if (copy_tensor) | |||
| 2226 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
| 2227 | } | |||
| 2228 | // Mark this symbol as init'ed. | |||
| 2229 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, d)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( d))))->d; | |||
| 2230 | uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 2231 | init_v[s >> 5] |= (1u << (s & 0x1f)); | |||
| 2232 | } | |||
| 2233 | ||||
| 2234 | void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor) | |||
| 2235 | { | |||
| 2236 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2237 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
| 2238 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2238, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2239 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2239, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2240 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 2241 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
| 2242 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
| 2243 | if (param_ref < 0) | |||
| 2244 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2244 , __extension__ __PRETTY_FUNCTION__); })); } | |||
| 2245 | else | |||
| 2246 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2246, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2247 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
| 2248 | ccv_array_free(parameter_indices); | |||
| 2249 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2250 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2250 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 2251 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2251, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2252 | // We don't need to consider parallel_count, every parameter on each device is identical. | |||
| 2253 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); | |||
| 2254 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2254, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2255 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
| 2256 | } | |||
| 2257 | ||||
| 2258 | ccv_nnc_tensor_param_t ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter) | |||
| 2259 | { | |||
| 2260 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2261 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
| 2262 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2262, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2263 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2263, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2264 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 2265 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
| 2266 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
| 2267 | if (param_ref < 0) | |||
| 2268 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2268 , __extension__ __PRETTY_FUNCTION__); })); } | |||
| 2269 | else | |||
| 2270 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2270, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2271 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
| 2272 | ccv_array_free(parameter_indices); | |||
| 2273 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2274 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2274 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 2275 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2275, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2276 | // We don't need to consider parallel_count, every parameter on each device is identical. | |||
| 2277 | ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [d]) & ~(uintptr_t)1)); | |||
| 2278 | assert(tensor)((void) sizeof ((tensor) ? 1 : 0), __extension__ ({ if (tensor ) ; else __assert_fail ("tensor", "ccv_cnnp_model.c", 2278, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2279 | return tensor->info; | |||
| 2280 | } | |||
| 2281 | ||||
| 2282 | const char* ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter) | |||
| 2283 | { | |||
| 2284 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2285 | const int param_sel = parameter->param_sel > 0 ? parameter->param_sel - 1 : parameter->param_sel; | |||
| 2286 | assert(parameter->param_sel != 0)((void) sizeof ((parameter->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameter->param_sel != 0) ; else __assert_fail ("parameter->param_sel != 0" , "ccv_cnnp_model.c", 2286, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2287 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 2288 | ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices); | |||
| 2289 | const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref; | |||
| 2290 | if (param_ref < 0) | |||
| 2291 | { assert(parameter_indices->rnum == 1)((void) sizeof ((parameter_indices->rnum == 1) ? 1 : 0), __extension__ ({ if (parameter_indices->rnum == 1) ; else __assert_fail ("parameter_indices->rnum == 1", "ccv_cnnp_model.c", 2291 , __extension__ __PRETTY_FUNCTION__); })); } | |||
| 2292 | else | |||
| 2293 | { assert(param_ref < parameter_indices->rnum)((void) sizeof ((param_ref < parameter_indices->rnum) ? 1 : 0), __extension__ ({ if (param_ref < parameter_indices ->rnum) ; else __assert_fail ("param_ref < parameter_indices->rnum" , "ccv_cnnp_model.c", 2293, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2294 | const int d = *(int*)ccv_array_get(parameter_indices, param_ref >= 0 ? param_ref : 0)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref >= 0 ? param_ref : 0))); | |||
| 2295 | ccv_array_free(parameter_indices); | |||
| 2296 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2297 | assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >= 0) ; else __assert_fail ("d >= 0", "ccv_cnnp_model.c", 2297 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 2298 | assert(d < parameter_size)((void) sizeof ((d < parameter_size) ? 1 : 0), __extension__ ({ if (d < parameter_size) ; else __assert_fail ("d < parameter_size" , "ccv_cnnp_model.c", 2298, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2299 | return *(char**)ccv_array_get(compiled_data->ids.parameters, d)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(d))); | |||
| 2300 | } | |||
| 2301 | ||||
| 2302 | int ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model) | |||
| 2303 | { | |||
| 2304 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 2304, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2305 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2306 | return compiled_data->parameters->rnum; | |||
| 2307 | } | |||
| 2308 | ||||
| 2309 | uint64_t ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model) | |||
| 2310 | { | |||
| 2311 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 2311, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2312 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2313 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2314 | int i; | |||
| 2315 | const ccv_nnc_symbolic_graph_t* const graph = model->graph; | |||
| 2316 | uint64_t size = 0; | |||
| 2317 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
| 2318 | uint32_t* const init_v = tensors_init ? CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)) : 0; | |||
| 2319 | for (i = 0; i < parameter_size; i++) | |||
| 2320 | { | |||
| 2321 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | |||
| 2322 | if (tensors_init && compiled_data->tensors.parameters && (init_v[d >> 5] | (1u << (d & 0x1f))) && compiled_data->tensors.parameters[i]) | |||
| 2323 | { | |||
| 2324 | ccv_nnc_tensor_param_t params = compiled_data->tensors.parameters[i]->info; | |||
| 2325 | size += ccv_nnc_tensor_data_size(params); | |||
| 2326 | continue; | |||
| 2327 | } | |||
| 2328 | ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, (ccv_nnc_tensor_symbol_t){ | |||
| 2329 | .graph = graph, | |||
| 2330 | .d = d | |||
| 2331 | }); | |||
| 2332 | size += ccv_nnc_tensor_data_size(params); | |||
| 2333 | } | |||
| 2334 | return size; | |||
| 2335 | } | |||
| 2336 | ||||
| 2337 | int ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, int type) | |||
| 2338 | { | |||
| 2339 | assert(model->compiled_data)((void) sizeof ((model->compiled_data) ? 1 : 0), __extension__ ({ if (model->compiled_data) ; else __assert_fail ("model->compiled_data" , "ccv_cnnp_model.c", 2339, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2340 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2341 | if (count != compiled_data->parameters->rnum) | |||
| 2342 | return 0; | |||
| 2343 | if (CCV_TENSOR_GET_DEVICE(type)((type) & 0xfff00) == CCV_COMPUTE_DEVICE_ANY) | |||
| 2344 | CCV_TENSOR_SET_DEVICE_ID(type, 0)(type) = (((type) & ~0xfff00) | (((0) & 0xfff) << 8)); | |||
| 2345 | int i; | |||
| 2346 | // We don't need to consider parallel_count, every parameter on each device is identical. | |||
| 2347 | for (i = 0; i < count; i++) | |||
| 2348 | { | |||
| 2349 | ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i]; | |||
| 2350 | if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything. | |||
| 2351 | { | |||
| 2352 | tensors[i] = 0; | |||
| 2353 | continue; | |||
| 2354 | } | |||
| 2355 | tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1) ); | |||
| 2356 | if (tensor->info.type == type) | |||
| 2357 | tensors[i] = tensor; | |||
| 2358 | else { | |||
| 2359 | ccv_nnc_tensor_param_t info = tensor->info; | |||
| 2360 | info.type = type; | |||
| 2361 | tensors[i] = ccv_nnc_tensor_new(0, info, 0); // Create this tensor, don't initiate copy yet. | |||
| 2362 | } | |||
| 2363 | } | |||
| 2364 | for (i = 0; i < count; i++) | |||
| 2365 | { | |||
| 2366 | ccv_nnc_tensor_t* tensor = compiled_data->tensors.parameters[i]; | |||
| 2367 | if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything. | |||
| 2368 | continue; | |||
| 2369 | tensor = CCV_NNC_TENSOR(tensor)((ccv_nnc_tensor_t*)((uintptr_t)(tensor) & ~(uintptr_t)1) ); | |||
| 2370 | // Now initiate transfer. We should do this one on a stream. | |||
| 2371 | if (tensor->info.type != type) | |||
| 2372 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor)(ccv_nnc_tensor_t* []){tensor}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(tensors[i])(ccv_nnc_tensor_t* []){tensors[i]}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
| 2373 | } | |||
| 2374 | // Copy names and remove parameters. | |||
| 2375 | for (i = 0; i < count; i++) | |||
| 2376 | { | |||
| 2377 | ccv_nnc_tensor_t* const tensor = compiled_data->tensors.parameters[i]; | |||
| 2378 | if ((uintptr_t)tensor & (uintptr_t)1) // If it is not owned. We don't do anything. | |||
| 2379 | { | |||
| 2380 | names[i] = 0; | |||
| 2381 | continue; | |||
| 2382 | } | |||
| 2383 | const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i))); | |||
| 2384 | const size_t name_len = ccv_min(strnlen(name, 1023), 1023)({ typeof (strnlen(name, 1023)) _a = (strnlen(name, 1023)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; }); | |||
| 2385 | names[i] = ccmallocmalloc(name_len + 1); | |||
| 2386 | names[i][name_len] = 0; | |||
| 2387 | memcpy(names[i], name, name_len); | |||
| 2388 | if (tensor->info.type == type) | |||
| 2389 | compiled_data->tensors.parameters[i] = 0; // Only move when it is moved. | |||
| 2390 | } | |||
| 2391 | return 1; | |||
| 2392 | } | |||
| 2393 | ||||
// Instantiate a khash map keyed by C strings (kh_cstr_t) with int values. This generates
// the kh_ccv_cnnp_parameter_id_t type and its static inline init / destroy / clear /
// get / resize / put / del functions, used below to map parameter names to indices.
KHASH_MAP_INIT_STR(ccv_cnnp_parameter_id, int)
| 2395 | ||||
// Load parameter tensors into a compiled model from parallel (names[], tensors[]) arrays.
//
// names / tensors: `count` name / tensor pairs supplied by the caller.
// invalidates:     when non-zero and the supplied tensor's type equals the parameter's
//                  type, the tensor is moved into the model and tensors[j] is set to 0.
//                  Otherwise the model's tensor is allocated if missing and the data is
//                  copied with CCV_NNC_DATA_TRANSFER_FORWARD after the main loop.
//
// Each model parameter is matched to an input by its id string through a khash map;
// parameters whose id is not present in `names` are skipped. Every parameter that is
// set gets its bit marked in the tensors_init bitmap, and missing tensors for the other
// parallel_count - 1 slots are allocated (but not filled) here.
void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates)
{
	assert(model->compiled_data);
	ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
	int i;
	khash_t(ccv_cnnp_parameter_id)* id_map = 0;
	// When the counts differ, positional matching is impossible, so build the map up front.
	if (count != compiled_data->parameters->rnum)
	{
		id_map = kh_init(ccv_cnnp_parameter_id);
		// Build the map between name and the index.
		for (i = 0; i < count; i++)
		{
			int ret;
			const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[i], &ret);
			// ret == 0 means the key was already present, i.e. a duplicate name.
			assert(ret != 0);
			kh_val(id_map, k) = i;
		}
	}
	const int parameter_size = compiled_data->parameters->rnum;
	// Lazily allocated; copy_back[i] holds (input index + 1) for parameters that need a
	// data transfer after the loop, 0 for those that do not.
	int* copy_back = 0;
	const int tensors_init = !!compiled_data->tensors_init.v;
	if (!tensors_init)
		ccv_cnnp_model_tensors_init_0(model, compiled_data);
	const int parallel_count = ccv_max(model->parallel_count, 1);
	// The bitmap pointer carries a tag in its low bit; CCV_NNC_INIT_V strips it.
	uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v);
	for (i = 0; i < parameter_size; i++)
	{
		// j is the index into names / tensors that matches parameter i.
		int j = i;
		const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i);
		// NOTE(review): `i >= 0` is always true inside this loop, so the strncmp is never
		// evaluated and the map lookup below is always taken. A bounds guard such as
		// `i >= count` may have been intended — confirm before changing.
		if (i >= 0 || strncmp(name, names[i], 1023) != 0)
		{
			// Build the map.
			if (id_map == 0)
			{
				id_map = kh_init(ccv_cnnp_parameter_id);
				for (j = 0; j < count; j++)
				{
					int ret;
					const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, names[j], &ret);
					assert(ret != 0);
					kh_val(id_map, k) = j;
				}
			}
			const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, name);
			if (k == kh_end(id_map)) // Cannot find the name, skip.
				continue;
			j = kh_val(id_map, k);
		}
		if (compiled_data->tensors.parameters[i]) // Cannot be a shared parameter to read.
			{ assert(!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)); }
		const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
		ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
		// Treat "any device" as device 0 so the type comparison below is meaningful.
		if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
			CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
		const int d = parameter.d;
		if (info.type == tensors[j]->info.type && invalidates) // Can move.
		{
			// Deallocate it if needed.
			if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1))
				if (compiled_data->tensors.parameters[i])
					ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]);
			// Take the caller's tensor and clear the caller's slot.
			compiled_data->tensors.parameters[i] = tensors[j];
			tensors[j] = 0;
		} else {
			if (!compiled_data->tensors.parameters[i])
			{ // Not allocated, to allocate first.
				// Create new one, make sure we create this by having the right parameters.
				const int type = info.type;
				info = tensors[j]->info;
				info.type = type; // Revert back the type.
				compiled_data->tensors.parameters[i] = ccv_nnc_tensor_new(0, info, 0);
			}
			if (!copy_back)
				copy_back = (int*)cccalloc(parameter_size, sizeof(int));
			// Stored with a +1 offset so that 0 can mean "no copy needed".
			copy_back[i] = j + 1;
		}
		// Mark this parameter as initialized.
		init_v[d >> 5] |= (1u << (d & 0x1f));
		// Create this tensor for other data parallel allocations.
		info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
		const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
		// j is reused as the parallel slot index from here on; the matched input index
		// is no longer needed in this iteration.
		for (j = 1; j < parallel_count; j++)
			if (!compiled_data->tensors.parameters[i + j * parameter_size])
			{
				// Slot j uses device j, except the slot equal to the primary tensor's
				// device id, which uses device 0.
				if (j != device_id)
					CCV_TENSOR_SET_DEVICE_ID(info.type, j);
				else
					CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
				compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
			}
		// No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
	}
	if (id_map)
		kh_destroy(ccv_cnnp_parameter_id, id_map);
	// Now do the transfer.
	if (copy_back)
	{
		for (i = 0; i < parameter_size; i++)
		{
			ccv_nnc_tensor_t* const tensor = CCV_NNC_TENSOR(compiled_data->tensors.parameters[i]);
			if (copy_back[i] == 0)
				continue;
			const int j = copy_back[i] - 1;
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensors[j]), TENSOR_LIST(tensor), 0);
		}
		ccfree(copy_back);
	}
}
| 2503 | ||||
| 2504 | ccv_cnnp_model_io_t ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context) | |||
| 2505 | { | |||
| 2506 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2507 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2507, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2508 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2509 | int i; | |||
| 2510 | for (i = 0; i < parameter_size; i++) | |||
| 2511 | { | |||
| 2512 | const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i))); | |||
| 2513 | if (first(model, name, context)) | |||
| 2514 | return ccv_cnnp_model_parameters(model, -1, i); | |||
| 2515 | } | |||
| 2516 | return 0; | |||
| 2517 | } | |||
| 2518 | ||||
| 2519 | ccv_array_t* ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context) | |||
| 2520 | { | |||
| 2521 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2522 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2522, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2523 | ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_cnnp_model_io_t), 0, 0); | |||
| 2524 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2525 | int i; | |||
| 2526 | for (i = 0; i < parameter_size; i++) | |||
| 2527 | { | |||
| 2528 | const char* const name = *(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i))); | |||
| 2529 | if (filter(model, name, context)) | |||
| 2530 | { | |||
| 2531 | ccv_cnnp_model_io_t parameter = ccv_cnnp_model_parameters(model, -1, i); | |||
| 2532 | ccv_array_push(parameters, ¶meter); | |||
| 2533 | } | |||
| 2534 | } | |||
| 2535 | return parameters; | |||
| 2536 | ||||
| 2537 | } | |||
| 2538 | ||||
| 2539 | CCV_WARN_UNUSED(ccv_cnnp_model_io_t)ccv_cnnp_model_io_t __attribute__((warn_unused_result)) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model) | |||
| 2540 | { | |||
| 2541 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 2542 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 2542, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2543 | const int tensors_init = !!compiled_data->tensors_init.v; | |||
| 2544 | if (!tensors_init) // If nothing initialized, we return parameter 0. | |||
| 2545 | return ccv_cnnp_model_parameters(model, -1, 0); | |||
| 2546 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 2547 | int i; | |||
| 2548 | const uint32_t* const init_v = CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 2549 | for (i = 0; i < parameter_size; i++) | |||
| 2550 | { | |||
| 2551 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) + (size_t)(compiled_data->parameters)->rsize * (size_t)( i))))->d; | |||
| 2552 | if (!(init_v[d >> 5] & (1u << (d & 0x1f)))) | |||
| 2553 | return ccv_cnnp_model_parameters(model, -1, i); | |||
| 2554 | } | |||
| 2555 | return 0; | |||
| 2556 | } | |||
| 2557 | ||||
| 2558 | static ccv_array_t* _ccv_cnnp_model_parameter_indices(const ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int* const param_ref) | |||
| 2559 | { | |||
| 2560 | const int to_param_sel = parameters->param_sel > 0 ? parameters->param_sel - 1 : parameters->param_sel; | |||
| 2561 | assert(parameters->param_sel != 0)((void) sizeof ((parameters->param_sel != 0) ? 1 : 0), __extension__ ({ if (parameters->param_sel != 0) ; else __assert_fail ( "parameters->param_sel != 0", "ccv_cnnp_model.c", 2561, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2562 | ccv_array_t* const to_parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 2563 | ccv_cnnp_model_add_to_parameter_indices(parameters->model, to_param_sel, to_parameter_indices); | |||
| 2564 | *param_ref = parameters->param_ref > 0 ? parameters->param_ref - 1 : parameters->param_ref; | |||
| 2565 | return to_parameter_indices; | |||
| 2566 | } | |||
| 2567 | ||||
| 2568 | static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_array_t** const parameter_indices, int* const param_ref, ccv_array_t** const from_parameter_indices, int* const from_param_ref, const int only_init_0) | |||
| 2569 | { | |||
| 2570 | // If the model is not compiled yet. Compile them now. | |||
| 2571 | if (!model->graph) | |||
| 2572 | { | |||
| 2573 | model->graph = ccv_nnc_symbolic_graph_new(); | |||
| 2574 | assert(from_model->compiled_data)((void) sizeof ((from_model->compiled_data) ? 1 : 0), __extension__ ({ if (from_model->compiled_data) ; else __assert_fail ("from_model->compiled_data" , "ccv_cnnp_model.c", 2574, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2575 | const int input_size = from_model->input_size; | |||
| 2576 | ccv_nnc_tensor_param_t input_params[input_size]; | |||
| 2577 | int i; | |||
| 2578 | for (i = 0; i < input_size; i++) | |||
| 2579 | input_params[i] = ccv_nnc_tensor_symbol_params(from_model->graph, from_model->inputs[i]); | |||
| 2580 | _ccv_cnnp_model_compile(model, input_params, input_size, from_model->compiled_data->loss); | |||
| 2581 | model->parallel_count = from_model->parallel_count; | |||
| 2582 | model->memory_compression = from_model->memory_compression; | |||
| 2583 | model->memory_reduction = from_model->memory_reduction; | |||
| 2584 | model->gradient_checkpointing = from_model->gradient_checkpointing; | |||
| 2585 | model->compiled_data->stream_type = from_model->compiled_data->stream_type; | |||
| 2586 | model->compiled_data->minimize.minimizer = from_model->compiled_data->minimize.minimizer; | |||
| 2587 | model->compiled_data->minimize.max_saved_aux_size = from_model->compiled_data->minimize.max_saved_aux_size; | |||
| 2588 | } | |||
| 2589 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
| 2590 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2590, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2591 | const int to_tensors_init = !!to_compiled_data->tensors_init.v; | |||
| 2592 | if (!to_tensors_init) | |||
| 2593 | { | |||
| 2594 | if (only_init_0) | |||
| 2595 | ccv_cnnp_model_tensors_init_0(model, to_compiled_data); | |||
| 2596 | else | |||
| 2597 | _ccv_cnnp_model_tensors_init(model, to_compiled_data); | |||
| 2598 | } else if (!only_init_0 && (uintptr_t)to_compiled_data->tensors_init.v & (uintptr_t)1) | |||
| 2599 | // Check if it is not fully allocated, if it is not, init_1. | |||
| 2600 | ccv_cnnp_model_tensors_init_1(model, to_compiled_data); | |||
| 2601 | assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (to_compiled_data->tensors.parameters ) ; else __assert_fail ("to_compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2601, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2602 | *parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref); | |||
| 2603 | *from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref); | |||
| 2604 | if (*from_param_ref < 0 && *param_ref >= 0) | |||
| 2605 | { assert((*from_parameter_indices)->rnum == 1)((void) sizeof (((*from_parameter_indices)->rnum == 1) ? 1 : 0), __extension__ ({ if ((*from_parameter_indices)->rnum == 1) ; else __assert_fail ("(*from_parameter_indices)->rnum == 1" , "ccv_cnnp_model.c", 2605, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2606 | else if (*from_param_ref >= 0) | |||
| 2607 | { assert(*from_param_ref < (*from_parameter_indices)->rnum)((void) sizeof ((*from_param_ref < (*from_parameter_indices )->rnum) ? 1 : 0), __extension__ ({ if (*from_param_ref < (*from_parameter_indices)->rnum) ; else __assert_fail ("*from_param_ref < (*from_parameter_indices)->rnum" , "ccv_cnnp_model.c", 2607, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2608 | if (*param_ref < 0 && *from_param_ref >= 0) | |||
| 2609 | { assert((*parameter_indices)->rnum == 1)((void) sizeof (((*parameter_indices)->rnum == 1) ? 1 : 0) , __extension__ ({ if ((*parameter_indices)->rnum == 1) ; else __assert_fail ("(*parameter_indices)->rnum == 1", "ccv_cnnp_model.c" , 2609, __extension__ __PRETTY_FUNCTION__); })); } | |||
| 2610 | else if (*param_ref >= 0) | |||
| 2611 | { assert(*param_ref < (*parameter_indices)->rnum)((void) sizeof ((*param_ref < (*parameter_indices)->rnum ) ? 1 : 0), __extension__ ({ if (*param_ref < (*parameter_indices )->rnum) ; else __assert_fail ("*param_ref < (*parameter_indices)->rnum" , "ccv_cnnp_model.c", 2611, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2612 | } | |||
| 2613 | ||||
| 2614 | void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters) | |||
| 2615 | { | |||
| 2616 | ccv_array_t* to_parameter_indices; | |||
| 2617 | int to_param_ref; | |||
| 2618 | ccv_array_t* from_parameter_indices; | |||
| 2619 | int from_param_ref; | |||
| 2620 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0); | |||
| 2621 | // Should be exactly the same tensor. | |||
| 2622 | if (to_param_ref < 0 && from_param_ref < 0) | |||
| 2623 | { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices ->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices ->rnum == to_parameter_indices->rnum) ; else __assert_fail ("from_parameter_indices->rnum == to_parameter_indices->rnum" , "ccv_cnnp_model.c", 2623, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2624 | // To models. | |||
| 2625 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
| 2626 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2626, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2627 | // From models. | |||
| 2628 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | |||
| 2629 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2630 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
| 2631 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1; | |||
| 2632 | int i, j; | |||
| 2633 | const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init. v) & ~(uintptr_t)1)); | |||
| 2634 | uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 2635 | for (i = 0; i < rnum; i++) | |||
| 2636 | { | |||
| 2637 | const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))); | |||
| 2638 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2638, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2639 | assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters ->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data ->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2639, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2640 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | |||
| 2641 | // If the original is not init'ed. We cannot copy from. | |||
| 2642 | if (!(from_init_v[s >> 5] & (1u << (s & 0x1f)))) | |||
| 2643 | continue; | |||
| 2644 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
| 2645 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2645, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2646 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2646, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2647 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d]) & ~(uintptr_t)1)); | |||
| 2648 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2648, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2649 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d]) & ~(uintptr_t)1)); | |||
| 2650 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2650, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2651 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(src)(ccv_nnc_tensor_t* []){src}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
| 2652 | for (j = 1; j < parallel_count; j++) | |||
| 2653 | { | |||
| 2654 | ccv_nnc_tensor_t* const copy_tensor = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
| 2655 | if (copy_tensor) | |||
| 2656 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, TENSOR_LIST(dest)(ccv_nnc_tensor_t* []){dest}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), TENSOR_LIST(copy_tensor)(ccv_nnc_tensor_t* []){copy_tensor}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), 0); | |||
| 2657 | } | |||
| 2658 | // Mark this symbol as init'ed. | |||
| 2659 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | |||
| 2660 | to_init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
| 2661 | } | |||
| 2662 | ccv_array_free(to_parameter_indices); | |||
| 2663 | ccv_array_free(from_parameter_indices); | |||
| 2664 | } | |||
| 2665 | ||||
// ccv_cnnp_model_share_parameters: make the selected parameter slots of `model` refer to the
// parameter tensors of `from_model` (the pointer is stored; no tensor data is copied here).
//
// model / parameters:           destination model and its parameter selector.
// from_model / from_parameters: source model and its parameter selector.
// renamer / context:            optional callback. When set it is invoked as
//                               renamer(context, dest_name, updated_name, 1024); a non-zero
//                               return skips that destination parameter.
//
// NOTE(review): this listing comes from a static-analyzer report. The rows for original lines
// 2698, 2700, 2702, 2704, 2705, 2710 and 2714 are cut off, so the full conditions on those
// lines are not visible here - consult the real source before changing this function.
| 2666 | void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context) | |||
| 2667 | { | |||
| 2668 | ccv_array_t* to_parameter_indices; | |||
| 2669 | int to_param_ref; | |||
| 2670 | ccv_array_t* from_parameter_indices; | |||
| 2671 | int from_param_ref; | |||
// Resolve both selectors into index arrays; the trailing 1 is the helper's only_init_0 flag.
| 2672 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1); | |||
| 2673 | // Should be exactly the same tensor. | |||
// The equal-length requirement is only asserted when no renamer is supplied.
| 2674 | if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0) | |||
| ||||
| 2675 | { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices ->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices ->rnum == to_parameter_indices->rnum) ; else __assert_fail ("from_parameter_indices->rnum == to_parameter_indices->rnum" , "ccv_cnnp_model.c", 2675, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2676 | // To models. | |||
| 2677 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
| 2678 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2678, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2679 | // From models. | |||
| 2680 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | |||
| 2681 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2682 | assert(parallel_count == ccv_max(from_model->parallel_count, 1))((void) sizeof ((parallel_count == ({ typeof (from_model-> parallel_count) _a = (from_model->parallel_count); typeof ( 1) _b = (1); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (parallel_count == ({ typeof (from_model->parallel_count ) _a = (from_model->parallel_count); typeof (1) _b = (1); ( _a > _b) ? _a : _b; })) ; else __assert_fail ("parallel_count == ccv_max(from_model->parallel_count, 1)" , "ccv_cnnp_model.c", 2682, __extension__ __PRETTY_FUNCTION__ ); })); // Should have the same parallel count can share parameters. | |||
| 2683 | const int from_parameter_size = from_compiled_data->parameters->rnum; | |||
| 2684 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
// The loop is driven by the destination list here (to_parameter_indices->rnum).
| 2685 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? to_parameter_indices->rnum : 1; | |||
| 2686 | int i, j; | |||
// id_map and updated_name are created lazily inside the loop and released after it.
| 2687 | khash_t(ccv_cnnp_parameter_id)kh_ccv_cnnp_parameter_id_t* id_map = 0; | |||
| 2688 | char* updated_name = 0; | |||
// NOTE(review): from_init_v is NULL when from_model's tensors_init.v is NULL; the bitmap read
// further down would then dereference NULL - confirm callers guarantee an initialized source.
| 2689 | const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init. v) & ~(uintptr_t)1)); | |||
| 2690 | uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 2691 | for (i = 0; i < rnum; i++) | |||
| 2692 | { | |||
// Source index: the referenced entry (or the i-th one); from_parameter_size serves as an
// out-of-range placeholder when the source list has no such entry.
| 2693 | int src_d = (from_param_ref >= 0 ? from_param_ref : i) < from_parameter_indices->rnum ? *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))) : from_parameter_size; | |||
| 2694 | // Need to figure out how to use the renamer here. | |||
| 2695 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
| 2696 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2696, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2697 | assert(dest_d < to_parameter_size)((void) sizeof ((dest_d < to_parameter_size) ? 1 : 0), __extension__ ({ if (dest_d < to_parameter_size) ; else __assert_fail ( "dest_d < to_parameter_size", "ccv_cnnp_model.c", 2697, __extension__ __PRETTY_FUNCTION__); })); | |||
// Renaming path: start from the source parameter's name (if any), let the callback rewrite
// it in the 1024-byte updated_name buffer, then look the result up among the source ids.
| 2698 | if (renamer
| |||
| 2699 | { | |||
| 2700 | const char* const src_name = (src_d
data)) + (size_t)(from_compiled_data->ids.parameters)-> rsize * (size_t)(src_d))) : 0; | |||
| 2701 | const char* const dest_name = *(char**)ccv_array_get(to_compiled_data->ids.parameters, dest_d)((void*)(((char*)((to_compiled_data->ids.parameters)->data )) + (size_t)(to_compiled_data->ids.parameters)->rsize * (size_t)(dest_d))); | |||
| 2702 | if (!updated_name
| |||
| 2703 | updated_name = (char*)ccmallocmalloc(1024); | |||
| 2704 | const size_t src_name_len = src_name
)); typeof (1023) _b = (1023); (_a < _b) ? _a : _b; }); | |||
| 2705 | if (src_name_len
| |||
| 2706 | memcpy(updated_name, src_name, src_name_len); | |||
| 2707 | updated_name[src_name_len] = 0; | |||
| 2708 | if (renamer(context, dest_name, updated_name, 1024) != 0) | |||
| 2709 | continue; // Skip this. | |||
| 2710 | if (src_name
| |||
| 2711 | { | |||
| 2712 | // Nothing changed. | |||
| 2713 | } else { | |||
| 2714 | if (!id_map
| |||
| 2715 | { | |||
// Build the name -> index map over every source parameter id; each key must be new.
| 2716 | id_map = kh_init(ccv_cnnp_parameter_id)kh_init_ccv_cnnp_parameter_id(); | |||
| 2717 | for (j = 0; j < from_parameter_size; j++) | |||
| 2718 | { | |||
| 2719 | int ret; | |||
| 2720 | const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(from_compiled_data->ids.parameters, j), &ret)kh_put_ccv_cnnp_parameter_id(id_map, *(char**)((void*)(((char *)((from_compiled_data->ids.parameters)->data)) + (size_t )(from_compiled_data->ids.parameters)->rsize * (size_t) (j))), &ret); | |||
| 2721 | assert(ret != 0)((void) sizeof ((ret != 0) ? 1 : 0), __extension__ ({ if (ret != 0) ; else __assert_fail ("ret != 0", "ccv_cnnp_model.c", 2721 , __extension__ __PRETTY_FUNCTION__); })); | |||
| 2722 | kh_val(id_map, k)((id_map)->vals[k]) = j; | |||
| 2723 | } | |||
| 2724 | } | |||
| 2725 | const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name)kh_get_ccv_cnnp_parameter_id(id_map, updated_name); | |||
| 2726 | if (k == kh_end(id_map)((id_map)->n_buckets)) // Cannot find the name, skip. | |||
| 2727 | continue; | |||
// NOTE(review): the analyzer report flags the read below (line 2728) as a possible NULL
// `vals` dereference. Presumably an empty map makes kh_get return kh_end so this line is
// not reached in that case - confirm against the khash implementation.
| 2728 | src_d = kh_val(id_map, k)((id_map)->vals[k]); | |||

| 2729 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2729, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2730 | assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__ ({ if (src_d < from_parameter_size) ; else __assert_fail ( "src_d < from_parameter_size", "ccv_cnnp_model.c", 2730, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2731 | } | |||
| 2732 | } | |||
// From here on src_d must be a real source index; the placeholder value fails these asserts.
| 2733 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2733, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2734 | assert(src_d < from_parameter_size)((void) sizeof ((src_d < from_parameter_size) ? 1 : 0), __extension__ ({ if (src_d < from_parameter_size) ; else __assert_fail ( "src_d < from_parameter_size", "ccv_cnnp_model.c", 2734, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2735 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | |||
| 2736 | // If the original is not init'ed. We cannot share from. | |||
| 2737 | if (!(from_init_v[s >> 5] & (1u << (s & 0x1f)))) | |||
| 2738 | continue; | |||
// For every replica: free the destination's current tensor only when its low pointer bit is
// clear, then store the source tensor pointer with the low bit set.
// NOTE(review): presumably the low bit tags a tensor this model does not own - confirm.
| 2739 | for (j = 0; j < parallel_count; j++) | |||
| 2740 | { | |||
| 2741 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d + j * from_parameter_size]) & ~(uintptr_t )1)); | |||
| 2742 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2742, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2743 | ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size]; | |||
| 2744 | if (dest && !((uintptr_t)dest & (uintptr_t)1)) | |||
| 2745 | ccv_nnc_tensor_free(dest); | |||
| 2746 | to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1); | |||
| 2747 | } | |||
| 2748 | // Mark this symbol as init'ed. | |||
| 2749 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | |||
| 2750 | to_init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
| 2751 | } | |||
// Release the temporary index arrays and the lazily created rename helpers.
| 2752 | ccv_array_free(to_parameter_indices); | |||
| 2753 | ccv_array_free(from_parameter_indices); | |||
| 2754 | if (id_map) | |||
| 2755 | kh_destroy(ccv_cnnp_parameter_id, id_map)kh_destroy_ccv_cnnp_parameter_id(id_map); | |||
| 2756 | if (updated_name) | |||
| 2757 | ccfreefree(updated_name); | |||
| 2758 | // Mark it as incomplete so we will call init_1. | |||
// The low bit of tensors_init.v is set while some tensors still need allocation, else cleared.
| 2759 | if (ccv_cnnp_model_tensors_any_to_alloc(model, to_compiled_data)) | |||
| 2760 | to_compiled_data->tensors_init.v = (uint32_t*)((uintptr_t)to_compiled_data->tensors_init.v | (uintptr_t)1); | |||
| 2761 | else // Remove the flag. | |||
| 2762 | to_compiled_data->tensors_init.v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 2763 | } | |||
| 2764 | ||||
| 2765 | ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type) | |||
| 2766 | { | |||
| 2767 | if (!compiled_data->stream_map) | |||
| 2768 | compiled_data->stream_map = kh_init(stream_map)kh_init_stream_map(); | |||
| 2769 | int ret = 0; | |||
| 2770 | khiter_t k = kh_put(stream_map, compiled_data->stream_map, type, &ret)kh_put_stream_map(compiled_data->stream_map, type, &ret ); | |||
| 2771 | assert(ret >= 0)((void) sizeof ((ret >= 0) ? 1 : 0), __extension__ ({ if ( ret >= 0) ; else __assert_fail ("ret >= 0", "ccv_cnnp_model.c" , 2771, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2772 | ccv_nnc_stream_context_t* stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]); | |||
| 2773 | // If ret == 0, the key already exist, we can return directly, otherwise, create and return. | |||
| 2774 | if (ret != 0) | |||
| 2775 | { | |||
| 2776 | stream = ccv_nnc_stream_context_new(type); | |||
| 2777 | kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]) = stream; | |||
| 2778 | } | |||
| 2779 | return stream; | |||
| 2780 | } | |||
| 2781 | ||||
| 2782 | void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters) | |||
| 2783 | { | |||
| 2784 | ccv_array_t* to_parameter_indices; | |||
| 2785 | int to_param_ref; | |||
| 2786 | ccv_array_t* from_parameter_indices; | |||
| 2787 | int from_param_ref; | |||
| 2788 | _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0); | |||
| 2789 | // Should be exactly the same tensor. | |||
| 2790 | if (to_param_ref < 0 && from_param_ref < 0) | |||
| 2791 | { assert(from_parameter_indices->rnum == to_parameter_indices->rnum)((void) sizeof ((from_parameter_indices->rnum == to_parameter_indices ->rnum) ? 1 : 0), __extension__ ({ if (from_parameter_indices ->rnum == to_parameter_indices->rnum) ; else __assert_fail ("from_parameter_indices->rnum == to_parameter_indices->rnum" , "ccv_cnnp_model.c", 2791, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 2792 | // To models. | |||
| 2793 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
| 2794 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2794, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2795 | // From models. | |||
| 2796 | const ccv_cnnp_compiled_data_t* const from_compiled_data = from_model->compiled_data; | |||
| 2797 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2798 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
| 2799 | const int rnum = (to_param_ref < 0 && from_param_ref < 0) ? from_parameter_indices->rnum : 1; | |||
| 2800 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2800, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2801 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2801, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2802 | int i, j; | |||
| 2803 | ccv_nnc_tensor_t* inputs[aux_in_size + 2]; | |||
| 2804 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | |||
| 2805 | for (i = 0; i < aux_in_size; i++) | |||
| 2806 | inputs[i + 2] = aux_ins[i]; | |||
| 2807 | for (i = 0; i < aux_out_size; i++) | |||
| 2808 | outputs[i + 1] = aux_outs[i]; | |||
| 2809 | const uint32_t* const from_init_v = CCV_NNC_INIT_V(from_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(from_compiled_data->tensors_init. v) & ~(uintptr_t)1)); | |||
| 2810 | uint32_t* const to_init_v = CCV_NNC_INIT_V(to_compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(to_compiled_data->tensors_init.v) & ~(uintptr_t)1)); | |||
| 2811 | for (i = 0; i < rnum; i++) | |||
| 2812 | { | |||
| 2813 | const int src_d = *(int*)ccv_array_get(from_parameter_indices,from_param_ref >= 0 ? from_param_ref : i)((void*)(((char*)((from_parameter_indices)->data)) + (size_t )(from_parameter_indices)->rsize * (size_t)(from_param_ref >= 0 ? from_param_ref : i))); | |||
| 2814 | assert(src_d >= 0)((void) sizeof ((src_d >= 0) ? 1 : 0), __extension__ ({ if (src_d >= 0) ; else __assert_fail ("src_d >= 0", "ccv_cnnp_model.c" , 2814, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2815 | assert(src_d < from_compiled_data->parameters->rnum)((void) sizeof ((src_d < from_compiled_data->parameters ->rnum) ? 1 : 0), __extension__ ({ if (src_d < from_compiled_data ->parameters->rnum) ; else __assert_fail ("src_d < from_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2815, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2816 | const int s = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(from_compiled_data->parameters, src_d)((void*)(((char*)((from_compiled_data->parameters)->data )) + (size_t)(from_compiled_data->parameters)->rsize * ( size_t)(src_d))))->d; | |||
| 2817 | // If the original is not init'ed. We cannot copy from. | |||
| 2818 | if (!(from_init_v[s >> 5] & (1u << (s & 0x1f)))) | |||
| 2819 | continue; | |||
| 2820 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
| 2821 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2821, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2822 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2822, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2823 | if (parallel_count > 1) | |||
| 2824 | { | |||
| 2825 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
| 2826 | ccv_nnc_stream_signal_t* signal; | |||
| 2827 | if (stream_context) | |||
| 2828 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
| 2829 | for (j = 0; j < parallel_count; j++) | |||
| 2830 | { | |||
| 2831 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
| 2832 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
| 2833 | if (!dest || !src) | |||
| 2834 | { | |||
| 2835 | streams[j] = 0; | |||
| 2836 | continue; | |||
| 2837 | } | |||
| 2838 | // At the moment, can only handle them on the same device. | |||
| 2839 | assert(CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type))((void) sizeof ((((src->info.type) & 0x3) == ((dest-> info.type) & 0x3)) ? 1 : 0), __extension__ ({ if (((src-> info.type) & 0x3) == ((dest->info.type) & 0x3)) ; else __assert_fail ("CCV_TENSOR_GET_MEMORY(src->info.type) == CCV_TENSOR_GET_MEMORY(dest->info.type)" , "ccv_cnnp_model.c", 2839, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2840 | assert(CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type))((void) sizeof (((((src->info.type) & 0xfff00) >> 8) == (((dest->info.type) & 0xfff00) >> 8)) ? 1 : 0), __extension__ ({ if ((((src->info.type) & 0xfff00 ) >> 8) == (((dest->info.type) & 0xfff00) >> 8)) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE_ID(src->info.type) == CCV_TENSOR_GET_DEVICE_ID(dest->info.type)" , "ccv_cnnp_model.c", 2840, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2841 | const int stream_type = CCV_TENSOR_GET_MEMORY(src->info.type)((src->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
| 2842 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(src->info.type)(((src->info.type) & 0xfff00) >> 8); | |||
| 2843 | int type = stream_type; | |||
| 2844 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
| 2845 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | |||
| 2846 | // Wait signal to finish. | |||
| 2847 | if (stream_context) | |||
| 2848 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
| 2849 | inputs[0] = outputs[0] = dest; | |||
| 2850 | inputs[1] = src; | |||
| 2851 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_0); | |||
| 2852 | if (stream_context) | |||
| 2853 | { | |||
| 2854 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
| 2855 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
| 2856 | } | |||
| 2857 | streams[j] = stream_0; | |||
| 2858 | } | |||
| 2859 | // If this should be blocking, blocking it. | |||
| 2860 | if (!stream_context) | |||
| 2861 | for (j = 0; j < parallel_count; j++) | |||
| 2862 | if (streams[j]) | |||
| 2863 | ccv_nnc_stream_context_wait(streams[j]); | |||
| 2864 | } else { | |||
| 2865 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d])((ccv_nnc_tensor_t*)((uintptr_t)(from_compiled_data->tensors .parameters[src_d]) & ~(uintptr_t)1)); | |||
| 2866 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 2866, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2867 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d]) & ~(uintptr_t)1)); | |||
| 2868 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2868, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2869 | inputs[0] = outputs[0] = dest; | |||
| 2870 | inputs[1] = src; | |||
| 2871 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 2, outputs, aux_out_size + 1, stream_context); | |||
| 2872 | } | |||
| 2873 | // Mark this symbol as init'ed. | |||
| 2874 | const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(to_compiled_data->parameters, dest_d)((void*)(((char*)((to_compiled_data->parameters)->data) ) + (size_t)(to_compiled_data->parameters)->rsize * (size_t )(dest_d))))->d; | |||
| 2875 | to_init_v[d >> 5] |= (1u << (d & 0x1f)); | |||
| 2876 | } | |||
| 2877 | ccv_array_free(to_parameter_indices); | |||
| 2878 | ccv_array_free(from_parameter_indices); | |||
| 2879 | } | |||
| 2880 | ||||
| 2881 | void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context) | |||
| 2882 | { | |||
| 2883 | int to_param_ref; | |||
| 2884 | ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref); | |||
| 2885 | // To models. | |||
| 2886 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
| 2887 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2887, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2888 | // Tensor has to be inited already. | |||
| 2889 | assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 : 0), __extension__ ({ if (!!to_compiled_data->tensors_init .v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v" , "ccv_cnnp_model.c", 2889, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2890 | assert(to_compiled_data->tensors.parameters)((void) sizeof ((to_compiled_data->tensors.parameters) ? 1 : 0), __extension__ ({ if (to_compiled_data->tensors.parameters ) ; else __assert_fail ("to_compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 2890, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2891 | // From models. | |||
| 2892 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2893 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
| 2894 | const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1; | |||
| 2895 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2895, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2896 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2896, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2897 | int i, j; | |||
| 2898 | ccv_nnc_tensor_t* inputs[aux_in_size + 1]; | |||
| 2899 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | |||
| 2900 | for (i = 0; i < aux_in_size; i++) | |||
| 2901 | inputs[i + 1] = aux_ins[i]; | |||
| 2902 | for (i = 0; i < aux_out_size; i++) | |||
| 2903 | outputs[i + 1] = aux_outs[i]; | |||
| 2904 | for (i = 0; i < rnum; i++) | |||
| 2905 | { | |||
| 2906 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
| 2907 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2907, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2908 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2908, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2909 | if (parallel_count > 1) | |||
| 2910 | { | |||
| 2911 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
| 2912 | ccv_nnc_stream_signal_t* signal; | |||
| 2913 | if (stream_context) | |||
| 2914 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
| 2915 | for (j = 0; j < parallel_count; j++) | |||
| 2916 | { | |||
| 2917 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d + j * to_parameter_size]) & ~(uintptr_t )1)); | |||
| 2918 | if (!dest) | |||
| 2919 | { | |||
| 2920 | streams[j] = 0; | |||
| 2921 | continue; | |||
| 2922 | } | |||
| 2923 | const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
| 2924 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8); | |||
| 2925 | int type = stream_type; | |||
| 2926 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
| 2927 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | |||
| 2928 | // Wait signal to finish. | |||
| 2929 | if (stream_context) | |||
| 2930 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
| 2931 | inputs[0] = outputs[0] = dest; | |||
| 2932 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0); | |||
| 2933 | if (stream_context) | |||
| 2934 | { | |||
| 2935 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
| 2936 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
| 2937 | } | |||
| 2938 | streams[j] = stream_0; | |||
| 2939 | } | |||
| 2940 | // If this should be blocking, blocking it. | |||
| 2941 | if (!stream_context) | |||
| 2942 | for (j = 0; j < parallel_count; j++) | |||
| 2943 | if (streams[j]) | |||
| 2944 | ccv_nnc_stream_context_wait(streams[j]); | |||
| 2945 | } else { | |||
| 2946 | ccv_nnc_tensor_t* const dest = CCV_NNC_TENSOR(to_compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(to_compiled_data->tensors .parameters[dest_d]) & ~(uintptr_t)1)); | |||
| 2947 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 2947, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2948 | inputs[0] = outputs[0] = dest; | |||
| 2949 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context); | |||
| 2950 | } | |||
| 2951 | // No need to mark this symbol as init'ed, it is already. | |||
| 2952 | } | |||
| 2953 | ccv_array_free(to_parameter_indices); | |||
| 2954 | } | |||
| 2955 | ||||
| 2956 | void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context) | |||
| 2957 | { | |||
| 2958 | int to_param_ref; | |||
| 2959 | ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref); | |||
| 2960 | // To models. | |||
| 2961 | ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data; | |||
| 2962 | assert(to_compiled_data)((void) sizeof ((to_compiled_data) ? 1 : 0), __extension__ ({ if (to_compiled_data) ; else __assert_fail ("to_compiled_data" , "ccv_cnnp_model.c", 2962, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2963 | // Tensor has to be inited already. | |||
| 2964 | assert(!!to_compiled_data->tensors_init.v)((void) sizeof ((!!to_compiled_data->tensors_init.v) ? 1 : 0), __extension__ ({ if (!!to_compiled_data->tensors_init .v) ; else __assert_fail ("!!to_compiled_data->tensors_init.v" , "ccv_cnnp_model.c", 2964, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2965 | ccv_nnc_tensor_t** tensor_gradients; | |||
| 2966 | if (to_compiled_data->backward.count > 1) | |||
| 2967 | tensor_gradients = to_compiled_data->tensors.accum_gradients; | |||
| 2968 | else | |||
| 2969 | tensor_gradients = to_compiled_data->tensors.gradients; | |||
| 2970 | assert(tensor_gradients)((void) sizeof ((tensor_gradients) ? 1 : 0), __extension__ ({ if (tensor_gradients) ; else __assert_fail ("tensor_gradients" , "ccv_cnnp_model.c", 2970, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2971 | // From models. | |||
| 2972 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 2973 | const int to_parameter_size = to_compiled_data->parameters->rnum; | |||
| 2974 | const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1; | |||
| 2975 | assert(aux_in_size >= 0)((void) sizeof ((aux_in_size >= 0) ? 1 : 0), __extension__ ({ if (aux_in_size >= 0) ; else __assert_fail ("aux_in_size >= 0" , "ccv_cnnp_model.c", 2975, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2976 | assert(aux_out_size >= 0)((void) sizeof ((aux_out_size >= 0) ? 1 : 0), __extension__ ({ if (aux_out_size >= 0) ; else __assert_fail ("aux_out_size >= 0" , "ccv_cnnp_model.c", 2976, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2977 | int i, j; | |||
| 2978 | ccv_nnc_tensor_t* inputs[aux_in_size + 1]; | |||
| 2979 | ccv_nnc_tensor_t* outputs[aux_out_size + 1]; | |||
| 2980 | for (i = 0; i < aux_in_size; i++) | |||
| 2981 | inputs[i + 1] = aux_ins[i]; | |||
| 2982 | for (i = 0; i < aux_out_size; i++) | |||
| 2983 | outputs[i + 1] = aux_outs[i]; | |||
| 2984 | for (i = 0; i < rnum; i++) | |||
| 2985 | { | |||
| 2986 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
| 2987 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 2987, __extension__ __PRETTY_FUNCTION__); })); | |||
| 2988 | assert(dest_d < to_compiled_data->parameters->rnum)((void) sizeof ((dest_d < to_compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < to_compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < to_compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 2988, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 2989 | if (parallel_count > 1) | |||
| 2990 | { | |||
| 2991 | ccv_nnc_stream_context_t* streams[parallel_count]; | |||
| 2992 | ccv_nnc_stream_signal_t* signal; | |||
| 2993 | if (stream_context) | |||
| 2994 | signal = ccv_nnc_stream_context_emit_signal_new(stream_context); | |||
| 2995 | for (j = 0; j < parallel_count; j++) | |||
| 2996 | { | |||
| 2997 | ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d + j * to_parameter_size]; | |||
| 2998 | if (!dest) | |||
| 2999 | { | |||
| 3000 | streams[j] = 0; | |||
| 3001 | continue; | |||
| 3002 | } | |||
| 3003 | const int stream_type = CCV_TENSOR_GET_MEMORY(dest->info.type)((dest->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU; | |||
| 3004 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(dest->info.type)(((dest->info.type) & 0xfff00) >> 8); | |||
| 3005 | int type = stream_type; | |||
| 3006 | CCV_STREAM_SET_DEVICE_ID(type, device_id)(type) = (((type) & ~0xfff00) | (((device_id) & 0xfff ) << 8)); | |||
| 3007 | ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(to_compiled_data, type); | |||
| 3008 | // Wait signal to finish. | |||
| 3009 | if (stream_context) | |||
| 3010 | ccv_nnc_stream_context_wait_signal(stream_0, signal); | |||
| 3011 | inputs[0] = outputs[0] = dest; | |||
| 3012 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_0); | |||
| 3013 | if (stream_context) | |||
| 3014 | { | |||
| 3015 | ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0); | |||
| 3016 | ccv_nnc_stream_context_wait_signal(stream_context, signal); | |||
| 3017 | } | |||
| 3018 | streams[j] = stream_0; | |||
| 3019 | } | |||
| 3020 | // If this should be blocking, blocking it. | |||
| 3021 | if (!stream_context) | |||
| 3022 | for (j = 0; j < parallel_count; j++) | |||
| 3023 | if (streams[j]) | |||
| 3024 | ccv_nnc_stream_context_wait(streams[j]); | |||
| 3025 | } else { | |||
| 3026 | ccv_nnc_tensor_t* const dest = tensor_gradients[dest_d]; | |||
| 3027 | if (!dest) | |||
| 3028 | continue; | |||
| 3029 | assert(dest)((void) sizeof ((dest) ? 1 : 0), __extension__ ({ if (dest) ; else __assert_fail ("dest", "ccv_cnnp_model.c", 3029, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3030 | inputs[0] = outputs[0] = dest; | |||
| 3031 | ccv_nnc_cmd_exec(cmd, hint, flags, inputs, aux_in_size + 1, outputs, aux_out_size + 1, stream_context); | |||
| 3032 | } | |||
| 3033 | // No need to mark this symbol as init'ed, it is already. | |||
| 3034 | } | |||
| 3035 | ccv_array_free(to_parameter_indices); | |||
| 3036 | } | |||
| 3037 | ||||
| 3038 | void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context) | |||
| 3039 | { | |||
| 3040 | // Only CUDA backend has this feature. | |||
| 3041 | #ifdef HAVE_CUDA1 | |||
| 3042 | int to_param_ref; | |||
| 3043 | ccv_array_t* const to_parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, &to_param_ref); | |||
| 3044 | // To models. | |||
| 3045 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 3046 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 3046, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3047 | // Tensor has to be inited already. | |||
| 3048 | assert(!!compiled_data->tensors_init.v)((void) sizeof ((!!compiled_data->tensors_init.v) ? 1 : 0) , __extension__ ({ if (!!compiled_data->tensors_init.v) ; else __assert_fail ("!!compiled_data->tensors_init.v", "ccv_cnnp_model.c" , 3048, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3049 | assert(compiled_data->tensors.parameters)((void) sizeof ((compiled_data->tensors.parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->tensors.parameters) ; else __assert_fail ("compiled_data->tensors.parameters" , "ccv_cnnp_model.c", 3049, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3050 | // From models. | |||
| 3051 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 3052 | const int rnum = (to_param_ref < 0) ? to_parameter_indices->rnum : 1; | |||
| 3053 | int i; | |||
| 3054 | for (i = 0; i < rnum; i++) | |||
| 3055 | { | |||
| 3056 | const int dest_d = *(int*)ccv_array_get(to_parameter_indices, to_param_ref >= 0 ? to_param_ref : i)((void*)(((char*)((to_parameter_indices)->data)) + (size_t )(to_parameter_indices)->rsize * (size_t)(to_param_ref >= 0 ? to_param_ref : i))); | |||
| 3057 | assert(dest_d >= 0)((void) sizeof ((dest_d >= 0) ? 1 : 0), __extension__ ({ if (dest_d >= 0) ; else __assert_fail ("dest_d >= 0", "ccv_cnnp_model.c" , 3057, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3058 | assert(dest_d < compiled_data->parameters->rnum)((void) sizeof ((dest_d < compiled_data->parameters-> rnum) ? 1 : 0), __extension__ ({ if (dest_d < compiled_data ->parameters->rnum) ; else __assert_fail ("dest_d < compiled_data->parameters->rnum" , "ccv_cnnp_model.c", 3058, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3059 | if (parallel_count > 1) | |||
| 3060 | { | |||
| 3061 | assert(0 && "Cannot support this when data parallel is in effect.")((void) sizeof ((0 && "Cannot support this when data parallel is in effect." ) ? 1 : 0), __extension__ ({ if (0 && "Cannot support this when data parallel is in effect." ) ; else __assert_fail ("0 && \"Cannot support this when data parallel is in effect.\"" , "ccv_cnnp_model.c", 3061, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3062 | } else { | |||
| 3063 | ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(compiled_data->tensors.parameters[dest_d])((ccv_nnc_tensor_t*)((uintptr_t)(compiled_data->tensors.parameters [dest_d]) & ~(uintptr_t)1)); | |||
| 3064 | assert(src)((void) sizeof ((src) ? 1 : 0), __extension__ ({ if (src) ; else __assert_fail ("src", "ccv_cnnp_model.c", 3064, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3065 | ccv_nnc_tensor_param_t params = src->info; | |||
| 3066 | if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) != CCV_TENSOR_GPU_MEMORY) | |||
| 3067 | continue; | |||
| 3068 | const size_t size = ccv_nnc_tensor_data_size(params); | |||
| 3069 | if (size <= 0) | |||
| 3070 | continue; | |||
| 3071 | const int should_free = !((uintptr_t)compiled_data->tensors.parameters[dest_d] & (uintptr_t)1); | |||
| 3072 | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL(0xFFF) && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); | |||
| 3073 | ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_t)); | |||
| 3074 | tensor->dataof = 0; | |||
| 3075 | tensor->alias_ref = 0; | |||
| 3076 | tensor->sig = 0; | |||
| 3077 | tensor->refcount = 1; | |||
| 3078 | tensor->info = params; | |||
| 3079 | if (tfb) | |||
| 3080 | { | |||
| 3081 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000) | params.dim[2]; | |||
| 3082 | // This corresponding to mat->step | |||
| 3083 | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]))(((params.dim[1]) * _ccv_get_data_type_size[(((((params.datatype ) & 0xFF000) | params.dim[2])) & 0xFF000) >> 12 ] * (((((params.datatype) & 0xFF000) | params.dim[2])) & 0xFFF) + 3) & -4); | |||
| 3084 | } else // This won't be recognized by ccv_dense_matrix_t | |||
| 3085 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000); | |||
| 3086 | // Remove this flag so it can be deallocated as usual. | |||
| 3087 | tensor->type &= ~CCV_NO_DATA_ALLOC; | |||
| 3088 | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY)((void) sizeof ((((params.type) & 0xfff00) != CCV_COMPUTE_DEVICE_ANY ) ? 1 : 0), __extension__ ({ if (((params.type) & 0xfff00 ) != CCV_COMPUTE_DEVICE_ANY) ; else __assert_fail ("CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY" , "ccv_cnnp_model.c", 3088, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3089 | void* ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), size); | |||
| 3090 | if (ptr) // If allocated successfully. Otherwise we go through the fallback path. | |||
| 3091 | { | |||
| 3092 | tensor->data.u8 = (uint8_t*)ptr; | |||
| 3093 | cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type)(((params.type) & 0xfff00) >> 8), tensor->data.u8, size); | |||
| 3094 | tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer a explicit prefetch call. | |||
| 3095 | } else { | |||
| 3096 | // Allocation failed. | |||
| 3097 | ccfreefree(tensor); | |||
| 3098 | continue; | |||
| 3099 | } | |||
| 3100 | // TODO: Cannot run this on the stream context yet, due to allocation and deallocations. | |||
| 3101 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, &src, 1, &tensor, 1, 0); | |||
| 3102 | compiled_data->tensors.parameters[dest_d] = tensor; | |||
| 3103 | // Can free out the old one. | |||
| 3104 | if (should_free) | |||
| 3105 | ccv_nnc_tensor_free(src); | |||
| 3106 | } | |||
| 3107 | // No need to mark this symbol as init'ed, it is already. | |||
| 3108 | } | |||
| 3109 | ccv_array_free(to_parameter_indices); | |||
| 3110 | #endif | |||
| 3111 | } | |||
| 3112 | ||||
| 3113 | ccv_nnc_cmd_t ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model) | |||
| 3114 | { | |||
| 3115 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 3116 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 3116, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3117 | return compiled_data->minimize.minimizer; | |||
| 3118 | } | |||
| 3119 | ||||
| 3120 | void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const set_parameters, const int set_parameter_size) | |||
| 3121 | { | |||
| 3122 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 3123 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 3123, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3124 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 3125 | if (parameter_size == 0) | |||
| 3126 | return; | |||
| 3127 | if (reset) | |||
| 3128 | { assert(set_parameters == 0 && set_parameter_size == 0)((void) sizeof ((set_parameters == 0 && set_parameter_size == 0) ? 1 : 0), __extension__ ({ if (set_parameters == 0 && set_parameter_size == 0) ; else __assert_fail ("set_parameters == 0 && set_parameter_size == 0" , "ccv_cnnp_model.c", 3128, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 3129 | const int old_max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
| 3130 | const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(minimizer); | |||
| 3131 | if (saved_aux_size > compiled_data->minimize.max_saved_aux_size) | |||
| 3132 | compiled_data->minimize.max_saved_aux_size = saved_aux_size; | |||
| 3133 | const int max_saved_aux_size = compiled_data->minimize.max_saved_aux_size; | |||
| 3134 | // We update all parameters, at this point, we have one minimizer. | |||
| 3135 | if (set_parameters == 0 || set_parameter_size == 0) | |||
| 3136 | compiled_data->minimize.minimizer = minimizer; | |||
| 3137 | int i; | |||
| 3138 | if (set_parameters && set_parameter_size) | |||
| 3139 | { | |||
| 3140 | // I need to save what's the minimizer along with this. | |||
| 3141 | if (!compiled_data->minimize.parameters) | |||
| 3142 | compiled_data->minimize.parameters = ccv_array_new(sizeof(ccv_cnnp_set_minimizer_for_parameter_t*), 1, 0); | |||
| 3143 | ccv_cnnp_set_minimizer_for_parameter_t* const set_minimizer_for_parameter = ccmallocmalloc(sizeof(ccv_cnnp_set_minimizer_for_parameter_t) + (set_parameter_size - 1) * sizeof(ccv_cnnp_model_io_t)); | |||
| 3144 | set_minimizer_for_parameter->minimizer = minimizer; | |||
| 3145 | set_minimizer_for_parameter->parameter_size = set_parameter_size; | |||
| 3146 | memcpy(set_minimizer_for_parameter->parameters, set_parameters, sizeof(ccv_cnnp_model_io_t) * set_parameter_size); | |||
| 3147 | ccv_array_push(compiled_data->minimize.parameters, &set_minimizer_for_parameter); | |||
| 3148 | } | |||
| 3149 | // If reset is true, clear the parameters array. | |||
| 3150 | if (reset && compiled_data->minimize.parameters) | |||
| 3151 | { | |||
| 3152 | for (i = 0; i < compiled_data->minimize.parameters->rnum; i++) | |||
| 3153 | ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)-> data)) + (size_t)(compiled_data->minimize.parameters)-> rsize * (size_t)(i)))); | |||
| 3154 | ccv_array_clear(compiled_data->minimize.parameters); | |||
| 3155 | } | |||
| 3156 | if (!compiled_data->update_nodes) | |||
| 3157 | return; | |||
| 3158 | ccv_nnc_symbolic_graph_t* const symbolic_graph = model->graph; | |||
| 3159 | assert(symbolic_graph)((void) sizeof ((symbolic_graph) ? 1 : 0), __extension__ ({ if (symbolic_graph) ; else __assert_fail ("symbolic_graph", "ccv_cnnp_model.c" , 3159, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3160 | if (saved_aux_size > old_max_saved_aux_size) | |||
| 3161 | { | |||
| 3162 | assert(compiled_data->updated_parameters)((void) sizeof ((compiled_data->updated_parameters) ? 1 : 0 ), __extension__ ({ if (compiled_data->updated_parameters) ; else __assert_fail ("compiled_data->updated_parameters" , "ccv_cnnp_model.c", 3162, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3163 | // Reallocate first, move them around later. | |||
| 3164 | compiled_data->updated_parameters = (ccv_nnc_tensor_symbol_t*)ccreallocrealloc(compiled_data->updated_parameters, sizeof(ccv_nnc_tensor_symbol_t) * parameter_size + sizeof(ccv_nnc_graph_exec_symbol_t) * parameter_size + sizeof(ccv_nnc_tensor_symbol_map_t) * saved_aux_size * parameter_size); | |||
| 3165 | compiled_data->update_nodes = (ccv_nnc_graph_exec_symbol_t*)(compiled_data->updated_parameters + parameter_size); | |||
| 3166 | compiled_data->saved_aux = (ccv_nnc_tensor_symbol_map_t*)(compiled_data->update_nodes + parameter_size); | |||
| 3167 | // We need to do this from back to front because saved_aux_size > old_saved_aux_size, it could overlap. | |||
| 3168 | _ccv_cnnp_scatter_saved_aux(compiled_data->saved_aux, parameter_size, old_max_saved_aux_size, saved_aux_size); | |||
| 3169 | } | |||
| 3170 | int flag = 0; | |||
| 3171 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 3172 | if (set_parameters && set_parameter_size) | |||
| 3173 | { | |||
| 3174 | ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0); | |||
| 3175 | for (i = 0; i < set_parameter_size; i++) | |||
| 3176 | { | |||
| 3177 | const int param_sel = set_parameters[i]->param_sel > 0 ? set_parameters[i]->param_sel - 1 : set_parameters[i]->param_sel; | |||
| 3178 | assert(set_parameters[i]->param_sel != 0)((void) sizeof ((set_parameters[i]->param_sel != 0) ? 1 : 0 ), __extension__ ({ if (set_parameters[i]->param_sel != 0) ; else __assert_fail ("set_parameters[i]->param_sel != 0" , "ccv_cnnp_model.c", 3178, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3179 | const int old_rnum = parameter_indices->rnum; | |||
| 3180 | ccv_cnnp_model_add_to_parameter_indices(set_parameters[i]->model, param_sel, parameter_indices); | |||
| 3181 | const int param_ref = set_parameters[i]->param_ref > 0 ? set_parameters[i]->param_ref - 1 : set_parameters[i]->param_ref; | |||
| 3182 | assert(set_parameters[i]->param_ref != 0)((void) sizeof ((set_parameters[i]->param_ref != 0) ? 1 : 0 ), __extension__ ({ if (set_parameters[i]->param_ref != 0) ; else __assert_fail ("set_parameters[i]->param_ref != 0" , "ccv_cnnp_model.c", 3182, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3183 | if (param_ref >= 0) | |||
| 3184 | { | |||
| 3185 | assert(param_ref + old_rnum < parameter_indices->rnum)((void) sizeof ((param_ref + old_rnum < parameter_indices-> rnum) ? 1 : 0), __extension__ ({ if (param_ref + old_rnum < parameter_indices->rnum) ; else __assert_fail ("param_ref + old_rnum < parameter_indices->rnum" , "ccv_cnnp_model.c", 3185, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3186 | *(int*)ccv_array_get(parameter_indices, old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(old_rnum))) = *(int*)ccv_array_get(parameter_indices, param_ref + old_rnum)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(param_ref + old_rnum))); | |||
| 3187 | parameter_indices->rnum = old_rnum + 1; | |||
| 3188 | } | |||
| 3189 | } | |||
| 3190 | // We may have duplicated indices, but that is OK, we will set it twice. | |||
| 3191 | for (i = 0; i < parameter_indices->rnum; i++) | |||
| 3192 | { | |||
| 3193 | const int d = *(int*)ccv_array_get(parameter_indices, i)((void*)(((char*)((parameter_indices)->data)) + (size_t)(parameter_indices )->rsize * (size_t)(i))); | |||
| 3194 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, d)) | |||
| 3195 | flag = 1; | |||
| 3196 | } | |||
| 3197 | ccv_array_free(parameter_indices); | |||
| 3198 | } else { | |||
| 3199 | for (i = 0; i < parameter_size; i++) | |||
| 3200 | if (_ccv_cnnp_set_minimizer_for_parameter(symbolic_graph, compiled_data, compiled_data->update_nodes, compiled_data->updated_parameters, compiled_data->saved_aux, parallel_count, minimizer, saved_aux_size, max_saved_aux_size, i)) | |||
| 3201 | flag = 1; | |||
| 3202 | if (compiled_data->minimize.parameters) | |||
| 3203 | if (_ccv_cnnp_apply_parameters_with_minimizer(model)) | |||
| 3204 | flag = 1; | |||
| 3205 | } | |||
| 3206 | if (flag) | |||
| 3207 | { | |||
| 3208 | // If saved_aux_size doesn't match, we need to remove / add new saved_aux to the graph. But first, free up apply gradients graph. | |||
| 3209 | if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_FIT_MODE) | |||
| 3210 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
| 3211 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | |||
| 3212 | } | |||
| 3213 | } | |||
| 3214 | ||||
| 3215 | void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params) | |||
| 3216 | { | |||
| 3217 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 3218 | assert(compiled_data)((void) sizeof ((compiled_data) ? 1 : 0), __extension__ ({ if (compiled_data) ; else __assert_fail ("compiled_data", "ccv_cnnp_model.c" , 3218, __extension__ __PRETTY_FUNCTION__); })); | |||
| 3219 | compiled_data->compile_params = compile_params; | |||
| 3220 | } | |||
| 3221 | ||||
| 3222 | void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size) | |||
| 3223 | { | |||
| 3224 | if (model->graph && out_size > 0) | |||
| 3225 | ccv_nnc_symbolic_graph_dot(model->graph, flags, outs[0]); | |||
| 3226 | if (model->compiled_data && model->compiled_data->graph && out_size > 1) | |||
| 3227 | ccv_nnc_graph_dot(model->compiled_data->graph, flags, outs[1]); | |||
| 3228 | if (model->compiled_data && model->compiled_data->backward.accum && out_size > 2) | |||
| 3229 | ccv_nnc_graph_dot(model->compiled_data->backward.accum, flags, outs[2]); | |||
| 3230 | if (model->compiled_data && model->compiled_data->apply_gradients.graph && out_size > 3) | |||
| 3231 | ccv_nnc_graph_dot(model->compiled_data->apply_gradients.graph, flags, outs[3]); | |||
| 3232 | } | |||
| 3233 | ||||
| 3234 | void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context) | |||
| 3235 | { | |||
| 3236 | if (model->graph) | |||
| 3237 | ccv_nnc_symbolic_graph_format(model->graph, 0, 0, 0, 0, format_fn, context); | |||
| 3238 | } | |||
| 3239 | ||||
| 3240 | static void _ccv_cnnp_compiled_data_free(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data) | |||
| 3241 | { | |||
| 3242 | int i; | |||
| 3243 | const int parameter_size = compiled_data->parameters->rnum; | |||
| 3244 | ccv_array_free(compiled_data->parameters); | |||
| 3245 | if (compiled_data->parameter_flags) | |||
| 3246 | ccfreefree(compiled_data->parameter_flags); | |||
| 3247 | const int internal_size = compiled_data->internals->rnum; | |||
| 3248 | ccv_array_free(compiled_data->internals); | |||
| 3249 | assert(compiled_data->ids.parameters->rnum == parameter_size)((void) sizeof ((compiled_data->ids.parameters->rnum == parameter_size) ? 1 : 0), __extension__ ({ if (compiled_data ->ids.parameters->rnum == parameter_size) ; else __assert_fail ("compiled_data->ids.parameters->rnum == parameter_size" , "ccv_cnnp_model.c", 3249, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3250 | assert(compiled_data->ids.internals->rnum == internal_size)((void) sizeof ((compiled_data->ids.internals->rnum == internal_size ) ? 1 : 0), __extension__ ({ if (compiled_data->ids.internals ->rnum == internal_size) ; else __assert_fail ("compiled_data->ids.internals->rnum == internal_size" , "ccv_cnnp_model.c", 3250, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3251 | for (i = 0; i < parameter_size; i++) | |||
| 3252 | ccfreefree(*(char**)ccv_array_get(compiled_data->ids.parameters, i)((void*)(((char*)((compiled_data->ids.parameters)->data )) + (size_t)(compiled_data->ids.parameters)->rsize * ( size_t)(i)))); | |||
| 3253 | ccv_array_free(compiled_data->ids.parameters); | |||
| 3254 | for (i = 0; i < internal_size; i++) | |||
| 3255 | ccfreefree(*(char**)ccv_array_get(compiled_data->ids.internals, i)((void*)(((char*)((compiled_data->ids.internals)->data) ) + (size_t)(compiled_data->ids.internals)->rsize * (size_t )(i)))); | |||
| 3256 | ccv_array_free(compiled_data->ids.internals); | |||
| 3257 | const int parallel_count = ccv_max(model->parallel_count, 1)({ typeof (model->parallel_count) _a = (model->parallel_count ); typeof (1) _b = (1); (_a > _b) ? _a : _b; }); | |||
| 3258 | if (compiled_data->tensors.parameters) | |||
| 3259 | { | |||
| 3260 | for (i = 0; i < parameter_size * parallel_count; i++) | |||
| 3261 | // If it is not marked as not belonging, we can free it. | |||
| 3262 | if (!((uintptr_t)compiled_data->tensors.parameters[i] & (uintptr_t)1)) | |||
| 3263 | if (compiled_data->tensors.parameters[i]) | |||
| 3264 | ccv_nnc_tensor_free(compiled_data->tensors.parameters[i]); | |||
| 3265 | for (i = 0; i < internal_size * parallel_count; i++) | |||
| 3266 | if (compiled_data->tensors.internals[i]) | |||
| 3267 | ccv_nnc_tensor_free(compiled_data->tensors.internals[i]); | |||
| 3268 | ccfreefree(compiled_data->tensors.parameters); | |||
| 3269 | } | |||
| 3270 | if (compiled_data->tensors.gradients) | |||
| 3271 | { | |||
| 3272 | for (i = 0; i < parameter_size * parallel_count; i++) | |||
| 3273 | { | |||
| 3274 | if (compiled_data->tensors.gradients[i]) | |||
| 3275 | ccv_nnc_tensor_free(compiled_data->tensors.gradients[i]); | |||
| 3276 | if (compiled_data->tensors.accum_gradients[i]) | |||
| 3277 | ccv_nnc_tensor_free(compiled_data->tensors.accum_gradients[i]); | |||
| 3278 | } | |||
| 3279 | ccfreefree(compiled_data->tensors.gradients); | |||
| 3280 | } | |||
| 3281 | if (compiled_data->minimize.parameters) | |||
| 3282 | { | |||
| 3283 | for (i = 0; i < compiled_data->minimize.parameters->rnum; i++) | |||
| 3284 | ccfreefree(*(ccv_cnnp_set_minimizer_for_parameter_t**)ccv_array_get(compiled_data->minimize.parameters, i)((void*)(((char*)((compiled_data->minimize.parameters)-> data)) + (size_t)(compiled_data->minimize.parameters)-> rsize * (size_t)(i)))); | |||
| 3285 | ccv_array_free(compiled_data->minimize.parameters); | |||
| 3286 | } | |||
| 3287 | if (compiled_data->rewindables) | |||
| 3288 | ccv_array_free(compiled_data->rewindables); | |||
| 3289 | if (compiled_data->tensors_init.v) | |||
| 3290 | ccfreefree(CCV_NNC_INIT_V(compiled_data->tensors_init.v)((uint32_t*)((uintptr_t)(compiled_data->tensors_init.v) & ~(uintptr_t)1))); | |||
| 3291 | if (compiled_data->evaluate.tos) | |||
| 3292 | ccfreefree(compiled_data->evaluate.tos); | |||
| 3293 | compiled_data->evaluate.tos = 0; | |||
| 3294 | if (compiled_data->stream_map) | |||
| 3295 | { | |||
| 3296 | khiter_t k; | |||
| 3297 | for (k = kh_begin(compiled_data->stream_map)(khint_t)(0); k != kh_end(compiled_data->stream_map)((compiled_data->stream_map)->n_buckets); ++k) | |||
| 3298 | { | |||
| 3299 | if (!kh_exist(compiled_data->stream_map, k)(!(((compiled_data->stream_map)->flags[(k)>>4]>> (((k)&0xfU)<<1))&3))) | |||
| 3300 | continue; | |||
| 3301 | ccv_nnc_stream_context_t* const stream = kh_val(compiled_data->stream_map, k)((compiled_data->stream_map)->vals[k]); | |||
| 3302 | ccv_nnc_stream_context_free(stream); | |||
| 3303 | } | |||
| 3304 | kh_destroy(stream_map, compiled_data->stream_map)kh_destroy_stream_map(compiled_data->stream_map); | |||
| 3305 | } | |||
| 3306 | _ccv_cnnp_compiled_data_graph_free(compiled_data); | |||
| 3307 | _ccv_cnnp_compiled_data_gradient_free(compiled_data); | |||
| 3308 | _ccv_cnnp_compiled_data_backward_free(compiled_data); | |||
| 3309 | _ccv_cnnp_compiled_data_apply_gradients_free(compiled_data); | |||
| 3310 | if (compiled_data->gradient_checkpoints) | |||
| 3311 | { | |||
| 3312 | for (i = 0; i < compiled_data->gradient_checkpoints->rnum; i++) | |||
| 3313 | { | |||
| 3314 | ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(compiled_data->gradient_checkpoints, i)((void*)(((char*)((compiled_data->gradient_checkpoints)-> data)) + (size_t)(compiled_data->gradient_checkpoints)-> rsize * (size_t)(i))); | |||
| 3315 | assert(checkpoint->inputs)((void) sizeof ((checkpoint->inputs) ? 1 : 0), __extension__ ({ if (checkpoint->inputs) ; else __assert_fail ("checkpoint->inputs" , "ccv_cnnp_model.c", 3315, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 3316 | ccfreefree(checkpoint->inputs); | |||
| 3317 | ccv_array_free(checkpoint->tensor_symbols); | |||
| 3318 | } | |||
| 3319 | ccv_array_free(compiled_data->gradient_checkpoints); | |||
| 3320 | } | |||
| 3321 | ccv_nnc_xpu_alloc_destroy(&compiled_data->xpu_alloc); | |||
| 3322 | ccfreefree(compiled_data); | |||
| 3323 | } | |||
| 3324 | ||||
| 3325 | void ccv_cnnp_model_free(ccv_cnnp_model_t* const model) | |||
| 3326 | { | |||
| 3327 | ccv_cnnp_model_deinit(model); | |||
| 3328 | if (model->isa->dealloc) | |||
| 3329 | model->isa->dealloc(model); | |||
| 3330 | if (model->io) | |||
| 3331 | { | |||
| 3332 | int i; | |||
| 3333 | for (i = 0; i < model->io->rnum; i++) | |||
| 3334 | { | |||
| 3335 | ccv_cnnp_model_io_t model_io = *(ccv_cnnp_model_io_t*)ccv_array_get(model->io, i)((void*)(((char*)((model->io)->data)) + (size_t)(model-> io)->rsize * (size_t)(i))); | |||
| 3336 | if (model_io->outgoings) | |||
| 3337 | ccv_array_free(model_io->outgoings); | |||
| 3338 | if (model_io->incomings) | |||
| 3339 | ccv_array_free(model_io->incomings); | |||
| 3340 | if (model_io->dependencies) | |||
| 3341 | ccv_array_free(model_io->dependencies); | |||
| 3342 | ccfreefree(model_io); | |||
| 3343 | } | |||
| 3344 | ccv_array_free(model->io); | |||
| 3345 | } | |||
| 3346 | if (model->parameter_indices) | |||
| 3347 | ccv_array_free(model->parameter_indices); | |||
| 3348 | if (model->inputs) | |||
| 3349 | ccfreefree(model->inputs); | |||
| 3350 | if (model->graph) | |||
| 3351 | ccv_nnc_symbolic_graph_free(model->graph); | |||
| 3352 | if (model->compiled_data) | |||
| 3353 | _ccv_cnnp_compiled_data_free(model, model->compiled_data); | |||
| 3354 | if (model->name) | |||
| 3355 | ccfreefree(model->name); | |||
| 3356 | ccfreefree(model); | |||
| 3357 | } | |||
| 3358 | ||||
| 3359 | void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model) | |||
| 3360 | { | |||
| 3361 | ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data; | |||
| 3362 | if (!compiled_data) | |||
| 3363 | return; | |||
| 3364 | if (compiled_data->graph) | |||
| 3365 | ccv_nnc_graph_cancel(compiled_data->graph); | |||
| 3366 | if (compiled_data->apply_gradients.graph) | |||
| 3367 | ccv_nnc_graph_cancel(compiled_data->apply_gradients.graph); | |||
| 3368 | } | |||
| 3369 | ||||
| 3370 | void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags) | |||
| 3371 | { | |||
| 3372 | model->exec_flags = flags; | |||
| 3373 | } | |||
| 3374 | ||||
| 3375 | int ccv_cnnp_model_flags(ccv_cnnp_model_t* const model) | |||
| 3376 | { | |||
| 3377 | return model->exec_flags; | |||
| 3378 | } |