File: nnc/ccv_nnc_dynamic_graph_evaluate.c
Warning: line 179, column 2: Declared variable-length array (VLA) has negative size
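The flagged declaration is `ccv_nnc_tensor_variable_t outputs[output_size];` at line 179 of the listing below. `output_size` is computed at line 178 as `per_output_size * parallel_count`, and `per_output_size` comes from `ccv_cnnp_model_output_size(model)` at line 176, a return value the analyzer cannot prove positive. On the path where it is zero or negative, the VLAs at lines 179-180 have non-positive size, which C forbids. Notably, the neighboring VLA at line 177 is clamped with `ccv_max(1, per_output_size)`, and the parallel computation in `_ccv_cnnp_model_tensor_auto` asserts `per_output_size > 0` at line 111; the dry-run path has no equivalent guard. The following is a minimal self-contained sketch of the pattern, not ccv code (`model_output_size` and `dry_run_sketch` are hypothetical stand-ins):

    #include <assert.h>
    #include <stdio.h>

    /* Hypothetical stand-in for ccv_cnnp_model_output_size(): all the
     * analyzer knows about the real call is that it returns an int,
     * which could be zero or negative. */
    static int model_output_size(const int configured)
    {
        return configured;
    }

    static void dry_run_sketch(const int parallel_count, const int configured)
    {
        const int per_output_size = model_output_size(configured);
        /* Clamped VLA, as on line 177: the size is always at least 1. */
        int output_params[per_output_size > 1 ? per_output_size : 1];
        const int output_size = per_output_size * parallel_count;
        /* Line 179 declares outputs[output_size] with no guard; if
         * per_output_size <= 0, the size is <= 0 and the declaration is
         * undefined behavior (C11 6.7.6.2p5 requires a VLA size that
         * evaluates to a value greater than zero). Asserting positivity
         * first is the kind of check that removes the offending path
         * from the analyzer's state space. */
        assert(output_size > 0);
        int outputs[output_size];
        printf("VLA sizes: %zu and %zu\n",
            sizeof(output_params) / sizeof(output_params[0]),
            sizeof(outputs) / sizeof(outputs[0]));
    }

    int main(void)
    {
        dry_run_sketch(2 /* parallel_count */, 3 /* outputs per replica */);
        return 0;
    }

Clang's analyzer models `assert` (the expansion shown in this listing bottoms out in the noreturn `__assert_fail`), so a positivity check before the declaration discharges the warning.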
  1  #include "ccv_nnc.h"
  2  #include "ccv_nnc_easy.h"
  3  #include "ccv_nnc_internal.h"
  4  #include "ccv_nnc_easy.h"
  5  #include "ccv_internal.h"
  6  #include "_ccv_nnc_dynamic_graph.h"
  7  #include "_ccv_cnnp_model.h"
  8
  9  // MARK - Level-5.5 API
 10
 11  static int _ccv_cnnp_model_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
 12  {
 13      ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
 14      ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
 15      // We cannot simply use the stream context here: it cannot synchronize correctly with the existing coroutine implementation.
 16      int i;
 17      int wait_for_any_neighbor = 0;
 18      const int parallel_count = ccv_max(model->parallel_count, 1);
 19      if (stream_context) // Find all neighbor contexts and wait on them all.
 20          for (i = 0; i < parallel_count; i++)
 21          {
 22              ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
 23              if (neighbor_context && neighbor_context != stream_context)
 24              {
 25                  ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(neighbor_context);
 26                  ccv_nnc_stream_context_wait_signal(stream_context, signal);
 27                  wait_for_any_neighbor = 1;
 28              }
 29          }
 30      co_scheduler_t* old_scheduler;
 31      co_routine_t* old_main;
 32      if (stream_context)
 33      {
 34          old_main = stream_context->main;
 35          old_scheduler = stream_context->scheduler;
 36          // We cannot piggyback on the old scheduler.
 37          stream_context->scheduler = 0;
 38          // We will have a new main coroutine when scheduled as the root.
 39          // Otherwise it would only be scheduled after all the existing routines
 40          // are scheduled out, and that won't be right.
 41          stream_context->main = 0;
 42      }
 43      if (cmd.cmd == CCV_NNC_CUSTOM_FORWARD)
 44      {
 45          ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
 46              .requires_grad = stateful_exec->requires_grad,
 47              .disable_outgrad = stateful_exec->disable_outgrad,
 48              .is_test = stateful_exec->is_test,
 49          }, inputs, input_size, outputs, output_size, 0, stream_context);
 50      } else {
 51          const int ingrad_size = model->output_size * parallel_count;
 52          assert(ingrad_size <= input_size);
 53          if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
 54              ccv_cnnp_model_backward(model, inputs, ingrad_size, outputs, output_size, 0, stream_context);
 55          else if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
 56              ccv_cnnp_model_backward(model, inputs, ingrad_size, 0, 0, 0, stream_context);
 57          else {
 58              assert(output_size == model->input_size * parallel_count);
 59              int per_outgrad_size = 0;
 60              int i, j, k;
 61              for (i = 0; i < model->input_size; i++)
 62                  if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << i)))
 63                      ++per_outgrad_size;
 64              assert(per_outgrad_size > 0);
 65              const int outgrad_size = per_outgrad_size * parallel_count;
 66              ccv_nnc_tensor_t* outgrads[outgrad_size];
 67              for (i = 0; i < parallel_count; i++)
 68                  for (k = 0, j = 0; j < model->input_size; j++)
 69                      if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << j)))
 70                          outgrads[(k++) + i * per_outgrad_size] = outputs[j + i * model->input_size];
 71              ccv_cnnp_model_backward(model, inputs, ingrad_size, outgrads, outgrad_size, 0, stream_context);
 72          }
 73          stateful_exec->did_backward_but_not_apply_gradients = 1;
 74      }
 75      if (stream_context)
 76      {
 77          // A new scheduler should have been created.
 78          assert(stream_context->scheduler);
 79          // The new scheduler shouldn't be active (everything is scheduled).
 80          assert(!co_scheduler_is_active(stream_context->scheduler));
 81          co_scheduler_free(stream_context->scheduler);
 82          // Switch back to the old scheduler.
 83          stream_context->scheduler = old_scheduler;
 84          // The main coroutine should be cleared.
 85          assert(!stream_context->main);
 86          stream_context->main = old_main;
 87      }
 88      if (wait_for_any_neighbor) // Now have all neighbor contexts wait on this stream context.
 89      {
 90          assert(stream_context);
 91          ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
 92          for (i = 0; i < parallel_count; i++)
 93          {
 94              ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
 95              if (neighbor_context && neighbor_context != stream_context)
 96                  ccv_nnc_stream_context_wait_signal(neighbor_context, signal);
 97          }
 98      }
 99      return CCV_NNC_EXEC_SUCCESS;
100  }
101
102  static void _ccv_cnnp_model_tensor_auto(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
103  {
104      ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
105      ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
106      const int parallel_count = ccv_max(model->parallel_count, 1);
107      const int per_input_size = input_size / parallel_count;
108      assert(per_input_size > 0);
109      assert((input_size % parallel_count) == 0);
110      const int per_output_size = output_size / parallel_count;
111      assert(per_output_size > 0);
112      assert((output_size % parallel_count) == 0);
113      int i, j;
114      for (i = 0; i < parallel_count; i++)
115      {
116          ccv_cnnp_model_tensor_auto(model, outputs + i * per_output_size, per_output_size);
117          // Set the device id to the corresponding input's device id.
118          const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[i * per_input_size].type);
119          for (j = 0; j < per_output_size; j++)
120              CCV_TENSOR_SET_DEVICE_ID(outputs[i * per_output_size + j].type, device_id);
121      }
122  }
123
124  static void _ccv_cnnp_model_apply_gradients(const ccv_nnc_cmd_t cmd, ccv_nnc_stream_context_t* const stream_context)
125  {
126      ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
127      ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
128      ccv_cnnp_model_apply_gradients(model, stream_context);
129  }
130
131  static ccv_nnc_stateful_cmd_vtab_t ccv_cnnp_model_exec_isa = {
132      .super = {
133          .exec = _ccv_cnnp_model_exec,
134          .tensor_auto = _ccv_cnnp_model_tensor_auto,
135      },
136      .apply_gradients = _ccv_cnnp_model_apply_gradients,
137  };
138
139  void ccv_nnc_dynamic_graph_dry_run(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context)
140  {
141      assert(input_size > 0);
142      const int parallel_count = ccv_max(model->parallel_count, 1);
143      const int per_input_size = input_size / parallel_count;
144      assert(per_input_size > 0);
145      assert((input_size % parallel_count) == 0);
146      int i, j;
147      if (!model->graph)
148      {
149          ccv_nnc_tensor_param_t input_params[per_input_size];
150          for (i = 0; i < per_input_size; i++)
151              input_params[i] = inputs[i]->info;
152          ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
153      } else {
154          assert(per_input_size == model->input_size);
155          ccv_nnc_tensor_param_t input_params[per_input_size];
156          int flag = 0;
157          for (i = 0; i < per_input_size; i++)
158          {
159              input_params[i] = inputs[i]->info;
160              const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
161              // If these two parameters don't match, recompile the graph.
162              if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
163                  flag = 1;
164          }
165          if (flag) // Recompile the graph.
166              ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
167      }
168      ccv_nnc_tensor_t* input_tensors[input_size];
169      for (i = 0; i < input_size; i++)
170      {
171          // The parameter cannot be a partial tensor view for model evaluation.
172          input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
173          if (input_tensors[i])
174              { assert(CCV_IS_TENSOR_CONTIGUOUS(input_tensors[i])); }
175      }
176      const int per_output_size = ccv_cnnp_model_output_size(model);
177      ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
178      const int output_size = per_output_size * parallel_count;
179      ccv_nnc_tensor_variable_t outputs[output_size];
180      ccv_nnc_tensor_t* output_tensors[output_size];
181      for (i = 0; i < parallel_count; i++)
182      {
183          for (j = 0; j < per_output_size; j++)
184              output_params[j] = ccv_nnc_tensor_auto;
185          ccv_cnnp_model_tensor_auto(model, output_params, per_output_size);
186          for (j = 0; j < per_output_size; j++)
187              if (!ccv_nnc_is_tensor_auto(output_params[j]))
188              {
189                  outputs[i * per_output_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, output_params[j]);
190                  output_tensors[i * per_output_size + j] = ccv_nnc_tensor_from_variable(dynamic_graph, outputs[i * per_output_size + j], stream_context);
191              } else {
192                  outputs[i * per_output_size + j] = 0;
193                  output_tensors[i * per_output_size + j] = 0;
194              }
195      }
196      if (dynamic_graph->no_grad)
197      {
198          ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
199              .requires_grad = 0,
200              .disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
201              .is_test = is_test,
202          }, input_tensors, input_size, output_tensors, output_size);
203      } else {
204          uint64_t disable_outgrad = 0;
205          int count = 0;
206          for (i = 0; i < per_input_size; i++)
207              if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
208              {
209                  disable_outgrad |= ((uint64_t)1 << i);
210                  ++count;
211              }
212          if (count == per_input_size)
213              disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
214          ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
215              .requires_grad = 1,
216              .disable_outgrad = disable_outgrad,
217              .is_test = is_test,
218          }, input_tensors, input_size, output_tensors, output_size);
219      }
220      // Free the allocated variables.
221      for (i = 0; i < output_size; i++)
222          if (outputs[i])
223              ccv_nnc_tensor_variable_free(dynamic_graph, outputs[i]);
224  }
225
226  void ccv_nnc_dynamic_graph_evaluate(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
227  {
228      ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_model_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
229      assert(input_size > 0);
230      const int parallel_count = ccv_max(model->parallel_count, 1);
231      const int per_input_size = input_size / parallel_count;
232      assert(per_input_size > 0);
233      assert((input_size % parallel_count) == 0);
234      int i;
235      if (!model->graph)
236      {
237          ccv_nnc_tensor_param_t input_params[per_input_size];
238          for (i = 0; i < per_input_size; i++)
239              input_params[i] = inputs[i]->info;
240          ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
241      } else {
242          assert(per_input_size == model->input_size);
243          ccv_nnc_tensor_param_t input_params[per_input_size];
244          int flag = 0;
245          for (i = 0; i < per_input_size; i++)
246          {
247              input_params[i] = inputs[i]->info;
248              const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
249              // If these two parameters don't match, recompile the graph.
250              if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
251                  flag = 1;
252          }
253          if (flag) // Recompile the graph.
254              ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
255      }
256      for (i = 0; i < input_size; i++)
257      {
258          // The parameter cannot be a partial tensor view for model evaluation.
259          ccv_nnc_tensor_t* const tensor = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
260          if (tensor)
261              { assert(CCV_IS_TENSOR_CONTIGUOUS(tensor)); }
262      }
263      if (dynamic_graph->no_grad)
264      {
265          ccv_nnc_stateful_exec_t stateful_exec = {
266              .requires_grad = 0,
267              .is_test = is_test,
268              .disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
269              .tensor_tape = tensor_tape,
270              .data = model
271          };
272          cmd.data = &stateful_exec;
273          // A parallel parameter doesn't make sense here; parallelism is defined inside the model.
274          ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, 0);
275      } else {
276          uint64_t disable_outgrad = 0;
277          int count = 0;
278          for (i = 0; i < per_input_size; i++)
279              if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
280              {
281                  disable_outgrad |= ((uint64_t)1 << i);
282                  ++count;
283              }
284          if (count == per_input_size)
285              disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
286          ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)ccmalloc(sizeof(ccv_nnc_stateful_exec_t));
287          cmd.data = stateful_exec;
288          stateful_exec->requires_grad = 1;
289          stateful_exec->is_test = is_test;
290          stateful_exec->did_backward_but_not_apply_gradients = 0;
291          stateful_exec->should_free = 0;
292          stateful_exec->disable_outgrad = disable_outgrad;
293          stateful_exec->tensor_tape = tensor_tape;
294          stateful_exec->data = model;
295          stateful_exec->cmd = cmd;
296          ccv_nnc_graph_exec_symbol_t symbol = {};
297          ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, &symbol);
298          if (!symbol.graph) // This happens when the inputs are all constants.
299              ccfree(stateful_exec); // No one recorded it; no cmd.data refers to it.
300          else {
301              if (!dynamic_graph->stateful_execs)
302              {
303                  dynamic_graph->stateful_execs = ccv_array_new(sizeof(ccv_nnc_stateful_exec_t*), 1, 0);
304                  ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
305                  stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
306              } else {
307                  if (dynamic_graph->reuse_stateful_exec >= 0)
308                  {
309                      *(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, dynamic_graph->reuse_stateful_exec) = stateful_exec;
310                      stateful_exec->index = dynamic_graph->reuse_stateful_exec;
311                      int flag = 0;
312                      for (i = dynamic_graph->reuse_stateful_exec + 1; !flag && i < dynamic_graph->stateful_execs->rnum; i++)
313                          if (*(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, i) == 0)
314                              dynamic_graph->reuse_stateful_exec = i, flag = 1;
315                      if (!flag) // No free slot found; reset to -1.
316                          dynamic_graph->reuse_stateful_exec = -1;
317                  } else {
318                      // Push a new entry; no slot available for reuse.
319                      ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
320                      stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
321                  }
322              }
323          }
324      }
325  }
326
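Assuming the intended invariant is that a compiled model always has at least one output, a fix in the file's own idiom would mirror the assert at line 111 before the VLA declarations in `ccv_nnc_dynamic_graph_dry_run`. A sketch of lines 176-180 with that guard added (not a committed patch):

    const int per_output_size = ccv_cnnp_model_output_size(model);
    assert(per_output_size > 0); // Same guard as line 111; makes every VLA size below provably positive.
    ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
    const int output_size = per_output_size * parallel_count;
    ccv_nnc_tensor_variable_t outputs[output_size];
    ccv_nnc_tensor_t* output_tensors[output_size];

With `per_output_size > 0` established, the `ccv_max(1, ...)` clamp at line 177 also becomes redundant, though it is harmless to keep.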