File: nnc/ccv_nnc_dynamic_graph_evaluate.c
Warning: line 179, column 2: Declared variable-length array (VLA) has negative size
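The flagged declaration is `ccv_nnc_tensor_variable_t outputs[output_size];` at line 179 of `ccv_nnc_dynamic_graph_dry_run` below. `output_size` is computed at line 178 as `per_output_size * parallel_count`, and `per_output_size` is whatever `ccv_cnnp_model_output_size(model)` returns at line 176; nothing on this path constrains that value to be positive, so the analyzer apparently considers a model with a non-positive output count, which would make the VLA size negative. Line 177 already clamps the same quantity with `ccv_max(1, per_output_size)` for `output_params`, while lines 179-180 use the unclamped product. A possible guard is sketched after the listing.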
1 | #include "ccv_nnc.h"
2 | #include "ccv_nnc_easy.h"
3 | #include "ccv_nnc_internal.h"
4 | #include "ccv_nnc_easy.h"
5 | #include "ccv_internal.h"
6 | #include "_ccv_nnc_dynamic_graph.h"
7 | #include "_ccv_cnnp_model.h"
8 |
9 | // MARK - Level-5.5 API
10 |
11 | static int _ccv_cnnp_model_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
12 | {
13 |   ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
14 |   ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
15 |   // I cannot just use stream context, it cannot synchronize correctly based on existing coroutine implementation.
16 |   int i;
17 |   int wait_for_any_neighbor = 0;
18 |   const int parallel_count = ccv_max(model->parallel_count, 1);
19 |   if (stream_context) // Find all neighbor context and wait on them all.
20 |     for (i = 0; i < parallel_count; i++)
21 |     {
22 |       ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
23 |       if (neighbor_context && neighbor_context != stream_context)
24 |       {
25 |         ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(neighbor_context);
26 |         ccv_nnc_stream_context_wait_signal(stream_context, signal);
27 |         wait_for_any_neighbor = 1;
28 |       }
29 |     }
30 |   co_scheduler_t* old_scheduler;
31 |   co_routine_t* old_main;
32 |   if (stream_context)
33 |   {
34 |     old_main = stream_context->main;
35 |     old_scheduler = stream_context->scheduler;
36 |     // We cannot piggyback on old scheduler.
37 |     stream_context->scheduler = 0;
38 |     // We will have a new main coroutine when schedule as the root.
39 |     // Otherwise it will be scheduled after the existing routines all scheduled
40 |     // out, and that won't be right.
41 |     stream_context->main = 0;
42 |   }
43 |   if (cmd.cmd == CCV_NNC_CUSTOM_FORWARD)
44 |   {
45 |     ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
46 |       .requires_grad = stateful_exec->requires_grad,
47 |       .disable_outgrad = stateful_exec->disable_outgrad,
48 |       .is_test = stateful_exec->is_test,
49 |     }, inputs, input_size, outputs, output_size, 0, stream_context);
50 |   } else {
51 |     const int ingrad_size = model->output_size * parallel_count;
52 |     assert(ingrad_size <= input_size);
53 |     if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
54 |       ccv_cnnp_model_backward(model, inputs, ingrad_size, outputs, output_size, 0, stream_context);
55 |     else if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
56 |       ccv_cnnp_model_backward(model, inputs, ingrad_size, 0, 0, 0, stream_context);
57 |     else {
58 |       assert(output_size == model->input_size * parallel_count);
59 |       int per_outgrad_size = 0;
60 |       int i, j, k;
61 |       for (i = 0; i < model->input_size; i++)
62 |         if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << i)))
63 |           ++per_outgrad_size;
64 |       assert(per_outgrad_size > 0);
65 |       const int outgrad_size = per_outgrad_size * parallel_count;
66 |       ccv_nnc_tensor_t* outgrads[outgrad_size];
67 |       for (i = 0; i < parallel_count; i++)
68 |         for (k = 0, j = 0; j < model->input_size; j++)
69 |           if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << j)))
70 |             outgrads[(k++) + i * per_outgrad_size] = outputs[j + i * model->input_size];
71 |       ccv_cnnp_model_backward(model, inputs, ingrad_size, outgrads, outgrad_size, 0, stream_context);
72 |     }
73 |     stateful_exec->did_backward_but_not_apply_gradients = 1;
74 |   }
75 |   if (stream_context)
76 |   {
77 |     // Should have new scheduler created.
78 |     assert(stream_context->scheduler);
79 |     // The new scheduler shouldn't be active (everything is scheduled).
80 |     assert(!co_scheduler_is_active(stream_context->scheduler));
81 |     co_scheduler_free(stream_context->scheduler);
82 |     // Switch back to the old scheduler.
83 |     stream_context->scheduler = old_scheduler;
84 |     // The main coroutine should be cleared.
85 |     assert(!stream_context->main);
86 |     stream_context->main = old_main;
87 |   }
88 |   if (wait_for_any_neighbor) // Find all neighbor context and wait on them all.
89 |   {
90 |     assert(stream_context);
91 |     ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
92 |     for (i = 0; i < parallel_count; i++)
93 |     {
94 |       ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
95 |       if (neighbor_context && neighbor_context != stream_context)
96 |         ccv_nnc_stream_context_wait_signal(neighbor_context, signal);
97 |     }
98 |   }
99 |   return CCV_NNC_EXEC_SUCCESS;
100 | }
101 |
102 | static void _ccv_cnnp_model_tensor_auto(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
103 | {
104 |   ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
105 |   ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
106 |   const int parallel_count = ccv_max(model->parallel_count, 1);
107 |   const int per_input_size = input_size / parallel_count;
108 |   assert(per_input_size > 0);
109 |   assert((input_size % parallel_count) == 0);
110 |   const int per_output_size = output_size / parallel_count;
111 |   assert(per_output_size > 0);
112 |   assert((output_size % parallel_count) == 0);
113 |   int i, j;
114 |   for (i = 0; i < parallel_count; i++)
115 |   {
116 |     ccv_cnnp_model_tensor_auto(model, outputs + i * per_output_size, per_output_size);
117 |     // Set device id to the corresponding inputs' device id.
118 |     const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[i * per_input_size].type);
119 |     for (j = 0; j < per_output_size; j++)
120 |       CCV_TENSOR_SET_DEVICE_ID(outputs[i * per_output_size + j].type, device_id);
121 |   }
122 | }
123 |
124 | static void _ccv_cnnp_model_apply_gradients(const ccv_nnc_cmd_t cmd, ccv_nnc_stream_context_t* const stream_context)
125 | {
126 |   ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
127 |   ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
128 |   ccv_cnnp_model_apply_gradients(model, stream_context);
129 | }
130 |
131 | static ccv_nnc_stateful_cmd_vtab_t ccv_cnnp_model_exec_isa = {
132 |   .super = {
133 |     .exec = _ccv_cnnp_model_exec,
134 |     .tensor_auto = _ccv_cnnp_model_tensor_auto,
135 |   },
136 |   .apply_gradients = _ccv_cnnp_model_apply_gradients,
137 | };
138 |
139 | void ccv_nnc_dynamic_graph_dry_run(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context)
140 | {
141 |   assert(input_size > 0);
142 |   const int parallel_count = ccv_max(model->parallel_count, 1);
143 |   const int per_input_size = input_size / parallel_count;
144 |   assert(per_input_size > 0);
145 |   assert((input_size % parallel_count) == 0);
146 |   int i, j;
147 |   if (!model->graph)
148 |   {
149 |     ccv_nnc_tensor_param_t input_params[per_input_size];
150 |     for (i = 0; i < per_input_size; i++)
151 |       input_params[i] = inputs[i]->info;
152 |     ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
153 |   } else {
154 |     assert(per_input_size == model->input_size);
155 |     ccv_nnc_tensor_param_t input_params[per_input_size];
156 |     int flag = 0;
157 |     for (i = 0; i < per_input_size; i++)
158 |     {
159 |       input_params[i] = inputs[i]->info;
160 |       const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
161 |       // If these two parameters doesn't match, recompile the graph..
162 |       if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
163 |         flag = 1;
164 |     }
165 |     if (flag) // Recompile the graph.
166 |       ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
167 |   }
168 |   ccv_nnc_tensor_t* input_tensors[input_size];
169 |   for (i = 0; i < input_size; i++)
170 |   {
171 |     // Cannot have the parameter be a partial tensor view for model evaluation.
172 |     input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
173 |     if (input_tensors[i])
174 |       { assert(CCV_IS_TENSOR_CONTIGUOUS(input_tensors[i])); }
175 |   }
176 |   const int per_output_size = ccv_cnnp_model_output_size(model);
177 |   ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
178 |   const int output_size = per_output_size * parallel_count;
179 |   ccv_nnc_tensor_variable_t outputs[output_size];
180 |   ccv_nnc_tensor_t* output_tensors[output_size];
181 |   for (i = 0; i < parallel_count; i++)
182 |   {
183 |     for (j = 0; j < per_output_size; j++)
184 |       output_params[j] = ccv_nnc_tensor_auto;
185 |     ccv_cnnp_model_tensor_auto(model, output_params, per_output_size);
186 |     for (j = 0; j < per_output_size; j++)
187 |       if (!ccv_nnc_is_tensor_auto(output_params[j]))
188 |       {
189 |         outputs[i * per_output_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, output_params[j]);
190 |         output_tensors[i * per_output_size + j] = ccv_nnc_tensor_from_variable(dynamic_graph, outputs[i * per_output_size + j], stream_context);
191 |       } else {
192 |         outputs[i * per_output_size + j] = 0;
193 |         output_tensors[i * per_output_size + j] = 0;
194 |       }
195 |   }
196 |   if (dynamic_graph->no_grad)
197 |   {
198 |     ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
199 |       .requires_grad = 0,
200 |       .disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
201 |       .is_test = is_test,
202 |     }, input_tensors, input_size, output_tensors, output_size);
203 |   } else {
204 |     uint64_t disable_outgrad = 0;
205 |     int count = 0;
206 |     for (i = 0; i < per_input_size; i++)
207 |       if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
208 |       {
209 |         disable_outgrad |= ((uint64_t)1 << i);
210 |         ++count;
211 |       }
212 |     if (count == per_input_size)
213 |       disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
214 |     ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
215 |       .requires_grad = 1,
216 |       .disable_outgrad = disable_outgrad,
217 |       .is_test = is_test,
218 |     }, input_tensors, input_size, output_tensors, output_size);
219 |   }
220 |   // Free the allocated variables.
221 |   for (i = 0; i < output_size; i++)
222 |     if (outputs[i])
223 |       ccv_nnc_tensor_variable_free(dynamic_graph, outputs[i]);
224 | }
225 |
226 | void ccv_nnc_dynamic_graph_evaluate(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
227 | {
228 |   ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_model_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
229 |   assert(input_size > 0);
230 |   const int parallel_count = ccv_max(model->parallel_count, 1);
231 |   const int per_input_size = input_size / parallel_count;
232 |   assert(per_input_size > 0);
233 |   assert((input_size % parallel_count) == 0);
234 |   int i;
235 |   if (!model->graph)
236 |   {
237 |     ccv_nnc_tensor_param_t input_params[per_input_size];
238 |     for (i = 0; i < per_input_size; i++)
239 |       input_params[i] = inputs[i]->info;
240 |     ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
241 |   } else {
242 |     assert(per_input_size == model->input_size);
243 |     ccv_nnc_tensor_param_t input_params[per_input_size];
244 |     int flag = 0;
245 |     for (i = 0; i < per_input_size; i++)
246 |     {
247 |       input_params[i] = inputs[i]->info;
248 |       const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
249 |       // If these two parameters doesn't match, recompile the graph..
250 |       if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
251 |         flag = 1;
252 |     }
253 |     if (flag) // Recompile the graph.
254 |       ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
255 |   }
256 |   for (i = 0; i < input_size; i++)
257 |   {
258 |     // Cannot have the parameter be a partial tensor view for model evaluation.
259 |     ccv_nnc_tensor_t* const tensor = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
260 |     if (tensor)
261 |       { assert(CCV_IS_TENSOR_CONTIGUOUS(tensor)); }
262 |   }
263 |   if (dynamic_graph->no_grad)
264 |   {
265 |     ccv_nnc_stateful_exec_t stateful_exec = {
266 |       .requires_grad = 0,
267 |       .is_test = is_test,
268 |       .disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
269 |       .tensor_tape = tensor_tape,
270 |       .data = model
271 |     };
272 |     cmd.data = &stateful_exec;
273 |     // Parallel parameter doesn't make sense here, the parallel is defined inside the model.
274 |     ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, 0);
275 |   } else {
276 |     uint64_t disable_outgrad = 0;
277 |     int count = 0;
278 |     for (i = 0; i < per_input_size; i++)
279 |       if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
280 |       {
281 |         disable_outgrad |= ((uint64_t)1 << i);
282 |         ++count;
283 |       }
284 |     if (count == per_input_size)
285 |       disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
286 |     ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)ccmalloc(sizeof(ccv_nnc_stateful_exec_t));
287 |     cmd.data = stateful_exec;
288 |     stateful_exec->requires_grad = 1;
289 |     stateful_exec->is_test = is_test;
290 |     stateful_exec->did_backward_but_not_apply_gradients = 0;
291 |     stateful_exec->should_free = 0;
292 |     stateful_exec->disable_outgrad = disable_outgrad;
293 |     stateful_exec->tensor_tape = tensor_tape;
294 |     stateful_exec->data = model;
295 |     stateful_exec->cmd = cmd;
296 |     ccv_nnc_graph_exec_symbol_t symbol = {};
297 |     ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, &symbol);
298 |     if (!symbol.graph) // This is because inputs are all constants.
299 |       ccfree(stateful_exec); // No one records it, there is no cmd.data refer to it.
300 |     else {
301 |       if (!dynamic_graph->stateful_execs)
302 |       {
303 |         dynamic_graph->stateful_execs = ccv_array_new(sizeof(ccv_nnc_stateful_exec_t*), 1, 0);
304 |         ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
305 |         stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
306 |       } else {
307 |         if (dynamic_graph->reuse_stateful_exec >= 0)
308 |         {
309 |           *(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, dynamic_graph->reuse_stateful_exec) = stateful_exec;
310 |           stateful_exec->index = dynamic_graph->reuse_stateful_exec;
311 |           int flag = 0;
312 |           for (i = dynamic_graph->reuse_stateful_exec + 1; !flag && i < dynamic_graph->stateful_execs->rnum; i++)
313 |             if (*(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, i) == 0)
314 |               dynamic_graph->reuse_stateful_exec = i, flag = 1;
315 |           if (!flag) // Reset to -1.
316 |             dynamic_graph->reuse_stateful_exec = -1;
317 |         } else {
318 |           // Push new, no reuse available.
319 |           ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
320 |           stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
321 |         }
322 |       }
323 |     }
324 |   }
325 | }
326 |
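One way to make the positive-size invariant explicit is to assert on `per_output_size` before the VLA declarations, mirroring the `ccv_max(1, per_output_size)` clamp the function already uses for `output_params` at line 177. The excerpt below is only a sketch of lines 176-180, not a change that exists in the file; the added `assert` is the hypothetical line. Since `parallel_count` is already clamped to at least 1 at line 142, a positive `per_output_size` makes `output_size` positive on every path; note the guard does nothing in NDEBUG builds, so an explicit early return would be the sturdier alternative if that matters.

  const int per_output_size = ccv_cnnp_model_output_size(model);
  assert(per_output_size > 0); // hypothetical guard: rules out a negative size for the VLAs below
  ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
  const int output_size = per_output_size * parallel_count;
  ccv_nnc_tensor_variable_t outputs[output_size];
  ccv_nnc_tensor_t* output_tensors[output_size];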