File: nnc/ccv_nnc_dynamic_graph_evaluate.c
Warning: line 222, column 7: Branch condition evaluates to a garbage value
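Note on the warning: the flagged branch is the "if (outputs[i])" at line 222. outputs is a variable-length array declared at line 179 and filled by the loop at lines 181-195; the analyzer cannot prove that every element is written before the cleanup loop at lines 221-223 reads it, so it reports a possible read of an uninitialized ("garbage") value. A minimal defensive fix, sketched under the assumption that zero-initialization is acceptable here (this code is not part of the file):

	ccv_nnc_tensor_variable_t outputs[output_size];
	// Zero the whole VLA up front (memset is from <string.h>); every element
	// then starts as a null pointer, so "if (outputs[i])" always reads
	// initialized memory and the warning goes away.
	memset(outputs, 0, sizeof(ccv_nnc_tensor_variable_t) * output_size);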
1 | #include "ccv_nnc.h"
2 | #include "ccv_nnc_easy.h"
3 | #include "ccv_nnc_internal.h"
4 | #include "ccv_nnc_easy.h"
5 | #include "ccv_internal.h"
6 | #include "_ccv_nnc_dynamic_graph.h"
7 | #include "_ccv_cnnp_model.h"
8 |
9 | // MARK - Level-5.5 API
10 |
11 | static int _ccv_cnnp_model_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
12 | {
13 | 	ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
14 | 	ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
15 | 	// We cannot just use the stream context; it cannot synchronize correctly with the existing coroutine implementation.
16 | 	int i;
17 | 	int wait_for_any_neighbor = 0;
18 | 	const int parallel_count = ccv_max(model->parallel_count, 1);
19 | 	if (stream_context) // Find all neighbor contexts and wait on them all.
20 | 		for (i = 0; i < parallel_count; i++)
21 | 		{
22 | 			ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
23 | 			if (neighbor_context && neighbor_context != stream_context)
24 | 			{
25 | 				ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(neighbor_context);
26 | 				ccv_nnc_stream_context_wait_signal(stream_context, signal);
27 | 				wait_for_any_neighbor = 1;
28 | 			}
29 | 		}
30 | 	co_scheduler_t* old_scheduler;
31 | 	co_routine_t* old_main;
32 | 	if (stream_context)
33 | 	{
34 | 		old_main = stream_context->main;
35 | 		old_scheduler = stream_context->scheduler;
36 | 		// We cannot piggyback on the old scheduler.
37 | 		stream_context->scheduler = 0;
38 | 		// We will have a new main coroutine when scheduled as the root.
39 | 		// Otherwise it would be scheduled only after all the existing routines
40 | 		// are scheduled out, and that won't be right.
41 | 		stream_context->main = 0;
42 | 	}
43 | 	if (cmd.cmd == CCV_NNC_CUSTOM_FORWARD)
44 | 	{
45 | 		ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
46 | 			.requires_grad = stateful_exec->requires_grad,
47 | 			.disable_outgrad = stateful_exec->disable_outgrad,
48 | 			.is_test = stateful_exec->is_test,
49 | 		}, inputs, input_size, outputs, output_size, 0, stream_context);
50 | 	} else {
51 | 		const int ingrad_size = model->output_size * parallel_count;
52 | 		assert(ingrad_size <= input_size);
53 | 		if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
54 | 			ccv_cnnp_model_backward(model, inputs, ingrad_size, outputs, output_size, 0, stream_context);
55 | 		else if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
56 | 			ccv_cnnp_model_backward(model, inputs, ingrad_size, 0, 0, 0, stream_context);
57 | 		else {
58 | 			assert(output_size == model->input_size * parallel_count);
59 | 			int per_outgrad_size = 0;
60 | 			int i, j, k;
61 | 			for (i = 0; i < model->input_size; i++)
62 | 				if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << i)))
63 | 					++per_outgrad_size;
64 | 			assert(per_outgrad_size > 0);
65 | 			const int outgrad_size = per_outgrad_size * parallel_count;
66 | 			ccv_nnc_tensor_t* outgrads[outgrad_size];
67 | 			for (i = 0; i < parallel_count; i++)
68 | 				for (k = 0, j = 0; j < model->input_size; j++)
69 | 					if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << j)))
70 | 						outgrads[(k++) + i * per_outgrad_size] = outputs[j + i * model->input_size];
71 | 			ccv_cnnp_model_backward(model, inputs, ingrad_size, outgrads, outgrad_size, 0, stream_context);
72 | 		}
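			/* Worked example for the packing above (illustrative numbers, not from
			 * this file): with model->input_size == 4 and only bit 2 of
			 * disable_outgrad set, per_outgrad_size == 3, and for each replica i
			 * the inner loop copies outputs[0], outputs[1] and outputs[3] into
			 * outgrads[i * 3 + 0..2], skipping the disabled slot. */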
73 | 		stateful_exec->did_backward_but_not_apply_gradients = 1;
74 | 	}
75 | 	if (stream_context)
76 | 	{
77 | 		// Should have a new scheduler created.
78 | 		assert(stream_context->scheduler);
79 | 		// The new scheduler shouldn't be active (everything is scheduled).
80 | 		assert(!co_scheduler_is_active(stream_context->scheduler));
81 | 		co_scheduler_free(stream_context->scheduler);
82 | 		// Switch back to the old scheduler.
83 | 		stream_context->scheduler = old_scheduler;
84 | 		// The main coroutine should be cleared.
85 | 		assert(!stream_context->main);
86 | 		stream_context->main = old_main;
87 | 	}
88 | 	if (wait_for_any_neighbor) // Find all neighbor contexts and make them wait on us.
89 | 	{
90 | 		assert(stream_context);
91 | 		ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
92 | 		for (i = 0; i < parallel_count; i++)
93 | 		{
94 | 			ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
95 | 			if (neighbor_context && neighbor_context != stream_context)
96 | 				ccv_nnc_stream_context_wait_signal(neighbor_context, signal);
97 | 		}
98 | 	}
99 | 	return CCV_NNC_EXEC_SUCCESS;
100 | }
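/* Note: the save/null/restore of stream_context->scheduler and ->main above
 * forces the nested model evaluation to build a fresh coroutine scheduler and
 * a fresh root coroutine (see the comments at lines 36-40), frees that
 * scheduler once it has drained, then puts the originals back. */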
101 |
102 | static void _ccv_cnnp_model_tensor_auto(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
103 | {
104 | 	ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
105 | 	ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
106 | 	const int parallel_count = ccv_max(model->parallel_count, 1);
107 | 	const int per_input_size = input_size / parallel_count;
108 | 	assert(per_input_size > 0);
109 | 	assert((input_size % parallel_count) == 0);
110 | 	const int per_output_size = output_size / parallel_count;
111 | 	assert(per_output_size > 0);
112 | 	assert((output_size % parallel_count) == 0);
113 | 	int i, j;
114 | 	for (i = 0; i < parallel_count; i++)
115 | 	{
116 | 		ccv_cnnp_model_tensor_auto(model, outputs + i * per_output_size, per_output_size);
117 | 		// Set device id to the corresponding inputs' device id.
118 | 		const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[i * per_input_size].type);
119 | 		for (j = 0; j < per_output_size; j++)
120 | 			CCV_TENSOR_SET_DEVICE_ID(outputs[i * per_output_size + j].type, device_id);
121 | 	}
122 | }
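/* Note (recovered from the macro expansions in the analyzer output): the
 * device id occupies bits 8..19 of the tensor type word, so
 * CCV_TENSOR_GET_DEVICE_ID(type) evaluates to ((type & 0xfff00) >> 8) and
 * CCV_TENSOR_SET_DEVICE_ID(type, id) stores
 * ((type & ~0xfff00) | ((id & 0xfff) << 8)). */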
123 |
124 | static void _ccv_cnnp_model_apply_gradients(const ccv_nnc_cmd_t cmd, ccv_nnc_stream_context_t* const stream_context)
125 | {
126 | 	ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
127 | 	ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
128 | 	ccv_cnnp_model_apply_gradients(model, stream_context);
129 | }
130 |
131 | static ccv_nnc_stateful_cmd_vtab_t ccv_cnnp_model_exec_isa = {
132 | 	.super = {
133 | 		.exec = _ccv_cnnp_model_exec,
134 | 		.tensor_auto = _ccv_cnnp_model_tensor_auto,
135 | 	},
136 | 	.apply_gradients = _ccv_cnnp_model_apply_gradients,
137 | };
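/* Usage sketch (this mirrors line 228 below, not new API): the dynamic graph
 * routes a custom command through this vtab by constructing
 *   ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_model_exec_isa, (ccv_nnc_cmd_param_t){}, 0)
 * so the exec, tensor_auto and apply_gradients hooks above handle execution,
 * shape inference and gradient application for the wrapped model. */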
138 |
139 | void ccv_nnc_dynamic_graph_dry_run(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context)
140 | {
141 | 	assert(input_size > 0);
142 | 	const int parallel_count = ccv_max(model->parallel_count, 1);
143 | 	const int per_input_size = input_size / parallel_count;
144 | 	assert(per_input_size > 0);
145 | 	assert((input_size % parallel_count) == 0);
146 | 	int i, j;
147 | 	if (!model->graph)
148 | 	{
149 | 		ccv_nnc_tensor_param_t input_params[per_input_size];
150 | 		for (i = 0; i < per_input_size; i++)
151 | 			input_params[i] = inputs[i]->info;
152 | 		ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
153 | 	} else {
154 | 		assert(per_input_size == model->input_size);
155 | 		ccv_nnc_tensor_param_t input_params[per_input_size];
156 | 		int flag = 0;
157 | 		for (i = 0; i < per_input_size; i++)
158 | 		{
159 | 			input_params[i] = inputs[i]->info;
160 | 			const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
161 | 			// If these two parameters don't match, recompile the graph.
162 | 			if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
163 | 				flag = 1;
164 | 		}
165 | 		if (flag) // Recompile the graph.
166 | 			ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
167 | 	}
168 | 	ccv_nnc_tensor_t* input_tensors[input_size];
169 | 	for (i = 0; i < input_size; i++)
170 | 	{
171 | 		// Cannot have the parameter be a partial tensor view for model evaluation.
172 | 		input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
173 | 		if (input_tensors[i])
174 | 			{ assert(CCV_IS_TENSOR_CONTIGUOUS(input_tensors[i])); }
175 | 	}
176 | 	const int per_output_size = ccv_cnnp_model_output_size(model);
177 | 	ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
178 | 	const int output_size = per_output_size * parallel_count;
179 | 	ccv_nnc_tensor_variable_t outputs[output_size];
180 | 	ccv_nnc_tensor_t* output_tensors[output_size];
181 | 	for (i = 0; i < parallel_count; i++)
182 | 	{
183 | 		for (j = 0; j < per_output_size; j++)
184 | 			output_params[j] = ccv_nnc_tensor_auto;
185 | 		ccv_cnnp_model_tensor_auto(model, output_params, per_output_size);
186 | 		for (j = 0; j < per_output_size; j++)
187 | 			if (!ccv_nnc_is_tensor_auto(output_params[j]))
188 | 			{
189 | 				outputs[i * per_output_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, output_params[j]);
190 | 				output_tensors[i * per_output_size + j] = ccv_nnc_tensor_from_variable(dynamic_graph, outputs[i * per_output_size + j], stream_context);
191 | 			} else {
192 | 				outputs[i * per_output_size + j] = 0;
193 | 				output_tensors[i * per_output_size + j] = 0;
194 | 			}
195 | 	}
196 | 	if (dynamic_graph->no_grad)
197 | 	{
198 | 		ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
199 | 			.requires_grad = 0,
200 | 			.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
201 | 			.is_test = is_test,
202 | 		}, input_tensors, input_size, output_tensors, output_size);
203 | 	} else {
204 | 		uint64_t disable_outgrad = 0;
205 | 		int count = 0;
206 | 		for (i = 0; i < per_input_size; i++)
207 | 			if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
208 | 			{
209 | 				disable_outgrad |= ((uint64_t)1 << i);
210 | 				++count;
211 | 			}
212 | 		if (count == per_input_size)
213 | 			disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
214 | 		ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
215 | 			.requires_grad = 1,
216 | 			.disable_outgrad = disable_outgrad,
217 | 			.is_test = is_test,
218 | 		}, input_tensors, input_size, output_tensors, output_size);
219 | 	}
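	/* Note: disable_outgrad is a per-input bitmask. Illustrative example (not
	 * from this file): with per_input_size == 3 and only inputs[1] constant,
	 * disable_outgrad == ((uint64_t)1 << 1) == 2; when every input is null or
	 * constant, the mask collapses to CCV_CNNP_DISABLE_OUTGRAD_ALL. */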
220 | 	// Free the allocated variables.
221 | 	for (i = 0; i < output_size; i++)
222 | 		if (outputs[i])
223 | 			ccv_nnc_tensor_variable_free(dynamic_graph, outputs[i]);
224 | }
225 |
226 | void ccv_nnc_dynamic_graph_evaluate(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
227 | {
228 | 	ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_model_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
229 | 	assert(input_size > 0);
230 | 	const int parallel_count = ccv_max(model->parallel_count, 1);
231 | 	const int per_input_size = input_size / parallel_count;
232 | 	assert(per_input_size > 0);
233 | 	assert((input_size % parallel_count) == 0);
234 | 	int i;
235 | 	if (!model->graph)
236 | 	{
237 | 		ccv_nnc_tensor_param_t input_params[per_input_size];
238 | 		for (i = 0; i < per_input_size; i++)
239 | 			input_params[i] = inputs[i]->info;
240 | 		ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
241 | 	} else {
242 | 		assert(per_input_size == model->input_size);
243 | 		ccv_nnc_tensor_param_t input_params[per_input_size];
244 | 		int flag = 0;
245 | 		for (i = 0; i < per_input_size; i++)
246 | 		{
247 | 			input_params[i] = inputs[i]->info;
248 | 			const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
249 | 			// If these two parameters don't match, recompile the graph.
250 | 			if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
251 | 				flag = 1;
252 | 		}
253 | 		if (flag) // Recompile the graph.
254 | 			ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
255 | 	}
256 | 	for (i = 0; i < input_size; i++)
257 | 	{
258 | 		// Cannot have the parameter be a partial tensor view for model evaluation.
259 | 		ccv_nnc_tensor_t* const tensor = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
260 | 		if (tensor)
261 | 			{ assert(CCV_IS_TENSOR_CONTIGUOUS(tensor)); }
262 | 	}
263 | 	if (dynamic_graph->no_grad)
264 | 	{
265 | 		ccv_nnc_stateful_exec_t stateful_exec = {
266 | 			.requires_grad = 0,
267 | 			.is_test = is_test,
268 | 			.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
269 | 			.tensor_tape = tensor_tape,
270 | 			.data = model
271 | 		};
272 | 		cmd.data = &stateful_exec;
273 | 		// A parallel parameter doesn't make sense here; parallelism is defined inside the model.
274 | 		ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, 0);
275 | 	} else {
276 | 		uint64_t disable_outgrad = 0;
277 | 		int count = 0;
278 | 		for (i = 0; i < per_input_size; i++)
279 | 			if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
280 | 			{
281 | 				disable_outgrad |= ((uint64_t)1 << i);
282 | 				++count;
283 | 			}
284 | 		if (count == per_input_size)
285 | 			disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
286 | 		ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)ccmalloc(sizeof(ccv_nnc_stateful_exec_t));
287 | 		cmd.data = stateful_exec;
288 | 		stateful_exec->requires_grad = 1;
289 | 		stateful_exec->is_test = is_test;
290 | 		stateful_exec->did_backward_but_not_apply_gradients = 0;
291 | 		stateful_exec->should_free = 0;
292 | 		stateful_exec->disable_outgrad = disable_outgrad;
293 | 		stateful_exec->tensor_tape = tensor_tape;
294 | 		stateful_exec->data = model;
295 | 		stateful_exec->cmd = cmd;
296 | 		ccv_nnc_graph_exec_symbol_t symbol = {};
297 | 		ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, &symbol);
298 | 		if (!symbol.graph) // This happens when the inputs are all constants.
299 | 			ccfree(stateful_exec); // No one recorded it; no cmd.data refers to it.
300 | 		else {
301 | 			if (!dynamic_graph->stateful_execs)
302 | 			{
303 | 				dynamic_graph->stateful_execs = ccv_array_new(sizeof(ccv_nnc_stateful_exec_t*), 1, 0);
304 | 				ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
305 | 				stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
306 | 			} else {
307 | 				if (dynamic_graph->reuse_stateful_exec >= 0)
308 | 				{
309 | 					*(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, dynamic_graph->reuse_stateful_exec) = stateful_exec;
310 | 					stateful_exec->index = dynamic_graph->reuse_stateful_exec;
311 | 					int flag = 0;
312 | 					for (i = dynamic_graph->reuse_stateful_exec + 1; !flag && i < dynamic_graph->stateful_execs->rnum; i++)
313 | 						if (*(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, i) == 0)
314 | 							dynamic_graph->reuse_stateful_exec = i, flag = 1;
315 | 					if (!flag) // No vacant slot found; reset to -1.
316 | 						dynamic_graph->reuse_stateful_exec = -1;
317 | 				} else {
318 | 					// Push new; no reuse available.
319 | 					ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
320 | 					stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
321 | 				}
322 | 			}
323 | 		}
324 | 	}
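		/* Note: stateful_execs doubles as a free list. reuse_stateful_exec holds
		 * the lowest vacated slot index (or -1 when none); after filling that
		 * slot, the scan above advances it to the next empty entry, falling back
		 * to -1 when the array has no more holes. */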
325 | } | ||||
326 |