/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_micro_core.c
Line | Count | Source |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #include "ccv_internal.h" |
5 | | #include "_ccv_nnc_micro.h" |
6 | | #include "3rdparty/khash/khash.h" |
7 | | |
8 | | // MARK - Level-1 API |
9 | | |
// An input io is a pure placeholder: it has no behavior of its own, so every
// vtab entry stays NULL / default.
const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_input_isa = {};

// Map a forward variable id to its gradient variable id. Gradients occupy the
// upper half of a 2 * var_count id space, mirrored: id 0 <-> 2 * var_count - 1.
// Expects a local `var_count` to be in scope at the expansion site.
#define GRAD(_id) (2 * (var_count) - 1 - (_id))
13 | | |
14 | | ccv_nnc_micro_io_t ccv_nnc_micro_input(const int dimensions) |
15 | 6 | { |
16 | 6 | assert(dimensions <= CCV_NNC_MAX_DIM_ALLOC); |
17 | 6 | ccv_nnc_micro_io_t input = cccalloc(1, sizeof(struct ccv_nnc_micro_io_s)); |
18 | 6 | input->isa = &ccv_nnc_micro_io_input_isa; |
19 | 6 | input->dimensions = dimensions; |
20 | 6 | input->id = 0; |
21 | 6 | return input; |
22 | 6 | } |
// A gradient io wraps the forward io it is the gradient of.
struct ccv_nnc_micro_io_grad_s {
	struct ccv_nnc_micro_io_s super;
	ccv_nnc_micro_io_t x; // The forward variable this gradient mirrors.
};
27 | | |
// Numbering pass for a gradient io: its id is derived from the forward
// variable's id via the GRAD mapping rather than taking the fresh `id`
// offered by the caller (which is intentionally unused here; `var_count`
// is consumed by the GRAD macro).
static void _ccv_nnc_micro_grad_numbering(const ccv_nnc_micro_io_t super, const int id, const int var_count)
{
	struct ccv_nnc_micro_io_grad_s* const self = (struct ccv_nnc_micro_io_grad_s*)super;
	const int sid = self->x->id;
	self->super.id = GRAD(sid);
}

// Gradient ios only participate in numbering; everything else is default.
const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_grad_isa = {
	.numbering = _ccv_nnc_micro_grad_numbering
};
38 | | |
39 | | ccv_nnc_micro_io_t ccv_nnc_micro_grad(const ccv_nnc_micro_io_t x) |
40 | 9 | { |
41 | 9 | struct ccv_nnc_micro_io_grad_s* const grad = cccalloc(1, sizeof(struct ccv_nnc_micro_io_grad_s)); |
42 | 9 | grad->super.isa = &ccv_nnc_micro_io_grad_isa; |
43 | 9 | grad->super.dimensions = x->dimensions; |
44 | 9 | grad->super.id = 0; |
45 | 9 | grad->x = x; |
46 | 9 | return (ccv_nnc_micro_io_t)grad; |
47 | 9 | } |
48 | | |
// A simple recursive descent parser. Omitted tokenisation step.

// Try to consume `symbol` (`size` bytes) at the current cursor. On a match
// the cursor advances and *remain_size shrinks, returning 1; otherwise the
// cursor is left untouched and 0 is returned.
static int _accept(const char** const pos, int* const remain_size, const char* symbol, int size)
{
	if (*remain_size < size)
		return 0;
	if (memcmp(*pos, symbol, size) != 0)
		return 0;
	*pos += size;
	*remain_size -= size;
	return 1;
}
62 | | |
// Consume `symbol` like _accept, but treat absence as a grammar error:
// asserts in debug builds, returns 0 otherwise.
static int _expect(const char** const pos, int* const remain_size, const char* symbol, int size)
{
	const int matched = _accept(pos, remain_size, symbol, size);
	if (!matched)
		{ assert(0 && "unexpected symbol"); }
	return matched;
}
70 | | |
// Parse a non-negative decimal integer at the cursor into *id.
// Consumes the digits and returns 1 if at least one digit was read;
// otherwise *id is left at 0 and the cursor does not move.
static int _constant(const char** const pos, int* const remain_size, int* const id)
{
	int consumed = 0;
	*id = 0;
	while (*remain_size - consumed > 0 && pos[0][consumed] >= '0' && pos[0][consumed] <= '9')
	{
		*id = *id * 10 + (pos[0][consumed] - '0');
		++consumed;
	}
	*pos += consumed;
	*remain_size -= consumed;
	return consumed > 0;
}
85 | | |
// Parse a loop index reference of the form i[0-9]+ (e.g. "i0", "i12") into
// *id. Returns 1 and consumes the token on success; a bare 'i' with no digits
// is rejected and nothing is consumed.
static int _index(const char** const pos, int* const remain_size, int* const id)
{
	if (*remain_size <= 0 || pos[0][0] != 'i')
		return 0;
	int consumed = 1;
	*id = 0;
	while (*remain_size - consumed > 0 && pos[0][consumed] >= '0' && pos[0][consumed] <= '9')
	{
		*id = *id * 10 + (pos[0][consumed] - '0');
		++consumed;
	}
	if (consumed == 1)
		return 0;
	*pos += consumed;
	*remain_size -= consumed;
	return 1;
}
106 | | |
// Parse a dimension reference of the form d[A-Z][0-9]+ (e.g. "dA0"): the
// letter selects a tensor (*id = letter - 'A') and the digits select the axis
// (*d). Optionally followed by "[= dX0]", which records an axis-size equality
// assertion into equal_assertions. Tensor ids are stored negated (-(id + 1))
// until the numbering pass resolves them to real ids.
static int _dim(const char** const pos, int* const remain_size, int* const id, int* const d, ccv_array_t* const equal_assertions)
{
	if (!(*remain_size > 1 && pos[0][0] == 'd'))
		return 0;
	if (!(pos[0][1] >= 'A' && pos[0][1] <= 'Z'))
		return 0;
	*id = pos[0][1] - 'A';
	int size = 2;
	*d = 0;
	while (*remain_size - size > 0 && pos[0][size] >= '0' && pos[0][size] <= '9')
	{
		*d *= 10;
		*d += (pos[0][size] - '0');
		++size;
	}
	// NOTE(review): size starts at 2, so this test is always true — a bare
	// "dA" (no axis digits) parses with *d == 0. Presumably harmless; confirm
	// against the DIM -> d[A-Z]{1}[0-9]+ grammar documented below.
	if (size > 1)
	{
		*remain_size -= size;
		*pos += size;
		// Skip whitespace, then look for an optional "[= dX0]" assertion.
		while (_accept(pos, remain_size, " ", 1)) {}
		if (_accept(pos, remain_size, "[", 1))
		{
			while (_accept(pos, remain_size, " ", 1)) {}
			_expect(pos, remain_size, "=", 1);
			while (_accept(pos, remain_size, " ", 1)) {}
			int next_id;
			int next_d;
			// The right-hand side must itself be a dimension reference.
			if (!_dim(pos, remain_size, &next_id, &next_d, equal_assertions))
				{ assert(0 && "unexpected symbol"); }
			const ccv_nnc_micro_id_equal_assertion_t equal_assertion = {
				.left = {
					.type = CCV_NNC_MICRO_AXIS_SIZE_ID,
					.id = -(*id + 1),
					.d = *d
				},
				.right = {
					.type = CCV_NNC_MICRO_AXIS_SIZE_ID,
					.id = -(next_id + 1),
					.d = next_d
				}
			};
			ccv_array_push(equal_assertions, &equal_assertion);
			while (_accept(pos, remain_size, " ", 1)) {}
			_expect(pos, remain_size, "]", 1);
		}
		return 1;
	}
	return 0;
}
156 | | |
// Parse a scalar variable reference $[a-zA-Z0-9_]+ into a freshly allocated,
// NUL-terminated copy (including the leading '$'). The caller owns *name.
// A bare '$' with no identifier characters is rejected without consuming.
static int _var(const char** const pos, int* const remain_size, char** name)
{
	if (*remain_size <= 0 || pos[0][0] != '$')
		return 0;
	int size = 1;
	while (*remain_size - size > 0 &&
		((pos[0][size] >= '0' && pos[0][size] <= '9') ||
		 (pos[0][size] >= 'a' && pos[0][size] <= 'z') ||
		 (pos[0][size] >= 'A' && pos[0][size] <= 'Z') ||
		 pos[0][size] == '_'))
		++size;
	if (size == 1)
		return 0;
	char* const copy = ccmalloc(size + 1);
	memcpy(copy, *pos, size);
	copy[size] = 0;
	*name = copy;
	*pos += size;
	*remain_size -= size;
	return 1;
}
179 | | |
static CCV_WARN_UNUSED(ccv_nnc_micro_loop_index_term_t) _expression(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions);

// FACTOR -> CONST | INDEX | DIM | VAR | "(" EXPRESSION ")"
// Branch order matters: _constant must run first so a leading digit is not
// misread by the other parsers. Trailing whitespace is consumed.
static ccv_nnc_micro_loop_index_term_t _factor(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions)
{
	ccv_nnc_micro_loop_index_term_t term;
	while (_accept(pos, remain_size, " ", 1)) {}
	int id, d;
	char* name;
	if (_constant(pos, remain_size, &id)) {
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL;
		term.immediate_value = id;
	} else if (_index(pos, remain_size, &id)) {
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID;
		term.id.type = CCV_NNC_MICRO_LOOP_ID;
		term.id.id = id;
	} else if (_dim(pos, remain_size, &id, &d, equal_assertions)) {
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID;
		term.id.type = CCV_NNC_MICRO_AXIS_SIZE_ID;
		term.id.d = d;
		// Tensor id stays negated until the numbering pass resolves it.
		term.id.id = -(id + 1);
	} else if (_var(pos, remain_size, &name)) {
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_UNBOUND_SCALAR;
		// Ownership of the allocated name transfers to the term; it is freed
		// when the scalar is bound (see _ccv_nnc_bind_scalars_in_term).
		term.name = name;
	} else if (_accept(pos, remain_size, "(", 1)) {
		term = _expression(pos, remain_size, equal_assertions);
		_expect(pos, remain_size, ")", 1);
	} else {
		assert(0 && "factor: syntax error");
	}
	while (_accept(pos, remain_size, " ", 1)) {}
	return term;
}
212 | | |
213 | | static ccv_nnc_micro_loop_index_term_t _term(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions) |
214 | 74 | { |
215 | 94 | while (_accept(pos, remain_size, " ", 1)) {}20 |
216 | 74 | ccv_nnc_micro_loop_index_term_t term = _factor(pos, remain_size, equal_assertions); |
217 | 74 | while (*remain_size > 0 && (20 pos[0][0] == '*'20 || pos[0][0] == '/'20 )) |
218 | 0 | { |
219 | 0 | const int op = pos[0][0] == '*' ? CCV_NNC_MICRO_BINARY_OP_MUL : CCV_NNC_MICRO_BINARY_OP_DIV; |
220 | 0 | *remain_size -= 1; |
221 | 0 | *pos += 1; |
222 | 0 | const ccv_nnc_micro_loop_index_term_t left = term; |
223 | 0 | const ccv_nnc_micro_loop_index_term_t right = _factor(pos, remain_size, equal_assertions); |
224 | 0 | term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY; |
225 | 0 | term.binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t)); |
226 | 0 | term.binary->op = op; |
227 | 0 | term.binary->left = left; |
228 | 0 | term.binary->right = right; |
229 | 0 | } |
230 | 74 | while (_accept(pos, remain_size, " ", 1)) {}0 |
231 | 74 | return term; |
232 | 74 | } |
233 | | |
234 | | static ccv_nnc_micro_loop_index_term_t _expression(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions) |
235 | 54 | { |
236 | 54 | while (_accept(pos, remain_size, " ", 1)) {}0 |
237 | 54 | int prefix_op = -1; |
238 | 54 | if (*remain_size > 0 && (pos[0][0] == '+' || pos[0][0] == '-')) |
239 | 0 | { |
240 | 0 | prefix_op = pos[0][0] == '+' ? CCV_NNC_MICRO_BINARY_OP_PLUS : CCV_NNC_MICRO_BINARY_OP_MINUS; |
241 | 0 | *remain_size -= 1; |
242 | 0 | *pos += 1; |
243 | 0 | } |
244 | 54 | ccv_nnc_micro_loop_index_term_t node = _term(pos, remain_size, equal_assertions); |
245 | 74 | while (*remain_size > 0 && (20 pos[0][0] == '+'20 || pos[0][0] == '-'8 )) |
246 | 20 | { |
247 | 20 | const int op = pos[0][0] == '+' ? CCV_NNC_MICRO_BINARY_OP_PLUS12 : CCV_NNC_MICRO_BINARY_OP_MINUS8 ; |
248 | 20 | *remain_size -= 1; |
249 | 20 | *pos += 1; |
250 | 20 | const ccv_nnc_micro_loop_index_term_t left = node; |
251 | 20 | const ccv_nnc_micro_loop_index_term_t right = _term(pos, remain_size, equal_assertions); |
252 | 20 | node.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY; |
253 | 20 | node.binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t)); |
254 | 20 | node.binary->op = op; |
255 | 20 | node.binary->left = left; |
256 | 20 | node.binary->right = right; |
257 | 20 | } |
258 | 54 | while (_accept(pos, remain_size, " ", 1)) {}0 |
259 | 54 | if (prefix_op >= 0) |
260 | 0 | { |
261 | 0 | ccv_nnc_micro_loop_index_binary_t* const expr = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t)); |
262 | 0 | expr->op = prefix_op; |
263 | 0 | expr->left = node; |
264 | 0 | expr->right.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_NONE; |
265 | 0 | node.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY; |
266 | 0 | node.binary = expr; |
267 | 0 | } |
268 | 54 | return node; |
269 | 54 | } |
270 | | |
// Debug-build validation that a parsed shape expression contains no loop
// indices: every ID leaf must be an axis-size reference. Values, unbound
// scalars and NONE terms pass through the switch untouched.
static void _no_index(const ccv_nnc_micro_loop_index_term_t term)
{
	switch (term.type) {
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
			// Can only be axis size id. No loop index.
			assert(term.id.type == CCV_NNC_MICRO_AXIS_SIZE_ID);
			break;
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY:
			// Recurse into both operands.
			_no_index(term.binary->left);
			_no_index(term.binary->right);
			break;
	}
}
284 | | |
285 | | static void _sid_to_axis_size_term(ccv_nnc_micro_loop_index_term_t* const term, const int* const sids, const int sid_count) |
286 | 94 | { |
287 | 94 | switch (term->type) { |
288 | 66 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID: |
289 | | // Can only be axis size id. No loop index. |
290 | 66 | if (term->id.type == CCV_NNC_MICRO_AXIS_SIZE_ID && term->id.id < 032 ) |
291 | 32 | { |
292 | 32 | const int id = -(term->id.id + 1); |
293 | 32 | assert(id >= 0 && id < sid_count); |
294 | 32 | term->id.id = sids[id]; |
295 | 32 | } |
296 | 66 | break; |
297 | 66 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY: |
298 | 20 | _sid_to_axis_size_term(&term->binary->left, sids, sid_count); |
299 | 20 | _sid_to_axis_size_term(&term->binary->right, sids, sid_count); |
300 | 20 | break; |
301 | 94 | } |
302 | 94 | } |
303 | | |
struct ccv_nnc_micro_io_reindex_s {
	struct ccv_nnc_micro_io_s super;
	int s_count; // Number of shape tensors referenced by dA..dZ placeholders.
	ccv_nnc_micro_io_t x; // The tensor being reindexed.
	ccv_nnc_micro_loop_index_term_t* shape; // super.dimensions expressions describing the output shape.
	ccv_nnc_micro_loop_index_term_t* reindex; // x->dimensions expressions mapping output to input coordinates.
	ccv_nnc_micro_io_t* ss; // The s_count shape tensors followed by x (doubles as the inputs array).
	ccv_array_t* equal_assertions; // ccv_nnc_micro_id_equal_assertion_t collected while parsing.
};
313 | | |
// Numbering pass: take the fresh id, then resolve all parse-time placeholder
// axis-size ids (negated indices into ss) in shape / reindex expressions and
// in the recorded equality assertions to real tensor ids.
static void _ccv_nnc_micro_reindex_numbering(const ccv_nnc_micro_io_t super, const int id, const int var_count)
{
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	self->super.id = id;
	// No need to update axis size.
	if (self->s_count == 0)
		return;
	// VLA is safe here: s_count > 0 was checked above and is small (<= 26).
	int sids[self->s_count];
	int i;
	for (i = 0; i < self->s_count; i++)
		sids[i] = self->ss[i]->id;
	for (i = 0; i < self->super.dimensions; i++)
		_sid_to_axis_size_term(&self->shape[i], sids, self->s_count);
	for (i = 0; i < self->x->dimensions; i++)
		_sid_to_axis_size_term(&self->reindex[i], sids, self->s_count);
	for (i = 0; i < self->equal_assertions->rnum; i++)
	{
		ccv_nnc_micro_id_equal_assertion_t* const equal_assertion = (ccv_nnc_micro_id_equal_assertion_t*)ccv_array_get(self->equal_assertions, i);
		if (equal_assertion->left.type == CCV_NNC_MICRO_AXIS_SIZE_ID && equal_assertion->left.id < 0)
		{
			// Inner `id` intentionally shadows the parameter: the placeholder index.
			const int id = -(equal_assertion->left.id + 1);
			assert(id >= 0 && id < self->s_count);
			equal_assertion->left.id = sids[id];
		}
		if (equal_assertion->right.type == CCV_NNC_MICRO_AXIS_SIZE_ID && equal_assertion->right.id < 0)
		{
			const int id = -(equal_assertion->right.id + 1);
			assert(id >= 0 && id < self->s_count);
			equal_assertion->right.id = sids[id];
		}
	}
}
346 | | |
347 | | static void _ccv_nnc_micro_reindex_equal_assertions(const ccv_nnc_micro_io_t super, ccv_array_t* const equal_assertions) |
348 | 6 | { |
349 | 6 | struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super; |
350 | 6 | int i; |
351 | 10 | for (i = 0; i < self->equal_assertions->rnum; i++4 ) |
352 | 4 | { |
353 | 4 | ccv_nnc_micro_id_equal_assertion_t* const equal_assertion = (ccv_nnc_micro_id_equal_assertion_t*)ccv_array_get(self->equal_assertions, i); |
354 | 4 | ccv_array_push(equal_assertions, equal_assertion); |
355 | 4 | } |
356 | 6 | } |
357 | | |
358 | | static void _ccv_nnc_bind_scalars_in_term(ccv_nnc_micro_loop_index_term_t* const term, ccv_nnc_micro_scalar_lookup_f lookup, const void* const context) |
359 | 94 | { |
360 | 94 | switch (term->type) |
361 | 94 | { |
362 | 20 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY: |
363 | 20 | _ccv_nnc_bind_scalars_in_term(&term->binary->left, lookup, context); |
364 | 20 | _ccv_nnc_bind_scalars_in_term(&term->binary->right, lookup, context); |
365 | 20 | break; |
366 | 10 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_UNBOUND_SCALAR: { |
367 | 10 | char* const name = term->name; |
368 | 10 | term->id.id = lookup(context, name); |
369 | 10 | ccfree(name); |
370 | 10 | term->id.d = 0; |
371 | 10 | term->id.type = CCV_NNC_MICRO_SCALAR_ID; |
372 | 10 | term->type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID; |
373 | 10 | break; |
374 | 0 | } |
375 | 94 | } |
376 | 94 | } |
377 | | |
378 | | static void _ccv_nnc_micro_reindex_bind_scalars(const ccv_nnc_micro_io_t super, ccv_nnc_micro_scalar_lookup_f lookup, const void* const context) |
379 | 6 | { |
380 | 6 | struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super; |
381 | 6 | int i; |
382 | 40 | for (i = 0; i < self->super.dimensions; i++34 ) |
383 | 34 | _ccv_nnc_bind_scalars_in_term(&self->shape[i], lookup, context); |
384 | 26 | for (i = 0; i < self->x->dimensions; i++20 ) |
385 | 20 | _ccv_nnc_bind_scalars_in_term(&self->reindex[i], lookup, context); |
386 | 6 | } |
387 | | |
388 | | ccv_nnc_micro_loop_index_term_t ccv_nnc_micro_loop_index_deep_copy(const ccv_nnc_micro_loop_index_term_t* const term) |
389 | 197 | { |
390 | 197 | switch (term->type) |
391 | 197 | { |
392 | 36 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY: { |
393 | 36 | ccv_nnc_micro_loop_index_term_t copy = *term; |
394 | 36 | copy.binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t)); |
395 | 36 | *copy.binary = *term->binary; |
396 | 36 | copy.binary->left = ccv_nnc_micro_loop_index_deep_copy(&term->binary->left); |
397 | 36 | copy.binary->right = ccv_nnc_micro_loop_index_deep_copy(&term->binary->right); |
398 | 36 | return copy; |
399 | 0 | } |
400 | 0 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_NONE: |
401 | 118 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID: |
402 | 161 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL: |
403 | 161 | case CCV_NNC_MICRO_LOOP_INDEX_TYPE_UNBOUND_SCALAR: |
404 | 161 | return *term; |
405 | 197 | } |
406 | 0 | return *term; |
407 | 197 | } |
408 | | |
// Emit the forward loops for reindex: one loop per output dimension, with a
// single innermost assignment out[i...] = x[reindex(i...)].
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reindex_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	const int loop_count = self->super.dimensions;
	assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i);
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, self->x->dimensions, self->reindex))
	);
	// The statement above takes (shallow) ownership of self->reindex's terms;
	// re-deep-copy them so this io can be emitted again (e.g. for gradients).
	for (i = 0; i < self->x->dimensions; i++)
		self->reindex[i] = ccv_nnc_micro_loop_index_deep_copy(&self->reindex[i]);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
435 | | |
// Emit the gradient of reindex as two blocks: first zero the input gradient,
// then scatter-accumulate the output gradient through the reindex mapping
// (compound assignment, since multiple outputs may map to one input).
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reindex_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	// The grad is var_count + original id.
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	const int reset_loop_count = self->x->dimensions;
	ccv_nnc_micro_loop_t* const reset_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * reset_loop_count);
	// This loop reset grad to 0.
	int i;
	for (i = 0; i < reset_loop_count; i++)
		reset_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
	const ccv_nnc_micro_loop_statement_t reset_statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), reset_loop_count, ccv_nnc_micro_index_of_loops(reset_loops, reset_loop_count)),
		ccv_nnc_micro_loop_expression_of_value(0)
	);
	reset_loops[reset_loop_count - 1].statement_count = 1;
	reset_loops[reset_loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	reset_loops[reset_loop_count - 1].statements[0] = reset_statement;
	const int loop_count = self->super.dimensions;
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i);
	// grad_x[reindex(i...)] += grad_out[i...]
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_compound_assignment_of_tensor(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), self->x->dimensions, self->reindex),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
	);
	// The statement takes ownership of self->reindex's terms; re-deep-copy
	// them so the io stays reusable (same pattern as the forward emit).
	for (i = 0; i < self->x->dimensions; i++)
		self->reindex[i] = ccv_nnc_micro_loop_index_deep_copy(&self->reindex[i]);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	ccv_nnc_micro_loop_block_t* const blocks = (ccv_nnc_micro_loop_block_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_block_t) * 2);
	blocks[0] = (ccv_nnc_micro_loop_block_t){
		.loop_count = reset_loop_count,
		.loops = reset_loops
	};
	blocks[1] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = loops
	};
	return (ccv_nnc_micro_function_t){
		.block_count = 2,
		.blocks = blocks
	};
}
480 | | |
481 | | static ccv_nnc_micro_tensor_t _ccv_nnc_micro_reindex_return_shape(const ccv_nnc_micro_io_t super) |
482 | 6 | { |
483 | 6 | struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super; |
484 | 6 | ccv_nnc_micro_tensor_t var = {}; |
485 | 6 | var.dimensions = self->super.dimensions; |
486 | 6 | var.sibling = -1; |
487 | 6 | var.input = self->x->id; |
488 | 6 | var.shape = (ccv_nnc_micro_loop_index_term_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions); |
489 | 6 | memcpy(var.shape, self->shape, sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions); |
490 | 6 | return var; |
491 | 6 | } |
492 | | |
// Release resources owned by a reindex io: the reindex expression trees and
// the equality-assertion array.
static void _ccv_nnc_micro_reindex_deinit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	int i;
	for (i = 0; i < self->x->dimensions; i++)
		ccv_nnc_micro_loop_index_free(&self->reindex[i]);
	// NOTE(review): self->shape terms are not freed here — presumably their
	// ownership moved out via _ccv_nnc_micro_reindex_return_shape's shallow
	// copy and they are freed with that tensor descriptor; confirm.
	ccv_array_free(self->equal_assertions);
}
501 | | |
// Reindex implements the full vtab: numbering, assertion export, scalar
// binding, forward and gradient emission, shape reporting and cleanup.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_reindex_isa = {
	.numbering = _ccv_nnc_micro_reindex_numbering,
	.equal_assertions = _ccv_nnc_micro_reindex_equal_assertions,
	.bind_scalars = _ccv_nnc_micro_reindex_bind_scalars,
	.emit = _ccv_nnc_micro_reindex_emit,
	.emit_grad = _ccv_nnc_micro_reindex_emit_grad,
	.return_shape = _ccv_nnc_micro_reindex_return_shape,
	.deinit = _ccv_nnc_micro_reindex_deinit
};
511 | | |
// Build a reindex io: `shape` gives shape_count expressions for the output
// shape, `reindex` gives reindex_count (== x->dimensions) expressions mapping
// output coordinates to x's coordinates, and `ss` lists the s_count tensors
// the dA..dZ placeholders refer to. Everything (struct, both term arrays and
// the inputs array) lives in one cccalloc allocation.
ccv_nnc_micro_io_t ccv_nnc_micro_reindex(const char* const* const shape, const int shape_count, const ccv_nnc_micro_io_t* const ss, const int s_count, const char* const* const reindex, const int reindex_count, const ccv_nnc_micro_io_t x)
{
	assert(shape_count <= CCV_NNC_MAX_DIM_ALLOC);
	assert(reindex_count <= CCV_NNC_MAX_DIM_ALLOC);
	assert(reindex_count == x->dimensions);
	int i;
	// Single allocation: [struct][shape terms][reindex terms][ss + x].
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_reindex_s) + sizeof(ccv_nnc_micro_loop_index_term_t) * (shape_count + reindex_count) + sizeof(ccv_nnc_micro_io_t) * (s_count + 1));
	self->super.isa = &ccv_nnc_micro_io_reindex_isa;
	self->super.dimensions = shape_count;
	self->super.id = 0;
	self->x = x;
	self->shape = (ccv_nnc_micro_loop_index_term_t*)(self + 1);
	self->reindex = self->shape + shape_count;
	self->ss = (ccv_nnc_micro_io_t*)(self->reindex + reindex_count);
	self->s_count = s_count;
	// x is appended after the shape tensors so the ss array doubles as the
	// complete inputs list.
	self->ss[s_count] = x;
	self->super.inputs = self->ss;
	self->super.input_size = s_count + 1;
	if (s_count > 0)
		memcpy(self->ss, ss, sizeof(ccv_nnc_micro_io_t) * s_count);
	ccv_array_t* const equal_assertions = self->equal_assertions = ccv_array_new(sizeof(ccv_nnc_micro_id_equal_assertion_t), 0, 0);
	// Parse shape into expressions and validate the grammar. Do this upfront so we don't fail on parsing
	// later, which can be confusing.
	// CFG:
	// VAR -> $[a-zA-Z0-9]+
	// DIM -> d[A-Z]{1}[0-9]+
	// INDEX -> i[0-9]+
	// CONST -> [0-9]+
	// FACTOR -> VAR | DIM | CONST | INDEX
	// TERM -> FACTOR { ("*" | "/") FACTOR }
	// EXPRESSION -> ["+" | "-"] TERM { ("+" | "-") TERM }
	// Also, we choose to reuse the index expression structure even some information (such as id of tensors
	// and the binding variables) not available. In this way, there is no need to reallocate index expression
	// later, we just need to simply "patch" it in ccv_nnc_micro_combine_t.
	for (i = 0; i < shape_count; i++)
	{
		int remain_size = strlen(shape[i]);
		const char* pos = shape[i];
		ccv_nnc_micro_loop_index_term_t term = _expression(&pos, &remain_size, equal_assertions);
		_no_index(term); // Make sure this is not index, no loop index.
		self->shape[i] = term;
	}
	// Parse reindex.
	for (i = 0; i < reindex_count; i++)
	{
		int remain_size = strlen(reindex[i]);
		const char* pos = reindex[i];
		self->reindex[i] = _expression(&pos, &remain_size, equal_assertions);
	}
	return (ccv_nnc_micro_io_t)self;
}
563 | | |
// An elementwise unary operation over a single tensor.
struct ccv_nnc_micro_io_unary_s {
	struct ccv_nnc_micro_io_s super;
	uint32_t unary_op; // One of the CCV_NNC_MICRO_UNARY_OP_* constants.
	ccv_nnc_micro_io_t x; // The operand tensor.
};
569 | | |
// Emit forward loops for the unary op: one loop per dimension with a single
// innermost assignment out[i...] = op(x[i...]); shapes of x and out match.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_unary_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i);
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_unary(
			self->unary_op,
			ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
		)
	);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
598 | | |
599 | | static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_unary_emit_grad(const ccv_nnc_micro_io_t super, const int var_count) |
600 | 0 | { |
601 | 0 | struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)super; |
602 | 0 | const int loop_count = self->super.dimensions; |
603 | 0 | assert(self->x->dimensions == loop_count); |
604 | 0 | assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC); |
605 | 0 | ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count); |
606 | 0 | int i; |
607 | 0 | for (i = 0; i < loop_count; i++) |
608 | 0 | loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i); |
609 | 0 | ccv_nnc_micro_loop_statement_t statement; |
610 | 0 | switch (self->unary_op) |
611 | 0 | { |
612 | 0 | case CCV_NNC_MICRO_UNARY_OP_NEG: |
613 | 0 | statement = ccv_nnc_micro_loop_assignment( |
614 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)), |
615 | 0 | ccv_nnc_micro_loop_expression_of_unary( |
616 | 0 | CCV_NNC_MICRO_UNARY_OP_NEG, |
617 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))) |
618 | 0 | ) |
619 | 0 | ); |
620 | 0 | break; |
621 | 0 | case CCV_NNC_MICRO_UNARY_OP_EXP: |
622 | 0 | statement = ccv_nnc_micro_loop_assignment( |
623 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)), |
624 | 0 | ccv_nnc_micro_loop_expression_of_binary( |
625 | 0 | CCV_NNC_MICRO_BINARY_OP_MUL, |
626 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))), |
627 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))) |
628 | 0 | ) |
629 | 0 | ); |
630 | 0 | break; |
631 | 0 | case CCV_NNC_MICRO_UNARY_OP_LOG: |
632 | 0 | statement = ccv_nnc_micro_loop_assignment( |
633 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)), |
634 | 0 | ccv_nnc_micro_loop_expression_of_binary( |
635 | 0 | CCV_NNC_MICRO_BINARY_OP_DIV, |
636 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))), |
637 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))) |
638 | 0 | ) |
639 | 0 | ); |
640 | 0 | break; |
641 | 0 | } |
642 | 0 | loops[loop_count - 1].statement_count = 1; |
643 | 0 | loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t)); |
644 | 0 | loops[loop_count - 1].statements[0] = statement; |
645 | 0 | return (ccv_nnc_micro_function_t){ |
646 | 0 | .block_count = 1, |
647 | 0 | .one_block = { |
648 | 0 | .loop_count = loop_count, |
649 | 0 | .loops = loops |
650 | 0 | } |
651 | 0 | }; |
652 | 0 | } |
653 | | |
654 | | static ccv_nnc_micro_tensor_t _ccv_nnc_micro_unary_return_shape(const ccv_nnc_micro_io_t super) |
655 | 0 | { |
656 | 0 | struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)super; |
657 | 0 | ccv_nnc_micro_tensor_t var = {}; |
658 | 0 | var.dimensions = self->super.dimensions; |
659 | 0 | var.input = self->x->id; |
660 | 0 | var.sibling = -1; |
661 | 0 | return var; |
662 | 0 | } |
663 | | |
// Virtual table for elementwise unary ops (neg / log / exp, ...): forward
// emission, gradient emission and shape inference.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_unary_isa = {
	.emit = _ccv_nnc_micro_unary_emit,
	.emit_grad = _ccv_nnc_micro_unary_emit_grad,
	.return_shape = _ccv_nnc_micro_unary_return_shape
};
669 | | |
670 | | ccv_nnc_micro_io_t ccv_nnc_micro_unary(const uint32_t op, const ccv_nnc_micro_io_t x) |
671 | 0 | { |
672 | 0 | struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_unary_s)); |
673 | 0 | self->super.isa = &ccv_nnc_micro_io_unary_isa; |
674 | 0 | self->super.dimensions = x->dimensions; |
675 | 0 | self->super.id = 0; |
676 | 0 | self->super.inputs = &self->x; |
677 | 0 | self->super.input_size = 1; |
678 | 0 | self->unary_op = op; |
679 | 0 | self->x = x; |
680 | 0 | return (ccv_nnc_micro_io_t)self; |
681 | 0 | } |
682 | | |
// An elementwise binary op node: super.id = left binary_op right.
struct ccv_nnc_micro_io_binary_s {
	struct ccv_nnc_micro_io_s super; // Must be first so the struct can be cast to ccv_nnc_micro_io_t.
	uint32_t binary_op; // One of CCV_NNC_MICRO_BINARY_OP_*.
	// left and right must stay adjacent and in this order: the constructor sets
	// super.inputs = &self->left with input_size = 2, treating them as an array.
	ccv_nnc_micro_io_t left;
	ccv_nnc_micro_io_t right;
};
689 | | |
690 | | static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_binary_emit(const ccv_nnc_micro_io_t super) |
691 | 6 | { |
692 | 6 | struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)super; |
693 | 6 | const int loop_count = self->super.dimensions; |
694 | 6 | assert(self->left->dimensions == loop_count); |
695 | 6 | assert(self->right->dimensions == loop_count); |
696 | 6 | assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC); |
697 | 6 | ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count); |
698 | 6 | int i; |
699 | 40 | for (i = 0; i < loop_count; i++34 ) |
700 | 34 | loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i); |
701 | 6 | const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment( |
702 | 6 | ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)), |
703 | 6 | ccv_nnc_micro_loop_expression_of_binary( |
704 | 6 | self->binary_op, |
705 | 6 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))), |
706 | 6 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))) |
707 | 6 | ) |
708 | 6 | ); |
709 | 6 | loops[loop_count - 1].statement_count = 1; |
710 | 6 | loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t)); |
711 | 6 | loops[loop_count - 1].statements[0] = statement; |
712 | 6 | return (ccv_nnc_micro_function_t){ |
713 | 6 | .block_count = 1, |
714 | 6 | .one_block = { |
715 | 6 | .loop_count = loop_count, |
716 | 6 | .loops = loops |
717 | 6 | } |
718 | 6 | }; |
719 | 6 | } |
720 | | |
721 | | static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_binary_emit_grad(const ccv_nnc_micro_io_t super, const int var_count) |
722 | 3 | { |
723 | 3 | struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)super; |
724 | 3 | const int loop_count = self->super.dimensions; |
725 | 3 | assert(self->left->dimensions == loop_count); |
726 | 3 | assert(self->right->dimensions == loop_count); |
727 | 3 | assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC); |
728 | 3 | int i; |
729 | 3 | ccv_nnc_micro_loop_t* const left_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count); |
730 | 20 | for (i = 0; i < loop_count; i++17 ) |
731 | 17 | left_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i); |
732 | 3 | ccv_nnc_micro_loop_t* const right_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count); |
733 | 20 | for (i = 0; i < loop_count; i++17 ) |
734 | 17 | right_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i); |
735 | 3 | ccv_nnc_micro_loop_statement_t left_statement; |
736 | 3 | ccv_nnc_micro_loop_statement_t right_statement; |
737 | 3 | switch (self->binary_op) |
738 | 3 | { |
739 | 0 | case CCV_NNC_MICRO_BINARY_OP_DIV: |
740 | 0 | left_statement = ccv_nnc_micro_loop_assignment( |
741 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)), |
742 | 0 | ccv_nnc_micro_loop_expression_of_binary( |
743 | 0 | CCV_NNC_MICRO_BINARY_OP_DIV, |
744 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))), |
745 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
746 | 0 | ) |
747 | 0 | ); |
748 | 0 | right_statement = ccv_nnc_micro_loop_assignment( |
749 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)), |
750 | 0 | ccv_nnc_micro_loop_expression_of_binary( |
751 | 0 | CCV_NNC_MICRO_BINARY_OP_MUL, |
752 | 0 | ccv_nnc_micro_loop_expression_of_binary( |
753 | 0 | CCV_NNC_MICRO_BINARY_OP_DIV, |
754 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))), |
755 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
756 | 0 | ), |
757 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
758 | 0 | ) |
759 | 0 | ); |
760 | 0 | break; |
761 | 3 | case CCV_NNC_MICRO_BINARY_OP_MUL: |
762 | 3 | left_statement = ccv_nnc_micro_loop_assignment( |
763 | 3 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)), |
764 | 3 | ccv_nnc_micro_loop_expression_of_binary( |
765 | 3 | CCV_NNC_MICRO_BINARY_OP_MUL, |
766 | 3 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))), |
767 | 3 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
768 | 3 | ) |
769 | 3 | ); |
770 | 3 | right_statement = ccv_nnc_micro_loop_assignment( |
771 | 3 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)), |
772 | 3 | ccv_nnc_micro_loop_expression_of_binary( |
773 | 3 | CCV_NNC_MICRO_BINARY_OP_MUL, |
774 | 3 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))), |
775 | 3 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
776 | 3 | ) |
777 | 3 | ); |
778 | 3 | break; |
779 | 0 | case CCV_NNC_MICRO_BINARY_OP_PLUS: |
780 | 0 | left_statement = ccv_nnc_micro_loop_assignment( |
781 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)), |
782 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
783 | 0 | ); |
784 | 0 | right_statement = ccv_nnc_micro_loop_assignment( |
785 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)), |
786 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
787 | 0 | ); |
788 | 0 | break; |
789 | 0 | case CCV_NNC_MICRO_BINARY_OP_MINUS: |
790 | 0 | left_statement = ccv_nnc_micro_loop_assignment( |
791 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)), |
792 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
793 | 0 | ); |
794 | 0 | right_statement = ccv_nnc_micro_loop_assignment( |
795 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)), |
796 | 0 | ccv_nnc_micro_loop_expression_of_unary( |
797 | 0 | CCV_NNC_MICRO_UNARY_OP_NEG, |
798 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
799 | 0 | ) |
800 | 0 | ); |
801 | 0 | break; |
802 | 0 | case CCV_NNC_MICRO_BINARY_OP_MIN: |
803 | 0 | left_statement = ccv_nnc_micro_loop_assignment( |
804 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)), |
805 | 0 | ccv_nnc_micro_loop_expression_of_ternary( |
806 | 0 | ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN, |
807 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))), |
808 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
809 | 0 | ), |
810 | 0 | ccv_nnc_micro_loop_expression_of_value(0), |
811 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
812 | 0 | ) |
813 | 0 | ); |
814 | 0 | right_statement = ccv_nnc_micro_loop_assignment( |
815 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)), |
816 | 0 | ccv_nnc_micro_loop_expression_of_ternary( |
817 | 0 | ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN, |
818 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))), |
819 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
820 | 0 | ), |
821 | 0 | ccv_nnc_micro_loop_expression_of_value(0), |
822 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
823 | 0 | ) |
824 | 0 | ); |
825 | 0 | break; |
826 | 0 | case CCV_NNC_MICRO_BINARY_OP_MAX: |
827 | 0 | left_statement = ccv_nnc_micro_loop_assignment( |
828 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)), |
829 | 0 | ccv_nnc_micro_loop_expression_of_ternary( |
830 | 0 | ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN, |
831 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))), |
832 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
833 | 0 | ), |
834 | 0 | ccv_nnc_micro_loop_expression_of_value(0), |
835 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))) |
836 | 0 | ) |
837 | 0 | ); |
838 | 0 | right_statement = ccv_nnc_micro_loop_assignment( |
839 | 0 | ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)), |
840 | 0 | ccv_nnc_micro_loop_expression_of_ternary( |
841 | 0 | ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN, |
842 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))), |
843 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
844 | 0 | ), |
845 | 0 | ccv_nnc_micro_loop_expression_of_value(0), |
846 | 0 | ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))) |
847 | 0 | ) |
848 | 0 | ); |
849 | 0 | break; |
850 | 3 | } |
851 | 3 | left_loops[loop_count - 1].statement_count = 1; |
852 | 3 | left_loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t)); |
853 | 3 | left_loops[loop_count - 1].statements[0] = left_statement; |
854 | 3 | right_loops[loop_count - 1].statement_count = 1; |
855 | 3 | right_loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t)); |
856 | 3 | right_loops[loop_count - 1].statements[0] = right_statement; |
857 | 3 | ccv_nnc_micro_loop_block_t* const blocks = (ccv_nnc_micro_loop_block_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_block_t) * 2); |
858 | 3 | blocks[0] = (ccv_nnc_micro_loop_block_t){ |
859 | 3 | .loop_count = loop_count, |
860 | 3 | .loops = left_loops |
861 | 3 | }; |
862 | 3 | blocks[1] = (ccv_nnc_micro_loop_block_t){ |
863 | 3 | .loop_count = loop_count, |
864 | 3 | .loops = right_loops |
865 | 3 | }; |
866 | 3 | return (ccv_nnc_micro_function_t){ |
867 | 3 | .block_count = 2, |
868 | 3 | .blocks = blocks |
869 | 3 | }; |
870 | 3 | } |
871 | | |
872 | | static ccv_nnc_micro_tensor_t _ccv_nnc_micro_binary_return_shape(const ccv_nnc_micro_io_t super) |
873 | 3 | { |
874 | 3 | struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)super; |
875 | 3 | ccv_nnc_micro_tensor_t var = {}; |
876 | 3 | var.dimensions = self->super.dimensions; |
877 | 3 | var.input = self->left->id; |
878 | 3 | var.sibling = self->right->id; |
879 | 3 | return var; |
880 | 3 | } |
881 | | |
// Virtual table for elementwise binary ops (+, -, *, /, min, max).
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_binary_isa = {
	.emit = _ccv_nnc_micro_binary_emit,
	.emit_grad = _ccv_nnc_micro_binary_emit_grad,
	.return_shape = _ccv_nnc_micro_binary_return_shape
};
887 | | |
888 | | ccv_nnc_micro_io_t ccv_nnc_micro_binary(const uint32_t op, const ccv_nnc_micro_io_t x, const ccv_nnc_micro_io_t y) |
889 | 3 | { |
890 | 3 | struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_binary_s)); |
891 | 3 | self->super.isa = &ccv_nnc_micro_io_binary_isa; |
892 | 3 | self->super.dimensions = x->dimensions; |
893 | 3 | self->super.id = 0; |
894 | 3 | self->super.inputs = &self->left; |
895 | 3 | self->super.input_size = 2; |
896 | 3 | self->binary_op = op; |
897 | 3 | self->left = x; |
898 | 3 | self->right = y; |
899 | 3 | assert(x->dimensions == y->dimensions); |
900 | 3 | return (ccv_nnc_micro_io_t)self; |
901 | 3 | } |
902 | | |
// A reduction node: reduce x with reduce_op along the listed axes.
struct ccv_nnc_micro_io_reduce_s {
	struct ccv_nnc_micro_io_s super; // Must be first so the struct can be cast to ccv_nnc_micro_io_t.
	uint32_t reduce_op; // One of CCV_NNC_MICRO_REDUCE_OP_*.
	int axis_count; // Number of valid entries in axis[].
	ccv_nnc_micro_io_t x; // super.inputs points here (input_size = 1).
	// Trailing-array idiom (pre-C99 flexible array member): the constructor
	// allocates sizeof(struct) + sizeof(int) * (axis_count - 1) so axis[] holds
	// axis_count entries. Must remain the last field.
	int axis[1];
};
910 | | |
// Emit the forward pass of a reduction: reorder the loop nest so all
// non-reduce loops come first (positions [0, loop_count - axis_count)) and all
// reduce loops come last; accumulate into a loop-carried variable inside the
// reduce loops, then write it out to the output tensor.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reduce_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	// If axis_count == loop_count, we need an extra outer one-trip loop to host
	// the carried variable and the final write-out statement.
	int has_extra_loop = (self->axis_count == loop_count);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * (loop_count + has_extra_loop));
	int i, j, k;
	// Mark which of x's dimensions are reduced.
	int8_t reduce_axis[loop_count];
	memset(reduce_axis, 0, sizeof(int8_t) * loop_count);
	for (i = 0; i < self->axis_count; i++)
		reduce_axis[self->axis[i]] = 1;
	j = 0;
	// j walks the non-reduce slots from the front; k walks the reduce slots
	// starting after all non-reduce loops (or after the extra loop).
	if (has_extra_loop)
	{
		loops[0] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_value(1), 0);
		k = 1;
	} else
		k = loop_count - self->axis_count;
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			loops[k] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->x->id, i), i + has_extra_loop);
			++k;
		} else {
			loops[j] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->x->id, i), i + has_extra_loop);
			++j;
		}
	// The innermost non-reduce loop (or the extra loop) owns the carried
	// accumulator: it is reset per iteration of that loop and folded over all
	// enclosed reduce loops.
	const int carried_loop_idx = has_extra_loop ? 0 : loop_count - self->axis_count - 1;
	loops[carried_loop_idx].carried_count = 1;
	loops[carried_loop_idx].carrieds = (ccv_nnc_micro_loop_carried_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_carried_t));
	loops[carried_loop_idx].carrieds[0] = ccv_nnc_micro_loop_carried(self->reduce_op, 0);
	j = 0;
	k = has_extra_loop ? 1 : loop_count - self->axis_count;
	// Rebuild the per-dimension index terms of x in the reordered loop nest:
	// dimension i reads from whichever loop it was moved to above.
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			index[i] = ccv_nnc_micro_index_of_id(loops[k].id);
			++k;
		} else {
			index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	// carried <reduce_op>= x[index], placed in the innermost (last reduce) loop.
	ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_compound_assignment_of_id(
		loops[carried_loop_idx].carrieds[0].id,
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, index))
	);
	loops[carried_loop_idx + self->axis_count].statement_count = 1;
	loops[carried_loop_idx + self->axis_count].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[carried_loop_idx + self->axis_count].statements[0] = statement;
	// Output index: reduced dimensions collapse to 0, others keep their loop id.
	j = 0;
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
			index[i] = ccv_nnc_micro_index_of_value(0);
		else {
			index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	// out[index] = carried, executed once per non-reduce iteration (after the
	// reduce loops complete, since it is attached to the carried loop itself).
	statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, index),
		ccv_nnc_micro_loop_expression_of_id(loops[carried_loop_idx].carrieds[0].id)
	);
	loops[carried_loop_idx].statement_count = 1;
	loops[carried_loop_idx].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[carried_loop_idx].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.carried_count = 1,
			.loop_count = loop_count + has_extra_loop,
			.loops = loops
		}
	};
}
989 | | |
// Emit the backward pass of a sum reduction: the gradient is broadcast, i.e.
// GRAD(x)[i...] = GRAD(out) read with every reduced dimension pinned to 0.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reduce_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)super;
	assert(self->reduce_op == CCV_NNC_MICRO_REDUCE_OP_SUM); // Only sum is supported; mean would need an additional opcode.
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i, j, k;
	// Mark which of x's dimensions were reduced.
	int8_t reduce_axis[loop_count];
	memset(reduce_axis, 0, sizeof(int8_t) * loop_count);
	for (i = 0; i < self->axis_count; i++)
		reduce_axis[self->axis[i]] = 1;
	// Same loop reordering as the forward pass: non-reduce loops first (slot j),
	// reduce loops last (slot k), all iterating GRAD(x)'s full shape.
	j = 0;
	k = loop_count - self->axis_count;
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			loops[k] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
			++k;
		} else {
			loops[j] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
			++j;
		}
	// Full index into GRAD(x): dimension i reads from whichever loop it moved to.
	j = 0;
	k = loop_count - self->axis_count;
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			index[i] = ccv_nnc_micro_index_of_id(loops[k].id);
			++k;
		} else {
			index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	// Index into GRAD(out): reduced dimensions are extent 1, so pin them to 0.
	j = 0;
	ccv_nnc_micro_loop_index_term_t reduced_index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
			reduced_index[i] = ccv_nnc_micro_index_of_value(0);
		else {
			reduced_index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	// GRAD(x)[index] = GRAD(out)[reduced_index] (broadcast of the sum gradient).
	ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, index),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, reduced_index))
	);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			// NOTE(review): carried_count is 1 here although this block creates no
			// carried variable (unlike the forward emit) -- looks like a leftover
			// from copying the forward path; confirm whether downstream only uses
			// this to reserve id space before changing it.
			.carried_count = 1,
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
1051 | | |
1052 | | static ccv_nnc_micro_tensor_t _ccv_nnc_micro_reduce_return_shape(const ccv_nnc_micro_io_t super) |
1053 | 3 | { |
1054 | 3 | struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)super; |
1055 | 3 | ccv_nnc_micro_tensor_t var = {}; |
1056 | 3 | var.dimensions = self->super.dimensions; |
1057 | 3 | var.input = self->x->id; |
1058 | 3 | var.sibling = -1; |
1059 | 3 | var.shape = (ccv_nnc_micro_loop_index_term_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions); |
1060 | 3 | int i; |
1061 | 20 | for (i = 0; i < self->super.dimensions; i++17 ) |
1062 | 17 | var.shape[i] = ccv_nnc_micro_index_of_axis_size(self->x->id, i); |
1063 | 10 | for (i = 0; i < self->axis_count; i++7 ) |
1064 | 7 | var.shape[self->axis[i]] = ccv_nnc_micro_index_of_value(1); |
1065 | 3 | return var; |
1066 | 3 | } |
1067 | | |
// Virtual table for reduction ops (currently sum for the gradient path).
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_reduce_isa = {
	.emit = _ccv_nnc_micro_reduce_emit,
	.emit_grad = _ccv_nnc_micro_reduce_emit_grad,
	.return_shape = _ccv_nnc_micro_reduce_return_shape
};
1073 | | |
1074 | | ccv_nnc_micro_io_t ccv_nnc_micro_reduce(const uint8_t op, const int* const axis, const int axis_count, const ccv_nnc_micro_io_t x) |
1075 | 3 | { |
1076 | 3 | struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_reduce_s) + sizeof(int) * (axis_count - 1)); |
1077 | 3 | self->super.isa = &ccv_nnc_micro_io_reduce_isa; |
1078 | 3 | self->super.dimensions = x->dimensions; |
1079 | 3 | self->super.id = 0; |
1080 | 3 | self->super.inputs = &self->x; |
1081 | 3 | self->super.input_size = 1; |
1082 | 3 | self->reduce_op = op; |
1083 | 3 | self->x = x; |
1084 | 3 | self->axis_count = axis_count; |
1085 | 3 | assert(axis_count <= x->dimensions); |
1086 | 3 | int i; |
1087 | 10 | for (i = 0; i < axis_count; i++7 ) |
1088 | 7 | { assert(axis[i] < x->dimensions); } |
1089 | 3 | memcpy(self->axis, axis, sizeof(int) * axis_count); |
1090 | 3 | return (ccv_nnc_micro_io_t)self; |
1091 | 3 | } |
1092 | | |
// A select (gather-like) node: picks from x along one axis using index.
struct ccv_nnc_micro_io_select_s {
	struct ccv_nnc_micro_io_s super; // Must be first so the struct can be cast to ccv_nnc_micro_io_t.
	int axis; // The dimension along which the selection happens.
	// x and index must stay adjacent and in this order: the constructor sets
	// super.inputs = &self->x with input_size = 2, treating them as an array.
	ccv_nnc_micro_io_t x;
	ccv_nnc_micro_io_t index;
};
1099 | | |
// Emit the forward pass of select: out[..., 0, ...] = x[..., index, ...],
// where the selected axis iterates only once (extent 1 on the output).
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_select_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	assert(self->index->dimensions == loop_count);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
	{
		// The selected axis collapses to a single iteration; every other axis
		// iterates the full output extent.
		if (i == self->axis)
			loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_value(1), i);
		else
			loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i);
	}
	// Read index for x: the selected axis is addressed through the index
	// tensor's id (resolved by the IR to the selected position); the rest use
	// the loop variables directly.
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
	{
		if (i == self->axis)
			index[i] = ccv_nnc_micro_index_of_id(ccv_nnc_micro_id_of_tensor(self->index->id));
		else
			index[i] = ccv_nnc_micro_index_of_id(loops[i].id);
	}
	// out[loops...] = x[index...]
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, index))
	);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
1138 | | |
// Emit the backward pass of select as two blocks: first zero GRAD(x)
// everywhere, then scatter-accumulate GRAD(out) into the selected positions.
// The index tensor itself receives no gradient.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_select_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	assert(self->index->dimensions == loop_count);
	// Block 1: GRAD(x)[...] = 0 over x's full gradient shape. The reset is
	// required because the scatter below only writes the selected positions.
	ccv_nnc_micro_loop_t* const reset_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
		reset_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
	const ccv_nnc_micro_loop_statement_t reset_statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(reset_loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_value(0)
	);
	reset_loops[loop_count - 1].statement_count = 1;
	reset_loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	reset_loops[loop_count - 1].statements[0] = reset_statement;
	// Block 2: iterate GRAD(x)'s shape and accumulate the output gradient into
	// the position addressed through the index tensor on the selected axis.
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
	{
		if (i == self->axis)
			index[i] = ccv_nnc_micro_index_of_id(ccv_nnc_micro_id_of_tensor(self->index->id));
		else
			index[i] = ccv_nnc_micro_index_of_id(loops[i].id);
	}
	// This is only for x, nothing for index (no gradient flows to index).
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_compound_assignment_of_tensor(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, index),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
	);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	// The reset block must come first so the scatter accumulates into zeros.
	ccv_nnc_micro_loop_block_t* const blocks = (ccv_nnc_micro_loop_block_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_block_t) * 2);
	blocks[0] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = reset_loops
	};
	blocks[1] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = loops
	};
	return (ccv_nnc_micro_function_t){
		.block_count = 2,
		.blocks = blocks
	};
}
1189 | | |
1190 | | static ccv_nnc_micro_tensor_t _ccv_nnc_micro_select_return_shape(const ccv_nnc_micro_io_t super) |
1191 | 0 | { |
1192 | 0 | struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)super; |
1193 | 0 | ccv_nnc_micro_tensor_t var = {}; |
1194 | 0 | var.dimensions = self->super.dimensions; |
1195 | 0 | var.input = self->x->id; |
1196 | 0 | var.sibling = -1; |
1197 | 0 | var.shape = (ccv_nnc_micro_loop_index_term_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions); |
1198 | 0 | int i; |
1199 | 0 | for (i = 0; i < self->super.dimensions; i++) |
1200 | 0 | { |
1201 | 0 | if (i != self->axis) |
1202 | 0 | var.shape[i] = ccv_nnc_micro_index_of_axis_size(self->x->id, i); |
1203 | 0 | else |
1204 | 0 | var.shape[i] = ccv_nnc_micro_index_of_value(1); |
1205 | 0 | } |
1206 | 0 | return var; |
1207 | 0 | } |
1208 | | |
// Virtual table for the select (gather-like) op.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_select_isa = {
	.emit = _ccv_nnc_micro_select_emit,
	.emit_grad = _ccv_nnc_micro_select_emit_grad,
	.return_shape = _ccv_nnc_micro_select_return_shape
};
1214 | | |
1215 | | ccv_nnc_micro_io_t ccv_nnc_micro_select(const int axis, const ccv_nnc_micro_io_t x, const ccv_nnc_micro_io_t index) |
1216 | 0 | { |
1217 | 0 | struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_select_s)); |
1218 | 0 | self->super.isa = &ccv_nnc_micro_io_select_isa; |
1219 | 0 | self->super.dimensions = x->dimensions; |
1220 | 0 | self->super.id = 0; |
1221 | 0 | self->super.inputs = &self->x; |
1222 | 0 | self->super.input_size = 2; |
1223 | 0 | self->x = x; |
1224 | 0 | self->index = index; |
1225 | 0 | self->axis = axis; |
1226 | 0 | assert(axis <= CCV_NNC_MAX_DIM_ALLOC); |
1227 | 0 | return (ccv_nnc_micro_io_t)self; |
1228 | 0 | } |