Coverage Report

Created: 2021-04-12 01:11

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_micro_core.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_internal.h"
5
#include "_ccv_nnc_micro.h"
6
#include "3rdparty/khash/khash.h"
7
8
// MARK - Level-1 API
9
10
// Input tensors carry no custom behavior; every vtable hook stays at its
// default (NULL / no-op).
const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_input_isa = {};

// Map a forward-pass variable id to its gradient variable id. Gradients are
// numbered mirror-image at the top of the id space: var i's gradient is
// (2 * var_count - 1 - i). NOTE: the macro captures `var_count` from the
// enclosing scope — only use it where a `var_count` variable is in scope.
#define GRAD(_id) (2 * (var_count) - 1 - (_id))
13
14
ccv_nnc_micro_io_t ccv_nnc_micro_input(const int dimensions)
15
6
{
16
6
  assert(dimensions <= CCV_NNC_MAX_DIM_ALLOC);
17
6
  ccv_nnc_micro_io_t input = cccalloc(1, sizeof(struct ccv_nnc_micro_io_s));
18
6
  input->isa = &ccv_nnc_micro_io_input_isa;
19
6
  input->dimensions = dimensions;
20
6
  input->id = 0;
21
6
  return input;
22
6
}
23
// A gradient marker for tensor x: carries no payload beyond the io header;
// numbering maps it onto GRAD(x->id).
struct ccv_nnc_micro_io_grad_s {
	struct ccv_nnc_micro_io_s super; // Must be first so instances cast to ccv_nnc_micro_io_t.
	ccv_nnc_micro_io_t x; // The forward-pass tensor this gradient refers to.
};
27
28
static void _ccv_nnc_micro_grad_numbering(const ccv_nnc_micro_io_t super, const int id, const int var_count)
29
9
{
30
9
  struct ccv_nnc_micro_io_grad_s* const self = (struct ccv_nnc_micro_io_grad_s*)super;
31
9
  const int sid = self->x->id;
32
9
  self->super.id = GRAD(sid);
33
9
}
34
35
// Gradient placeholders only participate in id numbering; they emit no code
// of their own, so every other hook stays NULL.
const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_grad_isa = {
	.numbering = _ccv_nnc_micro_grad_numbering
};
38
39
ccv_nnc_micro_io_t ccv_nnc_micro_grad(const ccv_nnc_micro_io_t x)
40
9
{
41
9
  struct ccv_nnc_micro_io_grad_s* const grad = cccalloc(1, sizeof(struct ccv_nnc_micro_io_grad_s));
42
9
  grad->super.isa = &ccv_nnc_micro_io_grad_isa;
43
9
  grad->super.dimensions = x->dimensions;
44
9
  grad->super.id = 0;
45
9
  grad->x = x;
46
9
  return (ccv_nnc_micro_io_t)grad;
47
9
}
48
49
// A simple recursive descent parser. Omitted tokenisation step.
50
// Try to consume `symbol` (of `size` bytes) at the cursor. On match, advance
// *pos and shrink *remain_size, returning 1; otherwise leave both untouched
// and return 0.
static int _accept(const char** const pos, int* const remain_size, const char* symbol, int size)
{
	if (size > *remain_size)
		return 0;
	if (memcmp(*pos, symbol, size) != 0)
		return 0;
	*pos += size;
	*remain_size -= size;
	return 1;
}
62
63
// Like _accept, but the symbol is mandatory: a miss is a grammar violation
// and trips the assert (returns 0 if asserts are compiled out).
static int _expect(const char** const pos, int* const remain_size, const char* symbol, int size)
{
	const int matched = _accept(pos, remain_size, symbol, size);
	assert(matched && "unexpected symbol");
	return matched;
}
70
71
// CONST -> [0-9]+
// Parse a decimal integer literal at the cursor into *id, advancing past it.
// Returns 1 if at least one digit was consumed; on failure *id is 0 and the
// cursor is unchanged.
static int _constant(const char** const pos, int* const remain_size, int* const id)
{
	const char* const s = *pos;
	int n = 0;
	*id = 0;
	while (n < *remain_size && s[n] >= '0' && s[n] <= '9')
	{
		*id = *id * 10 + (s[n] - '0');
		++n;
	}
	*pos += n;
	*remain_size -= n;
	return n > 0;
}
85
86
// INDEX -> i[0-9]+
// Parse a loop-index reference ("i" followed by digits) into *id. A lone "i"
// with no digits is rejected and the cursor stays put.
static int _index(const char** const pos, int* const remain_size, int* const id)
{
	const char* const s = *pos;
	if (*remain_size < 1 || s[0] != 'i')
		return 0;
	int n = 1;
	*id = 0;
	while (n < *remain_size && s[n] >= '0' && s[n] <= '9')
	{
		*id = *id * 10 + (s[n] - '0');
		++n;
	}
	if (n == 1) // Just "i", no digits: not an index.
		return 0;
	*pos += n;
	*remain_size -= n;
	return 1;
}
106
107
// DIM -> d[A-Z]{1}[0-9]+
// Parse an axis-size reference such as "dA0" (axis 0 of the tensor labeled
// 'A'). *id receives the letter index (A=0..Z=25), *d the axis number. An
// optional bracketed equality "dA0[=dB1]" records that the two dimensions
// must match, pushed onto equal_assertions. Tensor ids are stored negated as
// -(id + 1) placeholders, rewritten later by _sid_to_axis_size_term /
// _ccv_nnc_micro_reindex_numbering. Returns 1 on success; cursor is advanced
// only on success.
static int _dim(const char** const pos, int* const remain_size, int* const id, int* const d, ccv_array_t* const equal_assertions)
{
	if (!(*remain_size > 1 && pos[0][0] == 'd'))
		return 0;
	if (!(pos[0][1] >= 'A' && pos[0][1] <= 'Z'))
		return 0;
	*id = pos[0][1] - 'A';
	int size = 2; // Consumed 'd' and the letter.
	*d = 0;
	while (*remain_size - size > 0 && pos[0][size] >= '0' && pos[0][size] <= '9')
	{
		*d *= 10;
		*d += (pos[0][size] - '0');
		++size;
	}
	if (size > 1)
	{
		*remain_size -= size;
		*pos += size;
		while (_accept(pos, remain_size, " ", 1)) {}
		// Optional equality assertion: "[ = dXN ]".
		if (_accept(pos, remain_size, "[", 1))
		{
			while (_accept(pos, remain_size, " ", 1)) {}
			_expect(pos, remain_size, "=", 1);
			while (_accept(pos, remain_size, " ", 1)) {}
			int next_id;
			int next_d;
			if (!_dim(pos, remain_size, &next_id, &next_d, equal_assertions))
				{ assert(0 && "unexpected symbol"); }
			// Both sides use the -(id + 1) placeholder encoding; resolved to
			// real tensor ids during numbering.
			const ccv_nnc_micro_id_equal_assertion_t equal_assertion = {
				.left = {
					.type = CCV_NNC_MICRO_AXIS_SIZE_ID,
					.id = -(*id + 1),
					.d = *d
				},
				.right = {
					.type = CCV_NNC_MICRO_AXIS_SIZE_ID,
					.id = -(next_id + 1),
					.d = next_d
				}
			};
			ccv_array_push(equal_assertions, &equal_assertion);
			while (_accept(pos, remain_size, " ", 1)) {}
			_expect(pos, remain_size, "]", 1);
		}
		return 1;
	}
	// Unreachable in practice: size starts at 2. Kept for symmetry.
	return 0;
}
156
157
// VAR -> $[a-zA-Z0-9_]+
// Parse an unbound scalar variable reference. On success, *name receives a
// freshly allocated NUL-terminated copy of the token INCLUDING the leading
// '$' (caller owns it), and the cursor advances. A lone '$' is rejected.
static int _var(const char** const pos, int* const remain_size, char** name)
{
	const char* const s = *pos;
	if (!(*remain_size > 0 && s[0] == '$'))
		return 0;
	int n = 1;
	while (n < *remain_size)
	{
		const char c = s[n];
		const int word_char = (c >= '0' && c <= '9') ||
			(c >= 'a' && c <= 'z') ||
			(c >= 'A' && c <= 'Z') ||
			c == '_';
		if (!word_char)
			break;
		++n;
	}
	if (n == 1) // '$' with no identifier characters after it.
		return 0;
	*name = ccmalloc(n + 1);
	memcpy(*name, s, n);
	(*name)[n] = 0;
	*pos += n;
	*remain_size -= n;
	return 1;
}
179
180
static CCV_WARN_UNUSED(ccv_nnc_micro_loop_index_term_t) _expression(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions);
181
182
// FACTOR -> CONST | INDEX | DIM | VAR | "(" EXPRESSION ")"
// Parse a single factor into an index term. Order matters: _constant first so
// a bare number is not mistaken for anything else, then loop index ("iN"),
// axis size ("dXN"), scalar variable ("$name"), and finally a parenthesized
// sub-expression. Anything else is a syntax error (asserts).
static ccv_nnc_micro_loop_index_term_t _factor(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions)
{
	ccv_nnc_micro_loop_index_term_t term;
	while (_accept(pos, remain_size, " ", 1)) {}
	int id, d;
	char* name;
	if (_constant(pos, remain_size, &id)) {
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL;
		term.immediate_value = id;
	} else if (_index(pos, remain_size, &id)) {
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID;
		term.id.type = CCV_NNC_MICRO_LOOP_ID;
		term.id.id = id;
	} else if (_dim(pos, remain_size, &id, &d, equal_assertions)) {
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID;
		term.id.type = CCV_NNC_MICRO_AXIS_SIZE_ID;
		term.id.d = d;
		// Placeholder encoding; resolved to a real tensor id during numbering.
		term.id.id = -(id + 1);
	} else if (_var(pos, remain_size, &name)) {
		// Takes ownership of `name`; freed when the scalar is bound.
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_UNBOUND_SCALAR;
		term.name = name;
	} else if (_accept(pos, remain_size, "(", 1)) {
		term = _expression(pos, remain_size, equal_assertions);
		_expect(pos, remain_size, ")", 1);
	} else {
		assert(0 && "factor: syntax error");
	}
	while (_accept(pos, remain_size, " ", 1)) {}
	return term;
}
212
213
// TERM -> FACTOR { ("*" | "/") FACTOR }
// Parse a multiplicative chain, folding left-associatively into freshly
// allocated binary nodes.
static ccv_nnc_micro_loop_index_term_t _term(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions)
{
	while (_accept(pos, remain_size, " ", 1)) {}
	ccv_nnc_micro_loop_index_term_t term = _factor(pos, remain_size, equal_assertions);
	while (*remain_size > 0 && (pos[0][0] == '*' || pos[0][0] == '/'))
	{
		const int op = pos[0][0] == '*' ? CCV_NNC_MICRO_BINARY_OP_MUL : CCV_NNC_MICRO_BINARY_OP_DIV;
		*remain_size -= 1;
		*pos += 1;
		// Fold: term = (term op next-factor).
		const ccv_nnc_micro_loop_index_term_t left = term;
		const ccv_nnc_micro_loop_index_term_t right = _factor(pos, remain_size, equal_assertions);
		term.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY;
		term.binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t));
		term.binary->op = op;
		term.binary->left = left;
		term.binary->right = right;
	}
	while (_accept(pos, remain_size, " ", 1)) {}
	return term;
}
233
234
// EXPRESSION -> ["+" | "-"] TERM { ("+" | "-") TERM }
// Parse an additive chain, folding left-associatively into freshly allocated
// binary nodes. An optional sign prefix applies to the FIRST term only, per
// the grammar: "-a + b" parses as (-a) + b. (Previously the prefix was
// applied after the whole chain was consumed, yielding -(a + b); that path
// was never exercised per coverage. A unary node is a binary node whose
// right side is TYPE_NONE.)
static ccv_nnc_micro_loop_index_term_t _expression(const char** const pos, int* const remain_size, ccv_array_t* const equal_assertions)
{
	while (_accept(pos, remain_size, " ", 1)) {}
	int prefix_op = -1;
	if (*remain_size > 0 && (pos[0][0] == '+' || pos[0][0] == '-'))
	{
		prefix_op = pos[0][0] == '+' ? CCV_NNC_MICRO_BINARY_OP_PLUS : CCV_NNC_MICRO_BINARY_OP_MINUS;
		*remain_size -= 1;
		*pos += 1;
	}
	ccv_nnc_micro_loop_index_term_t node = _term(pos, remain_size, equal_assertions);
	// Bind the prefix to the first term before consuming the rest of the
	// additive chain, so unary minus has higher precedence than binary +/-.
	if (prefix_op >= 0)
	{
		ccv_nnc_micro_loop_index_binary_t* const expr = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t));
		expr->op = prefix_op;
		expr->left = node;
		expr->right.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_NONE;
		node.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY;
		node.binary = expr;
	}
	while (*remain_size > 0 && (pos[0][0] == '+' || pos[0][0] == '-'))
	{
		const int op = pos[0][0] == '+' ? CCV_NNC_MICRO_BINARY_OP_PLUS : CCV_NNC_MICRO_BINARY_OP_MINUS;
		*remain_size -= 1;
		*pos += 1;
		// Fold: node = (node op next-term).
		const ccv_nnc_micro_loop_index_term_t left = node;
		const ccv_nnc_micro_loop_index_term_t right = _term(pos, remain_size, equal_assertions);
		node.type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY;
		node.binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t));
		node.binary->op = op;
		node.binary->left = left;
		node.binary->right = right;
	}
	while (_accept(pos, remain_size, " ", 1)) {}
	return node;
}
270
271
// Assert that a parsed expression contains no loop-index references: every
// TYPE_ID leaf must be an axis-size id (dXN), never a loop id (iN). Used on
// shape expressions, where loop indices are meaningless. VAL, UNBOUND_SCALAR
// and NONE leaves fall through the switch — they are trivially index-free.
static void _no_index(const ccv_nnc_micro_loop_index_term_t term)
{
	switch (term.type) {
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
			// Can only be axis size id. No loop index.
			assert(term.id.type == CCV_NNC_MICRO_AXIS_SIZE_ID);
			break;
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY:
			_no_index(term.binary->left);
			_no_index(term.binary->right);
			break;
	}
}
284
285
// Rewrite axis-size placeholders in-place. The parser stores a dXN reference
// with the negative placeholder id -(X + 1); once the shape-providing tensors
// are numbered, this replaces it with the real tensor id sids[X]. Recurses
// through binary nodes; VAL / scalar / already-resolved leaves are untouched.
static void _sid_to_axis_size_term(ccv_nnc_micro_loop_index_term_t* const term, const int* const sids, const int sid_count)
{
	switch (term->type) {
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
			// Can only be axis size id. No loop index.
			if (term->id.type == CCV_NNC_MICRO_AXIS_SIZE_ID && term->id.id < 0)
			{
				const int id = -(term->id.id + 1); // Decode the placeholder.
				assert(id >= 0 && id < sid_count);
				term->id.id = sids[id];
			}
			break;
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY:
			_sid_to_axis_size_term(&term->binary->left, sids, sid_count);
			_sid_to_axis_size_term(&term->binary->right, sids, sid_count);
			break;
	}
}
303
304
// Reindex op state. shape / reindex / ss all live in the trailing allocation
// of the struct itself (see ccv_nnc_micro_reindex), so only equal_assertions
// needs a separate free.
struct ccv_nnc_micro_io_reindex_s {
	struct ccv_nnc_micro_io_s super; // Must be first: instances cast to ccv_nnc_micro_io_t.
	int s_count; // Number of shape-providing inputs in ss (x is appended after them).
	ccv_nnc_micro_io_t x; // The tensor being reindexed.
	ccv_nnc_micro_loop_index_term_t* shape; // Parsed output-shape expressions, one per output dimension.
	ccv_nnc_micro_loop_index_term_t* reindex; // Parsed index expressions into x, one per dimension of x.
	ccv_nnc_micro_io_t* ss; // Shape-providing inputs; ss[s_count] == x.
	ccv_array_t* equal_assertions; // Dimension-equality assertions collected while parsing (e.g. "dA0[=dB0]").
};
313
314
// Numbering hook: record this op's assigned id, then resolve every
// -(X + 1) axis-size placeholder (in shape, reindex and the recorded equality
// assertions) to the now-known tensor id of the X-th shape-providing input.
static void _ccv_nnc_micro_reindex_numbering(const ccv_nnc_micro_io_t super, const int id, const int var_count)
{
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	self->super.id = id;
	// No need to update axis size.
	if (self->s_count == 0)
		return;
	// VLA is safe here: s_count > 0 and bounded by the caller's input count.
	int sids[self->s_count];
	int i;
	for (i = 0; i < self->s_count; i++)
		sids[i] = self->ss[i]->id;
	for (i = 0; i < self->super.dimensions; i++)
		_sid_to_axis_size_term(&self->shape[i], sids, self->s_count);
	for (i = 0; i < self->x->dimensions; i++)
		_sid_to_axis_size_term(&self->reindex[i], sids, self->s_count);
	// Equality assertions store the same placeholder encoding on both sides;
	// resolve each side independently.
	for (i = 0; i < self->equal_assertions->rnum; i++)
	{
		ccv_nnc_micro_id_equal_assertion_t* const equal_assertion = (ccv_nnc_micro_id_equal_assertion_t*)ccv_array_get(self->equal_assertions, i);
		if (equal_assertion->left.type == CCV_NNC_MICRO_AXIS_SIZE_ID && equal_assertion->left.id < 0)
		{
			const int id = -(equal_assertion->left.id + 1);
			assert(id >= 0 && id < self->s_count);
			equal_assertion->left.id = sids[id];
		}
		if (equal_assertion->right.type == CCV_NNC_MICRO_AXIS_SIZE_ID && equal_assertion->right.id < 0)
		{
			const int id = -(equal_assertion->right.id + 1);
			assert(id >= 0 && id < self->s_count);
			equal_assertion->right.id = sids[id];
		}
	}
}
346
347
// Append every dimension-equality assertion recorded by this reindex op onto
// the caller's aggregate list.
static void _ccv_nnc_micro_reindex_equal_assertions(const ccv_nnc_micro_io_t super, ccv_array_t* const equal_assertions)
{
	struct ccv_nnc_micro_io_reindex_s* const reindex = (struct ccv_nnc_micro_io_reindex_s*)super;
	int i;
	for (i = 0; i < reindex->equal_assertions->rnum; i++)
		ccv_array_push(equal_assertions, (ccv_nnc_micro_id_equal_assertion_t*)ccv_array_get(reindex->equal_assertions, i));
}
357
358
// Replace every unbound "$name" scalar leaf with a concrete scalar id
// obtained from the caller-supplied lookup; the name string is freed once
// bound. Recurses through binary nodes; all other leaf types fall through
// untouched.
static void _ccv_nnc_bind_scalars_in_term(ccv_nnc_micro_loop_index_term_t* const term, ccv_nnc_micro_scalar_lookup_f lookup, const void* const context)
{
	switch (term->type)
	{
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY:
			_ccv_nnc_bind_scalars_in_term(&term->binary->left, lookup, context);
			_ccv_nnc_bind_scalars_in_term(&term->binary->right, lookup, context);
			break;
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_UNBOUND_SCALAR: {
			// Save the name before writing through the union, then release it.
			char* const name = term->name;
			term->id.id = lookup(context, name);
			ccfree(name);
			term->id.d = 0;
			term->id.type = CCV_NNC_MICRO_SCALAR_ID;
			term->type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID;
			break;
		}
	}
}
377
378
// Bind "$name" scalar references in both the output-shape expressions and
// the reindex expressions of this op.
static void _ccv_nnc_micro_reindex_bind_scalars(const ccv_nnc_micro_io_t super, ccv_nnc_micro_scalar_lookup_f lookup, const void* const context)
{
	struct ccv_nnc_micro_io_reindex_s* const reindex = (struct ccv_nnc_micro_io_reindex_s*)super;
	int i;
	for (i = 0; i < reindex->super.dimensions; i++)
		_ccv_nnc_bind_scalars_in_term(reindex->shape + i, lookup, context);
	for (i = 0; i < reindex->x->dimensions; i++)
		_ccv_nnc_bind_scalars_in_term(reindex->reindex + i, lookup, context);
}
387
388
// Deep-copy an index expression: binary nodes get freshly allocated
// ccv_nnc_micro_loop_index_binary_t nodes, recursively; leaf terms are plain
// struct copies. NOTE: an UNBOUND_SCALAR leaf's name pointer is shared, not
// duplicated — callers must not free it through both copies.
ccv_nnc_micro_loop_index_term_t ccv_nnc_micro_loop_index_deep_copy(const ccv_nnc_micro_loop_index_term_t* const term)
{
	switch (term->type)
	{
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY: {
			ccv_nnc_micro_loop_index_term_t copy = *term;
			copy.binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t));
			*copy.binary = *term->binary;
			copy.binary->left = ccv_nnc_micro_loop_index_deep_copy(&term->binary->left);
			copy.binary->right = ccv_nnc_micro_loop_index_deep_copy(&term->binary->right);
			return copy;
		}
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_NONE:
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL:
		case CCV_NNC_MICRO_LOOP_INDEX_TYPE_UNBOUND_SCALAR:
			return *term;
	}
	// Unreachable for valid types; keeps the compiler satisfied.
	return *term;
}
408
409
// Emit the forward pass of reindex: one loop nest over the OUTPUT dimensions,
// with the innermost loop assigning y[i0..ik] = x[reindex expressions].
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reindex_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	const int loop_count = self->super.dimensions;
	assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	// One loop per output dimension: for i in [0, dim_i(y)).
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i);
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, self->x->dimensions, self->reindex))
	);
	// The statement above references the reindex terms directly; re-arm
	// self->reindex with deep copies so a later emit (e.g. the gradient pass)
	// does not share binary nodes with this statement — NOTE(review):
	// presumably to avoid double-free at statement teardown; confirm against
	// the loop-statement ownership rules in _ccv_nnc_micro.h.
	for (i = 0; i < self->x->dimensions; i++)
		self->reindex[i] = ccv_nnc_micro_loop_index_deep_copy(&self->reindex[i]);
	// The assignment lives in the innermost loop.
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
435
436
// Emit the backward pass of reindex as two blocks:
// 1. zero-fill the gradient of x, and
// 2. scatter-accumulate: for every output coordinate, add the incoming
//    gradient into dx at the reindexed location (compound assignment, since
//    multiple output coordinates may map to the same x location).
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reindex_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	// The grad is var_count + original id.
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	const int reset_loop_count = self->x->dimensions;
	ccv_nnc_micro_loop_t* const reset_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * reset_loop_count);
	// This loop reset grad to 0.
	int i;
	for (i = 0; i < reset_loop_count; i++)
		reset_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
	const ccv_nnc_micro_loop_statement_t reset_statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), reset_loop_count, ccv_nnc_micro_index_of_loops(reset_loops, reset_loop_count)),
		ccv_nnc_micro_loop_expression_of_value(0)
	);
	reset_loops[reset_loop_count - 1].statement_count = 1;
	reset_loops[reset_loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	reset_loops[reset_loop_count - 1].statements[0] = reset_statement;
	// Second block: loop over the OUTPUT dimensions and accumulate into dx.
	const int loop_count = self->super.dimensions;
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i);
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_compound_assignment_of_tensor(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), self->x->dimensions, self->reindex),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
	);
	// Re-arm self->reindex with deep copies, mirroring the forward emit, so
	// the terms handed to the statement above are not shared.
	for (i = 0; i < self->x->dimensions; i++)
		self->reindex[i] = ccv_nnc_micro_loop_index_deep_copy(&self->reindex[i]);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	ccv_nnc_micro_loop_block_t* const blocks = (ccv_nnc_micro_loop_block_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_block_t) * 2);
	blocks[0] = (ccv_nnc_micro_loop_block_t){
		.loop_count = reset_loop_count,
		.loops = reset_loops
	};
	blocks[1] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = loops
	};
	return (ccv_nnc_micro_function_t){
		.block_count = 2,
		.blocks = blocks
	};
}
480
481
static ccv_nnc_micro_tensor_t _ccv_nnc_micro_reindex_return_shape(const ccv_nnc_micro_io_t super)
482
6
{
483
6
  struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
484
6
  ccv_nnc_micro_tensor_t var = {};
485
6
  var.dimensions = self->super.dimensions;
486
6
  var.sibling = -1;
487
6
  var.input = self->x->id;
488
6
  var.shape = (ccv_nnc_micro_loop_index_term_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions);
489
6
  memcpy(var.shape, self->shape, sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions);
490
6
  return var;
491
6
}
492
493
// Teardown: free each reindex expression tree and the assertion list. The
// shape/reindex/ss arrays themselves live in the struct's own allocation and
// are freed with it. NOTE(review): shape terms are not freed here —
// presumably ownership moved out via return_shape; confirm.
static void _ccv_nnc_micro_reindex_deinit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)super;
	int i;
	for (i = 0; i < self->x->dimensions; i++)
		ccv_nnc_micro_loop_index_free(&self->reindex[i]);
	ccv_array_free(self->equal_assertions);
}
501
502
// Full vtable for the reindex op: it participates in numbering, exposes its
// equality assertions, binds scalars, emits forward and backward code,
// reports its shape, and needs explicit teardown.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_reindex_isa = {
	.numbering = _ccv_nnc_micro_reindex_numbering,
	.equal_assertions = _ccv_nnc_micro_reindex_equal_assertions,
	.bind_scalars = _ccv_nnc_micro_reindex_bind_scalars,
	.emit = _ccv_nnc_micro_reindex_emit,
	.emit_grad = _ccv_nnc_micro_reindex_emit_grad,
	.return_shape = _ccv_nnc_micro_reindex_return_shape,
	.deinit = _ccv_nnc_micro_reindex_deinit
};
511
512
// Build a reindex op: y[i0..ik] = x[reindex...] with output shape given by
// the `shape` expression strings. `ss` supplies the tensors whose axis sizes
// (dA.., dB..) the expressions may reference. Everything (struct, shape and
// reindex term arrays, the ss list with x appended) lives in one allocation.
ccv_nnc_micro_io_t ccv_nnc_micro_reindex(const char* const* const shape, const int shape_count, const ccv_nnc_micro_io_t* const ss, const int s_count, const char* const* const reindex, const int reindex_count, const ccv_nnc_micro_io_t x)
{
	assert(shape_count <= CCV_NNC_MAX_DIM_ALLOC);
	assert(reindex_count <= CCV_NNC_MAX_DIM_ALLOC);
	assert(reindex_count == x->dimensions);
	int i;
	// Single allocation: struct + shape terms + reindex terms + ss (+1 for x).
	struct ccv_nnc_micro_io_reindex_s* const self = (struct ccv_nnc_micro_io_reindex_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_reindex_s) + sizeof(ccv_nnc_micro_loop_index_term_t) * (shape_count + reindex_count) + sizeof(ccv_nnc_micro_io_t) * (s_count + 1));
	self->super.isa = &ccv_nnc_micro_io_reindex_isa;
	self->super.dimensions = shape_count;
	self->super.id = 0;
	self->x = x;
	self->shape = (ccv_nnc_micro_loop_index_term_t*)(self + 1);
	self->reindex = self->shape + shape_count;
	self->ss = (ccv_nnc_micro_io_t*)(self->reindex + reindex_count);
	self->s_count = s_count;
	self->ss[s_count] = x; // x rides along as the last input.
	self->super.inputs = self->ss;
	self->super.input_size = s_count + 1;
	if (s_count > 0)
		memcpy(self->ss, ss, sizeof(ccv_nnc_micro_io_t) * s_count);
	ccv_array_t* const equal_assertions = self->equal_assertions = ccv_array_new(sizeof(ccv_nnc_micro_id_equal_assertion_t), 0, 0);
	// Parse shape into expressions and validate the grammar. Do this upfront so we don't fail on parsing
	// later, which can be confusing.
	// CFG:
	// VAR -> $[a-zA-Z0-9]+
	// DIM -> d[A-Z]{1}[0-9]+
	// INDEX -> i[0-9]+
	// CONST -> [0-9]+
	// FACTOR -> VAR | DIM | CONST | INDEX
	// TERM -> FACTOR { ("*" | "/") FACTOR }
	// EXPRESSION -> ["+" | "-"] TERM { ("+" | "-") TERM }
	// Also, we choose to reuse the index expression structure even some information (such as id of tensors
	// and the binding variables) not available. In this way, there is no need to reallocate index expression
	// later, we just need to simply "patch" it in ccv_nnc_micro_combine_t.
	for (i = 0; i < shape_count; i++)
	{
		int remain_size = strlen(shape[i]);
		const char* pos = shape[i];
		ccv_nnc_micro_loop_index_term_t term = _expression(&pos, &remain_size, equal_assertions);
		_no_index(term); // Make sure this is not index, no loop index.
		self->shape[i] = term;
	}
	// Parse reindex.
	for (i = 0; i < reindex_count; i++)
	{
		int remain_size = strlen(reindex[i]);
		const char* pos = reindex[i];
		self->reindex[i] = _expression(&pos, &remain_size, equal_assertions);
	}
	return (ccv_nnc_micro_io_t)self;
}
563
564
// Elementwise unary op (neg / exp / log, per the emit_grad switch) over x.
struct ccv_nnc_micro_io_unary_s {
	struct ccv_nnc_micro_io_s super; // Must be first: instances cast to ccv_nnc_micro_io_t.
	uint32_t unary_op; // One of the CCV_NNC_MICRO_UNARY_OP_* constants.
	ccv_nnc_micro_io_t x; // The operand tensor.
};
569
570
// Emit the forward pass of an elementwise unary op: a single loop nest over
// all dimensions with y[i...] = unary_op(x[i...]) in the innermost loop.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_unary_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count); // Elementwise: shapes must agree.
	assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i);
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_unary(
			self->unary_op,
			ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
		)
	);
	// The assignment lives in the innermost loop.
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
598
599
// Emit the backward pass of an elementwise unary op: one loop nest over all
// dimensions writing dx as a function of dy and the forward input/output.
// Rules: d(-x) = -dy; d(exp(x)) = y * dy (reuses forward output); and
// d(log(x)) = dy / x.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_unary_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i);
	// Zero-initialize so `statement` can never be read uninitialized should an
	// op without a gradient rule reach here (previously UB with no default).
	ccv_nnc_micro_loop_statement_t statement = {};
	switch (self->unary_op)
	{
		case CCV_NNC_MICRO_UNARY_OP_NEG:
			// dx = -dy
			statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_unary(
					CCV_NNC_MICRO_UNARY_OP_NEG,
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
				)
			);
			break;
		case CCV_NNC_MICRO_UNARY_OP_EXP:
			// dx = y * dy, where y = exp(x) is the forward output.
			statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_binary(
					CCV_NNC_MICRO_BINARY_OP_MUL,
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
				)
			);
			break;
		case CCV_NNC_MICRO_UNARY_OP_LOG:
			// dx = dy / x
			statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_binary(
					CCV_NNC_MICRO_BINARY_OP_DIV,
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
				)
			);
			break;
		default:
			// No gradient rule for this unary op.
			assert(0 && "unsupported unary op in emit_grad");
			break;
	}
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
653
654
static ccv_nnc_micro_tensor_t _ccv_nnc_micro_unary_return_shape(const ccv_nnc_micro_io_t super)
655
0
{
656
0
  struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)super;
657
0
  ccv_nnc_micro_tensor_t var = {};
658
0
  var.dimensions = self->super.dimensions;
659
0
  var.input = self->x->id;
660
0
  var.sibling = -1;
661
0
  return var;
662
0
}
663
664
// Unary ops need no custom numbering, scalar binding or teardown — only code
// emission and shape reporting.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_unary_isa = {
	.emit = _ccv_nnc_micro_unary_emit,
	.emit_grad = _ccv_nnc_micro_unary_emit_grad,
	.return_shape = _ccv_nnc_micro_unary_return_shape
};
669
670
ccv_nnc_micro_io_t ccv_nnc_micro_unary(const uint32_t op, const ccv_nnc_micro_io_t x)
671
0
{
672
0
  struct ccv_nnc_micro_io_unary_s* const self = (struct ccv_nnc_micro_io_unary_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_unary_s));
673
0
  self->super.isa = &ccv_nnc_micro_io_unary_isa;
674
0
  self->super.dimensions = x->dimensions;
675
0
  self->super.id = 0;
676
0
  self->super.inputs = &self->x;
677
0
  self->super.input_size = 1;
678
0
  self->unary_op = op;
679
0
  self->x = x;
680
0
  return (ccv_nnc_micro_io_t)self;
681
0
}
682
683
// Elementwise binary op over two same-shape operands.
struct ccv_nnc_micro_io_binary_s {
	struct ccv_nnc_micro_io_s super; // Must be first: instances cast to ccv_nnc_micro_io_t.
	uint32_t binary_op; // One of the CCV_NNC_MICRO_BINARY_OP_* constants.
	ccv_nnc_micro_io_t left; // Left operand.
	ccv_nnc_micro_io_t right; // Right operand.
};
689
690
// Emit the forward pass of an element-wise binary op as one perfect loop
// nest: out[i...] = left[i...] binary_op right[i...].
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_binary_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->left->dimensions == loop_count);
	assert(self->right->dimensions == loop_count);
	assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	// One loop per output axis, each ranging [0, axis_size of the output).
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i);
	// out[loops...] = left[loops...] op right[loops...]
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_binary(
			self->binary_op,
			ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count))),
			ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
		)
	);
	// The single assignment lives in the innermost loop.
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
720
721
// Emit the backward pass of an element-wise binary op as two loop nests:
// blocks[0] fills GRAD(left), blocks[1] fills GRAD(right). GRAD(id) maps a
// forward variable id into its gradient slot (see the GRAD macro; it uses
// var_count).
// Fixes over the previous version:
//  1. The DIV right-operand gradient was missing its negation:
//     d(x/y)/dy = -(x/y^2)*dout = -(out/y)*dout, but the old code emitted
//     (out/y)*dout, a sign-flipped gradient.
//  2. The switch had no default, so an op without a gradient rule (the enum
//     also contains comparison ops) read left_statement/right_statement
//     uninitialized — undefined behavior.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_binary_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->left->dimensions == loop_count);
	assert(self->right->dimensions == loop_count);
	assert(loop_count <= CCV_NNC_MAX_DIM_ALLOC);
	int i;
	ccv_nnc_micro_loop_t* const left_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	for (i = 0; i < loop_count; i++)
		left_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i);
	ccv_nnc_micro_loop_t* const right_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	for (i = 0; i < loop_count; i++)
		right_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->super.id), i), i);
	// Zero-initialized so an unhandled op cannot leave these indeterminate.
	ccv_nnc_micro_loop_statement_t left_statement = {};
	ccv_nnc_micro_loop_statement_t right_statement = {};
	switch (self->binary_op)
	{
		case CCV_NNC_MICRO_BINARY_OP_DIV:
			// d(x / y) / dx = dout / y
			left_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_binary(
					CCV_NNC_MICRO_BINARY_OP_DIV,
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
				)
			);
			// d(x / y) / dy = -((out / y) * dout); the NEG was missing before.
			right_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_unary(
					CCV_NNC_MICRO_UNARY_OP_NEG,
					ccv_nnc_micro_loop_expression_of_binary(
						CCV_NNC_MICRO_BINARY_OP_MUL,
						ccv_nnc_micro_loop_expression_of_binary(
							CCV_NNC_MICRO_BINARY_OP_DIV,
							ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))),
							ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
						),
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
					)
				)
			);
			break;
		case CCV_NNC_MICRO_BINARY_OP_MUL:
			// d(x * y) / dx = dout * y; d(x * y) / dy = x * dout
			left_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_binary(
					CCV_NNC_MICRO_BINARY_OP_MUL,
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
				)
			);
			right_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_binary(
					CCV_NNC_MICRO_BINARY_OP_MUL,
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
				)
			);
			break;
		case CCV_NNC_MICRO_BINARY_OP_PLUS:
			// d(x + y): both operands receive dout unchanged.
			left_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
			);
			right_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
			);
			break;
		case CCV_NNC_MICRO_BINARY_OP_MINUS:
			// d(x - y): left receives dout, right receives -dout.
			left_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
			);
			right_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_unary(
					CCV_NNC_MICRO_UNARY_OP_NEG,
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
				)
			);
			break;
		case CCV_NNC_MICRO_BINARY_OP_MIN:
			// min(x, y): the smaller operand receives dout, the other 0
			// (on ties both receive dout).
			left_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_ternary(
					ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN,
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))),
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
					),
					ccv_nnc_micro_loop_expression_of_value(0),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
				)
			);
			right_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_ternary(
					ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN,
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))),
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
					),
					ccv_nnc_micro_loop_expression_of_value(0),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
				)
			);
			break;
		case CCV_NNC_MICRO_BINARY_OP_MAX:
			// max(x, y): the larger operand receives dout, the other 0
			// (on ties both receive dout).
			left_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->left->id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_ternary(
					ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN,
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count))),
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
					),
					ccv_nnc_micro_loop_expression_of_value(0),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(left_loops, loop_count)))
				)
			);
			right_statement = ccv_nnc_micro_loop_assignment(
				ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->right->id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)),
				ccv_nnc_micro_loop_expression_of_ternary(
					ccv_nnc_micro_loop_expression_of_binary(CCV_NNC_MICRO_BINARY_OP_LESS_THAN,
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->right->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count))),
						ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->left->id, loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
					),
					ccv_nnc_micro_loop_expression_of_value(0),
					ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(right_loops, loop_count)))
				)
			);
			break;
		default:
			// Comparison ops (and any future op) have no gradient rule here.
			assert(0 && "no gradient rule for this binary op");
			break;
	}
	left_loops[loop_count - 1].statement_count = 1;
	left_loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	left_loops[loop_count - 1].statements[0] = left_statement;
	right_loops[loop_count - 1].statement_count = 1;
	right_loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	right_loops[loop_count - 1].statements[0] = right_statement;
	ccv_nnc_micro_loop_block_t* const blocks = (ccv_nnc_micro_loop_block_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_block_t) * 2);
	blocks[0] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = left_loops
	};
	blocks[1] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = right_loops
	};
	return (ccv_nnc_micro_function_t){
		.block_count = 2,
		.blocks = blocks
	};
}
871
872
static ccv_nnc_micro_tensor_t _ccv_nnc_micro_binary_return_shape(const ccv_nnc_micro_io_t super)
873
3
{
874
3
  struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)super;
875
3
  ccv_nnc_micro_tensor_t var = {};
876
3
  var.dimensions = self->super.dimensions;
877
3
  var.input = self->left->id;
878
3
  var.sibling = self->right->id;
879
3
  return var;
880
3
}
881
882
// Virtual table for the binary element-wise op node: forward loop-nest
// emission, gradient emission, and output shape description.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_binary_isa = {
	.emit = _ccv_nnc_micro_binary_emit,
	.emit_grad = _ccv_nnc_micro_binary_emit_grad,
	.return_shape = _ccv_nnc_micro_binary_return_shape
};
887
888
ccv_nnc_micro_io_t ccv_nnc_micro_binary(const uint32_t op, const ccv_nnc_micro_io_t x, const ccv_nnc_micro_io_t y)
889
3
{
890
3
  struct ccv_nnc_micro_io_binary_s* const self = (struct ccv_nnc_micro_io_binary_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_binary_s));
891
3
  self->super.isa = &ccv_nnc_micro_io_binary_isa;
892
3
  self->super.dimensions = x->dimensions;
893
3
  self->super.id = 0;
894
3
  self->super.inputs = &self->left;
895
3
  self->super.input_size = 2;
896
3
  self->binary_op = op;
897
3
  self->left = x;
898
3
  self->right = y;
899
3
  assert(x->dimensions == y->dimensions);
900
3
  return (ccv_nnc_micro_io_t)self;
901
3
}
902
903
// A reduction node: applies reduce_op (e.g. sum) over the listed axes of x.
// axis is a variable-length tail array sized by axis_count; it is declared
// [1] (pre-C99 flexible-array idiom) and the constructor over-allocates the
// struct by sizeof(int) * (axis_count - 1).
struct ccv_nnc_micro_io_reduce_s {
	struct ccv_nnc_micro_io_s super;
	uint32_t reduce_op;
	int axis_count;
	ccv_nnc_micro_io_t x;
	int axis[1];
};
910
911
// Emit the forward pass of a reduction. The loop nest is permuted so that all
// non-reduced axes come first and all reduced axes are innermost; a carried
// variable (the accumulator for reduce_op) is attached to the innermost
// non-reduced loop, the reduced loops accumulate into it, and that same loop
// writes the accumulator back to the output (with reduced axes pinned at 0).
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reduce_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	// If axis_count == loop_count, we need extra loop to reduce.
	int has_extra_loop = (self->axis_count == loop_count);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * (loop_count + has_extra_loop));
	int i, j, k;
	// Mark which axes get reduced. (VLA sized by tensor rank, bounded by
	// CCV_NNC_MAX_DIM_ALLOC.)
	int8_t reduce_axis[loop_count];
	memset(reduce_axis, 0, sizeof(int8_t) * loop_count);
	for (i = 0; i < self->axis_count; i++)
		reduce_axis[self->axis[i]] = 1;
	j = 0;
	// If loop_count == reduce_axis_count, we have extra loop for carried variables and blocks.
	if (has_extra_loop)
	{
		// Degenerate single-iteration outer loop to host the carried variable.
		loops[0] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_value(1), 0);
		k = 1;
	} else
		k = loop_count - self->axis_count;
	// Partition: non-reduced axes fill slots [0, j), reduced axes fill [k, ...).
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			loops[k] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->x->id, i), i + has_extra_loop);
			++k;
		} else {
			loops[j] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->x->id, i), i + has_extra_loop);
			++j;
		}
	// The accumulator lives on the innermost non-reduced loop (or the extra
	// outer loop when everything is reduced).
	const int carried_loop_idx = has_extra_loop ? 0 : loop_count - self->axis_count - 1;
	loops[carried_loop_idx].carried_count = 1;
	loops[carried_loop_idx].carrieds = (ccv_nnc_micro_loop_carried_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_carried_t));
	loops[carried_loop_idx].carrieds[0] = ccv_nnc_micro_loop_carried(self->reduce_op, 0);
	j = 0;
	k = has_extra_loop ? 1 : loop_count - self->axis_count;
	// If loop_count == reduce_axis_count, we have extra loop for carrieds and block.
	// Build the read index for x: axis i maps to the loop that iterates it,
	// accounting for the non-reduced/reduced partition above.
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			index[i] = ccv_nnc_micro_index_of_id(loops[k].id);
			++k;
		} else {
			index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	// accumulator reduce_op= x[index...] — placed in the innermost loop.
	ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_compound_assignment_of_id(
		loops[carried_loop_idx].carrieds[0].id,
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, index))
	);
	loops[carried_loop_idx + self->axis_count].statement_count = 1;
	loops[carried_loop_idx + self->axis_count].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[carried_loop_idx + self->axis_count].statements[0] = statement;
	j = 0;
	// Build the write index for the output: reduced axes are pinned at 0.
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
			index[i] = ccv_nnc_micro_index_of_value(0);
		else {
			index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	// out[index...] = accumulator — runs after the inner reduction loops.
	statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, index),
		ccv_nnc_micro_loop_expression_of_id(loops[carried_loop_idx].carrieds[0].id)
	);
	loops[carried_loop_idx].statement_count = 1;
	loops[carried_loop_idx].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[carried_loop_idx].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.carried_count = 1,
			.loop_count = loop_count + has_extra_loop,
			.loops = loops
		}
	};
}
989
990
// Emit the backward pass of a sum reduction: the gradient is broadcast —
// GRAD(x)[all indices] = GRAD(out)[with reduced axes pinned at 0]. The loop
// nest uses the same non-reduced-first / reduced-innermost permutation as the
// forward emit.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_reduce_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)super;
	assert(self->reduce_op == CCV_NNC_MICRO_REDUCE_OP_SUM); // I haven't figure out how to do mean without add additional opcode.
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i, j, k;
	// Mark which axes were reduced in the forward pass.
	int8_t reduce_axis[loop_count];
	memset(reduce_axis, 0, sizeof(int8_t) * loop_count);
	for (i = 0; i < self->axis_count; i++)
		reduce_axis[self->axis[i]] = 1;
	j = 0;
	k = loop_count - self->axis_count;
	// Non-reduced axes occupy slots [0, j); reduced axes occupy [k, ...).
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			loops[k] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
			++k;
		} else {
			loops[j] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
			++j;
		}
	j = 0;
	k = loop_count - self->axis_count;
	// If loop_count == reduce_axis_count, we have extra loop for carrieds and block.
	// Write index into GRAD(x): every axis iterated by its assigned loop.
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
		{
			index[i] = ccv_nnc_micro_index_of_id(loops[k].id);
			++k;
		} else {
			index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	j = 0;
	// Read index into GRAD(out): reduced axes are pinned at 0 (the reduced
	// output has extent 1 on those axes).
	ccv_nnc_micro_loop_index_term_t reduced_index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
		if (reduce_axis[i])
			reduced_index[i] = ccv_nnc_micro_index_of_value(0);
		else {
			reduced_index[i] = ccv_nnc_micro_index_of_id(loops[j].id);
			++j;
		}
	// GRAD(x)[index...] = GRAD(out)[reduced_index...]
	ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, index),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, reduced_index))
	);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			// NOTE(review): carried_count is 1 here although no carried
			// variable is created in this function (unlike the forward emit)
			// — presumably harmless over-reporting; confirm against how
			// one_block.carried_count is consumed downstream.
			.carried_count = 1,
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
1051
1052
static ccv_nnc_micro_tensor_t _ccv_nnc_micro_reduce_return_shape(const ccv_nnc_micro_io_t super)
1053
3
{
1054
3
  struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)super;
1055
3
  ccv_nnc_micro_tensor_t var = {};
1056
3
  var.dimensions = self->super.dimensions;
1057
3
  var.input = self->x->id;
1058
3
  var.sibling = -1;
1059
3
  var.shape = (ccv_nnc_micro_loop_index_term_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions);
1060
3
  int i;
1061
20
  for (i = 0; i < self->super.dimensions; 
i++17
)
1062
17
    var.shape[i] = ccv_nnc_micro_index_of_axis_size(self->x->id, i);
1063
10
  for (i = 0; i < self->axis_count; 
i++7
)
1064
7
    var.shape[self->axis[i]] = ccv_nnc_micro_index_of_value(1);
1065
3
  return var;
1066
3
}
1067
1068
// Virtual table for the reduction node: forward loop-nest emission, gradient
// emission, and output shape description.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_reduce_isa = {
	.emit = _ccv_nnc_micro_reduce_emit,
	.emit_grad = _ccv_nnc_micro_reduce_emit_grad,
	.return_shape = _ccv_nnc_micro_reduce_return_shape
};
1073
1074
ccv_nnc_micro_io_t ccv_nnc_micro_reduce(const uint8_t op, const int* const axis, const int axis_count, const ccv_nnc_micro_io_t x)
1075
3
{
1076
3
  struct ccv_nnc_micro_io_reduce_s* const self = (struct ccv_nnc_micro_io_reduce_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_reduce_s) + sizeof(int) * (axis_count - 1));
1077
3
  self->super.isa = &ccv_nnc_micro_io_reduce_isa;
1078
3
  self->super.dimensions = x->dimensions;
1079
3
  self->super.id = 0;
1080
3
  self->super.inputs = &self->x;
1081
3
  self->super.input_size = 1;
1082
3
  self->reduce_op = op;
1083
3
  self->x = x;
1084
3
  self->axis_count = axis_count;
1085
3
  assert(axis_count <= x->dimensions);
1086
3
  int i;
1087
10
  for (i = 0; i < axis_count; 
i++7
)
1088
7
  { assert(axis[i] < x->dimensions); }
1089
3
  memcpy(self->axis, axis, sizeof(int) * axis_count);
1090
3
  return (ccv_nnc_micro_io_t)self;
1091
3
}
1092
1093
// A select (gather) node: along `axis`, picks the element of x whose
// coordinate is given by the index tensor. x and index share dimensionality.
// NOTE: x and index must remain adjacent — the constructor points
// super.inputs at &x with input_size 2, treating them as a 2-element array.
struct ccv_nnc_micro_io_select_s {
	struct ccv_nnc_micro_io_s super;
	int axis;
	ccv_nnc_micro_io_t x;
	ccv_nnc_micro_io_t index;
};
1099
1100
// Emit the forward pass of select: iterate all output axes (the selected axis
// is collapsed to a single iteration) and read x with the index tensor's
// value substituted as the coordinate on the selected axis.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_select_emit(const ccv_nnc_micro_io_t super)
{
	struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	assert(self->index->dimensions == loop_count);
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
	{
		if (i == self->axis)
			// Selected axis has output extent 1: a single-iteration loop.
			loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_value(1), i);
		else
			loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(self->super.id, i), i);
	}
	// Read index for x: the selected axis comes from the index tensor's value,
	// all other axes from their loops.
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
	{
		if (i == self->axis)
			index[i] = ccv_nnc_micro_index_of_id(ccv_nnc_micro_id_of_tensor(self->index->id));
		else
			index[i] = ccv_nnc_micro_index_of_id(loops[i].id);
	}
	// out[loops...] = x[index...]
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(self->super.id, loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(self->x->id, loop_count, index))
	);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	return (ccv_nnc_micro_function_t){
		.block_count = 1,
		.one_block = {
			.loop_count = loop_count,
			.loops = loops
		}
	};
}
1138
1139
// Emit the backward pass of select as two loop nests: blocks[0] zeroes
// GRAD(x) entirely, blocks[1] scatter-accumulates GRAD(out) into GRAD(x) at
// the positions named by the index tensor. The index tensor itself receives
// no gradient.
static CCV_WARN_UNUSED(ccv_nnc_micro_function_t) _ccv_nnc_micro_select_emit_grad(const ccv_nnc_micro_io_t super, const int var_count)
{
	struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)super;
	const int loop_count = self->super.dimensions;
	assert(self->x->dimensions == loop_count);
	assert(self->index->dimensions == loop_count);
	// Block 0: GRAD(x)[...] = 0 over the whole tensor.
	ccv_nnc_micro_loop_t* const reset_loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	int i;
	for (i = 0; i < loop_count; i++)
		reset_loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
	const ccv_nnc_micro_loop_statement_t reset_statement = ccv_nnc_micro_loop_assignment(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, ccv_nnc_micro_index_of_loops(reset_loops, loop_count)),
		ccv_nnc_micro_loop_expression_of_value(0)
	);
	reset_loops[loop_count - 1].statement_count = 1;
	reset_loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	reset_loops[loop_count - 1].statements[0] = reset_statement;
	// Block 1: scatter pass over the same iteration space.
	ccv_nnc_micro_loop_t* const loops = (ccv_nnc_micro_loop_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_t) * loop_count);
	for (i = 0; i < loop_count; i++)
		loops[i] = ccv_nnc_micro_for_in(ccv_nnc_micro_index_of_value(0), ccv_nnc_micro_index_of_axis_size(GRAD(self->x->id), i), i);
	// Write index into GRAD(x): the selected axis comes from the index
	// tensor's value, all other axes from their loops.
	ccv_nnc_micro_loop_index_term_t index[CCV_NNC_MAX_DIM_ALLOC];
	for (i = 0; i < loop_count; i++)
	{
		if (i == self->axis)
			index[i] = ccv_nnc_micro_index_of_id(ccv_nnc_micro_id_of_tensor(self->index->id));
		else
			index[i] = ccv_nnc_micro_index_of_id(loops[i].id);
	}
	// This is only for x, nothing for index.
	// Compound (accumulating) assignment: multiple source positions on the
	// selected axis can map to the same destination.
	const ccv_nnc_micro_loop_statement_t statement = ccv_nnc_micro_loop_compound_assignment_of_tensor(
		ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->x->id), loop_count, index),
		ccv_nnc_micro_loop_expression_of_variable(ccv_nnc_micro_loop_variable_of_tensor(GRAD(self->super.id), loop_count, ccv_nnc_micro_index_of_loops(loops, loop_count)))
	);
	loops[loop_count - 1].statement_count = 1;
	loops[loop_count - 1].statements = (ccv_nnc_micro_loop_statement_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_statement_t));
	loops[loop_count - 1].statements[0] = statement;
	ccv_nnc_micro_loop_block_t* const blocks = (ccv_nnc_micro_loop_block_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_block_t) * 2);
	blocks[0] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = reset_loops
	};
	blocks[1] = (ccv_nnc_micro_loop_block_t){
		.loop_count = loop_count,
		.loops = loops
	};
	return (ccv_nnc_micro_function_t){
		.block_count = 2,
		.blocks = blocks
	};
}
1189
1190
static ccv_nnc_micro_tensor_t _ccv_nnc_micro_select_return_shape(const ccv_nnc_micro_io_t super)
1191
0
{
1192
0
  struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)super;
1193
0
  ccv_nnc_micro_tensor_t var = {};
1194
0
  var.dimensions = self->super.dimensions;
1195
0
  var.input = self->x->id;
1196
0
  var.sibling = -1;
1197
0
  var.shape = (ccv_nnc_micro_loop_index_term_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_term_t) * self->super.dimensions);
1198
0
  int i;
1199
0
  for (i = 0; i < self->super.dimensions; i++)
1200
0
  {
1201
0
    if (i != self->axis)
1202
0
      var.shape[i] = ccv_nnc_micro_index_of_axis_size(self->x->id, i);
1203
0
    else
1204
0
      var.shape[i] = ccv_nnc_micro_index_of_value(1);
1205
0
  }
1206
0
  return var;
1207
0
}
1208
1209
// Virtual table for the select (gather) node: forward loop-nest emission,
// gradient emission, and output shape description.
static const ccv_nnc_micro_io_vtab_t ccv_nnc_micro_io_select_isa = {
	.emit = _ccv_nnc_micro_select_emit,
	.emit_grad = _ccv_nnc_micro_select_emit_grad,
	.return_shape = _ccv_nnc_micro_select_return_shape
};
1214
1215
ccv_nnc_micro_io_t ccv_nnc_micro_select(const int axis, const ccv_nnc_micro_io_t x, const ccv_nnc_micro_io_t index)
1216
0
{
1217
0
  struct ccv_nnc_micro_io_select_s* const self = (struct ccv_nnc_micro_io_select_s*)cccalloc(1, sizeof(struct ccv_nnc_micro_io_select_s));
1218
0
  self->super.isa = &ccv_nnc_micro_io_select_isa;
1219
0
  self->super.dimensions = x->dimensions;
1220
0
  self->super.id = 0;
1221
0
  self->super.inputs = &self->x;
1222
0
  self->super.input_size = 2;
1223
0
  self->x = x;
1224
0
  self->index = index;
1225
0
  self->axis = axis;
1226
0
  assert(axis <= CCV_NNC_MAX_DIM_ALLOC);
1227
0
  return (ccv_nnc_micro_io_t)self;
1228
0
}