Bug Summary

File: nnc/ccv_nnc_micro_simplify.c
Warning: line 9, column 1
Array access (via field 'flags') results in a null pointer dereference

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_micro_simplify.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/18 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -D HAVE_CUDA_SM80 -I /usr/local/include -internal-isystem /usr/local/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-06-10-133529-298122-1 -x c ccv_nnc_micro_simplify.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_micro.h"
6#include "3rdparty/khash/khash.h"
7
8#define MICRO_ID_TO_INT(x) (((x).id << 8) | ((x).d))
9KHASH_MAP_INIT_INT(ccv_nnc_axis_id_group, int)
/* Expansion of KHASH_MAP_INIT_INT(ccv_nnc_axis_id_group, int): */
typedef struct kh_ccv_nnc_axis_id_group_s {
	khint_t n_buckets, size, n_occupied, upper_bound;
	khint32_t *flags;
	khint32_t *keys;
	int *vals;
} kh_ccv_nnc_axis_id_group_t;
static inline __attribute__ ((__unused__)) kh_ccv_nnc_axis_id_group_t *kh_init_ccv_nnc_axis_id_group(void) {
	return (kh_ccv_nnc_axis_id_group_t*)calloc(1, sizeof(kh_ccv_nnc_axis_id_group_t));
}
static inline __attribute__ ((__unused__)) void kh_destroy_ccv_nnc_axis_id_group(kh_ccv_nnc_axis_id_group_t *h) {
	if (h) { free((void *)h->keys); free(h->flags); free((void *)h->vals); free(h); }
}
static inline __attribute__ ((__unused__)) void kh_clear_ccv_nnc_axis_id_group(kh_ccv_nnc_axis_id_group_t *h) {
	if (h && h->flags) {
		memset(h->flags, 0xaa, ((h->n_buckets) < 16 ? 1 : (h->n_buckets) >> 4) * sizeof(khint32_t));
		h->size = h->n_occupied = 0;
	}
}
static inline __attribute__ ((__unused__)) khint_t kh_get_ccv_nnc_axis_id_group(const kh_ccv_nnc_axis_id_group_t *h, khint32_t key) {
	if (h->n_buckets) {
		khint_t k, i, last, mask, step = 0;
		mask = h->n_buckets - 1;
		k = (khint32_t)(key); i = k & mask; last = i;
		while (!((h->flags[i>>4]>>((i&0xfU)<<1))&2) && (((h->flags[i>>4]>>((i&0xfU)<<1))&1) || !((h->keys[i]) == (key)))) {
			i = (i + (++step)) & mask;
			if (i == last) return h->n_buckets;
		}
		return ((h->flags[i>>4]>>((i&0xfU)<<1))&3) ? h->n_buckets : i;
	} else
		return 0;
}
static inline __attribute__ ((__unused__)) int kh_resize_ccv_nnc_axis_id_group(kh_ccv_nnc_axis_id_group_t *h, khint_t new_n_buckets) {
	khint32_t *new_flags = 0;
	khint_t j = 1;
	{
		(--(new_n_buckets), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)|=(new_n_buckets)>>16, ++(new_n_buckets));
		if (new_n_buckets < 4) new_n_buckets = 4;
		if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;
		else {
			new_flags = (khint32_t*)malloc(((new_n_buckets) < 16 ? 1 : (new_n_buckets)>>4) * sizeof(khint32_t));
			if (!new_flags) return -1;
			memset(new_flags, 0xaa, ((new_n_buckets) < 16 ? 1 : (new_n_buckets)>>4) * sizeof(khint32_t));
			if (h->n_buckets < new_n_buckets) {
				khint32_t *new_keys = (khint32_t*)realloc((void *)h->keys, new_n_buckets * sizeof(khint32_t));
				if (!new_keys) { free(new_flags); return -1; }
				h->keys = new_keys;
				if (1) {
					int *new_vals = (int*)realloc((void *)h->vals, new_n_buckets * sizeof(int));
					if (!new_vals) { free(new_flags); return -1; }
					h->vals = new_vals;
				}
			}
		}
	}
	if (j) {
		for (j = 0; j != h->n_buckets; ++j) {
			if (((h->flags[j>>4]>>((j&0xfU)<<1))&3) == 0) {
				khint32_t key = h->keys[j];
				int val;
				khint_t new_mask;
				new_mask = new_n_buckets - 1;
				if (1) val = h->vals[j];
				(h->flags[j>>4]|=1ul<<((j&0xfU)<<1));
				while (1) {
					khint_t k, i, step = 0;
					k = (khint32_t)(key); i = k & new_mask;
					while (!((new_flags[i>>4]>>((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask;
					(new_flags[i>>4]&=~(2ul<<((i&0xfU)<<1)));
					if (i < h->n_buckets && ((h->flags[i>>4]>>((i&0xfU)<<1))&3) == 0) {
						{ khint32_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
						if (1) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
						(h->flags[i>>4]|=1ul<<((i&0xfU)<<1));
					} else {
						h->keys[i] = key;
						if (1) h->vals[i] = val;
						break;
					}
				}
			}
		}
		if (h->n_buckets > new_n_buckets) {
			h->keys = (khint32_t*)realloc((void *)h->keys, new_n_buckets * sizeof(khint32_t));
			if (1) h->vals = (int*)realloc((void *)h->vals, new_n_buckets * sizeof(int));
		}
		free(h->flags);
		h->flags = new_flags;
		h->n_buckets = new_n_buckets;
		h->n_occupied = h->size;
		h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5);
	}
	return 0;
}
static inline __attribute__ ((__unused__)) khint_t kh_put_ccv_nnc_axis_id_group(kh_ccv_nnc_axis_id_group_t *h, khint32_t key, int *ret) {
	khint_t x;
	if (h->n_occupied >= h->upper_bound) {
		if (h->n_buckets > (h->size<<1)) {
			if (kh_resize_ccv_nnc_axis_id_group(h, h->n_buckets - 1) < 0) { *ret = -1; return h->n_buckets; }
		} else if (kh_resize_ccv_nnc_axis_id_group(h, h->n_buckets + 1) < 0) { *ret = -1; return h->n_buckets; }
	}
	{
		khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
		x = site = h->n_buckets;
		k = (khint32_t)(key); i = k & mask;
		if (((h->flags[i>>4]>>((i&0xfU)<<1))&2)) x = i;
		else {
			last = i;
			while (!((h->flags[i>>4]>>((i&0xfU)<<1))&2) && (((h->flags[i>>4]>>((i&0xfU)<<1))&1) || !((h->keys[i]) == (key)))) {
				if (((h->flags[i>>4]>>((i&0xfU)<<1))&1)) site = i;
				i = (i + (++step)) & mask;
				if (i == last) { x = site; break; }
			}
			if (x == h->n_buckets) {
				if (((h->flags[i>>4]>>((i&0xfU)<<1))&2) && site != h->n_buckets) x = site;
				else x = i;
			}
		}
	}
	if (((h->flags[x>>4]>>((x&0xfU)<<1))&2)) {
		h->keys[x] = key;
		(h->flags[x>>4]&=~(3ul<<((x&0xfU)<<1)));
		++h->size; ++h->n_occupied;
		*ret = 1;
	} else if (((h->flags[x>>4]>>((x&0xfU)<<1))&1)) {
		h->keys[x] = key;
		(h->flags[x>>4]&=~(3ul<<((x&0xfU)<<1)));
		++h->size;
		*ret = 2;
	} else
		*ret = 0;
	return x;
}
static inline __attribute__ ((__unused__)) void kh_del_ccv_nnc_axis_id_group(kh_ccv_nnc_axis_id_group_t *h, khint_t x) {
	if (x != h->n_buckets && !((h->flags[x>>4]>>((x&0xfU)<<1))&3)) {
		(h->flags[x>>4]|=1ul<<((x&0xfU)<<1));
		--h->size;
	}
}
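
For orientation, here is a minimal usage sketch of the generated functions through the standard khash wrapper macros (the type name example_group and the key/value numbers are hypothetical; error handling abbreviated):

#include "3rdparty/khash/khash.h"

KHASH_MAP_INIT_INT(example_group, int)

// Minimal sketch: insert a key, look it up, tear down.
static int example_lookup(void)
{
	khash_t(example_group)* const h = kh_init(example_group);
	int ret;
	khiter_t k = kh_put(example_group, h, 42, &ret); // ret == -1 signals a failed resize.
	if (ret >= 0)
		kh_val(h, k) = 7;
	k = kh_get(example_group, h, 42);
	const int found = (k != kh_end(h)) ? kh_val(h, k) : -1;
	kh_destroy(example_group, h);
	return found; // 7 on success.
}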
Analyzer path:
 13. Null pointer value stored to field 'flags'
 28. Taking true branch
 29. Taking false branch
 30. Calling 'kh_resize_ccv_nnc_axis_id_group'
 31. Taking true branch
 32. Assuming the condition is true
 33. Taking true branch
 34. Taking false branch
 35. Returning without writing to 'h->flags'
 36. Returning from 'kh_resize_ccv_nnc_axis_id_group'
 37. Taking false branch
 38. Array access (via field 'flags') results in a null pointer dereference
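
The reported path reduces to the following shape; this is a minimal sketch for illustration with hypothetical names, not the ccv code itself. kh_init calloc's the table, so flags starts out NULL (step 13); kh_put calls kh_resize, which can return success down a branch that never assigns flags (step 35); kh_put then indexes flags (step 38):

#include <stdlib.h>
#include <string.h>

// Minimal reduction of the reported pattern; all names are hypothetical.
typedef struct { unsigned *flags; unsigned n_buckets, size; } table_t;

// Mirrors kh_resize: the early branch reports success (0) without ever
// assigning t->flags, which still holds the NULL stored by calloc.
static int resize(table_t* const t, const unsigned new_n_buckets)
{
	if (t->size >= new_n_buckets) // The branch the analyzer assumes is taken.
		return 0; // Success, but t->flags was never written.
	unsigned* const new_flags = (unsigned*)malloc((new_n_buckets < 16 ? 1 : new_n_buckets >> 4) * sizeof(unsigned));
	if (!new_flags)
		return -1;
	memset(new_flags, 0xaa, (new_n_buckets < 16 ? 1 : new_n_buckets >> 4) * sizeof(unsigned));
	free(t->flags);
	t->flags = new_flags;
	t->n_buckets = new_n_buckets;
	return 0;
}

// Mirrors kh_put: after a "successful" resize it indexes t->flags, which is
// where the analyzer reports the possible NULL dereference.
static unsigned put_probe(table_t* const t, const unsigned i)
{
	if (resize(t, t->n_buckets + 1) < 0)
		return t->n_buckets;
	return (t->flags[i >> 4] >> ((i & 0xfU) << 1)) & 2;
}

int main(void)
{
	table_t* const t = (table_t*)calloc(1, sizeof(table_t)); // flags == NULL (step 13).
	const unsigned r = put_probe(t, 0);
	free(t->flags);
	free(t);
	return (int)r;
}

Note that on a freshly calloc'ed table the early-return branch requires size >= the requested bucket count, which cannot hold while flags is still NULL, so the warning may be a conservative over-approximation; the sketch only mirrors the path the analyzer describes.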
10
11static int _ccv_nnc_same_index_term(const ccv_nnc_micro_loop_index_term_t a_index, const ccv_nnc_micro_loop_index_term_t b_index, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
12{
13 if (a_index.type != b_index.type)
14 return 0;
15 const int type = a_index.type;
16 switch (type)
17 {
18 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL:
19 return a_index.immediate_value == b_index.immediate_value;
20 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
21 if (a_index.id.type != b_index.id.type)
22 return 0;
23 // Check within axis_id_groups to see if there is a match; if there is no match, we can proceed (and use the group table again to check).
24 if (axis_id_groups && a_index.id.type == CCV_NNC_MICRO_AXIS_SIZE_ID)
25 {
26 ccv_nnc_micro_id_t a_id = a_index.id;
27 while (groups && groups[a_id.id] != a_id.id)
28 a_id.id = groups[a_id.id];
29 int a_root = MICRO_ID_TO_INT(a_id);
30 khiter_t k;
31 for (;;) {
32 k = kh_get(ccv_nnc_axis_id_group, axis_id_groups, a_root);
33 if (k == kh_end(axis_id_groups))
34 break;
35 a_root = kh_val(axis_id_groups, k);
36 }
37 ccv_nnc_micro_id_t b_id = b_index.id;
38 while (groups && groups[b_id.id] != b_id.id)
39 b_id.id = groups[b_id.id];
40 int b_root = MICRO_ID_TO_INT(b_id);
41 for (;;) {
42 k = kh_get(ccv_nnc_axis_id_group, axis_id_groups, b_root);
43 if (k == kh_end(axis_id_groups))
44 break;
45 b_root = kh_val(axis_id_groups, k);
46 }
47 if (a_root == b_root)
48 return 1;
49 }
50 if (groups && (a_index.id.type == CCV_NNC_MICRO_AXIS_SIZE_ID || a_index.id.type == CCV_NNC_MICRO_TENSOR_ID))
51 {
52 if (a_index.id.d != b_index.id.d)
53 return 0;
54 switch (a_index.id.type)
55 {
56 case CCV_NNC_MICRO_TENSOR_ID:
57 case CCV_NNC_MICRO_AXIS_SIZE_ID: {
58 // Find their group identifier and then compare.
59 int a_root = groups[a_index.id.id];
60 while (groups[a_root] != a_root)
61 a_root = groups[a_root];
62 int b_root = groups[b_index.id.id];
63 while (groups[b_root] != b_root)
64 b_root = groups[b_root];
65 return a_root == b_root;
66 }
67 }
68 return a_index.id.id == b_index.id.id;
69 } else
70 return (a_index.id.d == b_index.id.d && a_index.id.id == b_index.id.id);
71 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY: {
72 return a_index.binary->op == b_index.binary->op && _ccv_nnc_same_index_term(a_index.binary->left, b_index.binary->left, groups, axis_id_groups) && _ccv_nnc_same_index_term(a_index.binary->right, b_index.binary->right, groups, axis_id_groups);
73 }
74 }
75 return 0;
76}
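
For concreteness, the key used for axis_id_groups above packs the id into the high bits and the per-axis dimension d into the low 8 bits; two axis references resolve as equal when chasing groups and then axis_id_groups lands both packed keys on the same root. A tiny worked check (the struct below is a hypothetical stand-in for the relevant ccv_nnc_micro_id_t fields, not the real definition):

#include <assert.h>

// Stand-in for the two ccv_nnc_micro_id_t fields the macro reads.
struct micro_id_like { int id; int d; };
#define EXAMPLE_ID_TO_INT(x) (((x).id << 8) | ((x).d))

int main(void)
{
	const struct micro_id_like a = { .id = 3, .d = 1 };
	assert(EXAMPLE_ID_TO_INT(a) == 769); // (3 << 8) | 1 == 769
	return 0;
}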
77
78static int _ccv_nnc_same_shape(const ccv_nnc_micro_loop_index_term_t* const a_shape, const ccv_nnc_micro_loop_index_term_t* const b_shape, const int dimensions)
79{
80 int i;
81 for (i = 0; i < dimensions; i++)
82 if (!_ccv_nnc_same_index_term(a_shape[i], b_shape[i], 0, 0))
83 return 0;
84 return 1;
85}
86
87static int _ccv_nnc_same_loop(const ccv_nnc_micro_loop_block_t* const left_block, const ccv_nnc_micro_loop_block_t* const right_block, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups, int* const left_loop_idx, int* const right_loop_idx)
88{
89 assert(left_block->loop_count > 0);
90 assert(right_block->loop_count > 0);
91 int i, j;
92 int left_right_link[left_block->loop_count];
93 int right_left_link[right_block->loop_count];
94 enum {
95 ONE = -2,
96 UNASSIGNED = -1,
97 };
98 for (i = 0; i < left_block->loop_count; i++)
99 if (left_block->loops[i].start_index.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL && left_block->loops[i].start_index.immediate_value == 0 &&
100 left_block->loops[i].end_index.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL && left_block->loops[i].end_index.immediate_value == 1)
101 left_right_link[i] = ONE;
102 else
103 left_right_link[i] = UNASSIGNED;
104 for (i = 0; i < right_block->loop_count; i++)
105 if (right_block->loops[i].start_index.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL && right_block->loops[i].start_index.immediate_value == 0 &&
106 right_block->loops[i].end_index.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL && right_block->loops[i].end_index.immediate_value == 1)
107 right_left_link[i] = ONE;
108 else
109 right_left_link[i] = UNASSIGNED;
110 for (i = 0; i < left_block->loop_count; i++) // Find corresponding loop on the right.
111 {
112 if (left_right_link[i] != UNASSIGNED)
113 continue;
114 int flag = UNASSIGNED;
115 for (j = 0; j < right_block->loop_count && flag == UNASSIGNED; j++)
116 {
117 if (right_left_link[j] != UNASSIGNED)
118 continue;
119 if (_ccv_nnc_same_index_term(left_block->loops[i].start_index, right_block->loops[j].start_index, groups, axis_id_groups) &&
120 _ccv_nnc_same_index_term(left_block->loops[i].end_index, right_block->loops[j].end_index, groups, axis_id_groups))
121 flag = j;
122 }
123 if (flag != UNASSIGNED)
124 {
125 left_right_link[i] = flag;
126 right_left_link[flag] = i;
127 }
128 }
129 // If still have unmatched, they don't share the same loop.
130 for (i = 0; i < left_block->loop_count; i++)
131 if (left_right_link[i] == UNASSIGNED)
132 return 0;
133 for (i = 0; i < right_block->loop_count; i++)
134 if (right_left_link[i] == UNASSIGNED)
135 return 0;
136 // I don't want to deal with constant loops; hence, if anything other than the outer-most is a constant loop (0..<1),
137 // we cannot merge.
138 for (i = 1; i < left_block->loop_count; i++)
139 if (left_right_link[i] == ONE)
140 return 0;
141 for (i = 1; i < right_block->loop_count; i++)
142 if (right_left_link[i] == ONE)
143 return 0;
144 assert((left_block->loop_count == right_block->loop_count) ||
145 	(left_block->loop_count == right_block->loop_count + 1) ||
146 	(left_block->loop_count + 1 == right_block->loop_count));
147 // The loops match, but the ordering probably doesn't. We reorder loops based on statements.
148 // Hence, two loops can only merge if, using the statements as a pivot point, they can still
149 // match things before / after the statement.
150 // If both have statements, check whether order is preserved within the statement loop (we could be fancier
151 // and recursively call this while using the statement as the pivoting point, but that is too much for my taste).
152 const int left_start_idx = left_right_link[0] == ONE ? 1 : 0;
153 const int right_start_idx = right_left_link[0] == ONE ? 1 : 0;
154 for (i = 0; i < left_block->loop_count; i++)
155 left_loop_idx[i] = UNASSIGNED;
156 for (i = 0; i < right_block->loop_count; i++)
157 right_loop_idx[i] = UNASSIGNED;
158 if (left_start_idx == 1)
159 left_loop_idx[0] = 0; // Assign their index.
160 if (right_start_idx == 0)
161 right_loop_idx[0] = 0; // Assign their index.
162 const int end_idx = left_right_link[0] == ONE && right_left_link[0] == ONE ? left_block->loop_count - 1 : ccv_min(left_block->loop_count, right_block->loop_count);
163 int pivot_idx = end_idx;
164 int k;
165 for (i = end_idx - 1; i >= 0; i--)
166 {
167 if (left_block->loops[i + left_start_idx].statement_count > 0)
168 {
169 for (j = i + 1, k = i + 1; j < end_idx; j++)
170 if (left_loop_idx[j + left_start_idx] == UNASSIGNED)
171 {
172 left_loop_idx[j + left_start_idx] = k + left_start_idx;
173 // If the right one can be referenced past the previous pivot_idx, it is not right.
174 if (left_right_link[j + left_start_idx] >= pivot_idx + right_start_idx)
175 return 0;
176 right_loop_idx[left_right_link[j + left_start_idx]] = k + right_start_idx;
177 ++k;
178 if (k > pivot_idx)
179 return 0;
180 }
181 assert(k == pivot_idx);
182 pivot_idx = i + 1;
183 }
184 if (right_block->loops[i + right_start_idx].statement_count > 0)
185 {
186 for (j = i + 1, k = i + 1; j < end_idx; j++)
187 if (right_loop_idx[j + left_start_idx] == UNASSIGNED)
188 {
189 right_loop_idx[j + right_start_idx] = k + right_start_idx;
190 // If the left one can be referenced past the previous pivot_idx, it is not right.
191 if (right_left_link[j + right_start_idx] >= pivot_idx + left_start_idx)
192 return 0;
193 left_loop_idx[right_left_link[j + right_start_idx]] = k + left_start_idx;
194 ++k;
195 if (k > pivot_idx)
196 return 0;
197 }
198 assert(k == pivot_idx);
199 pivot_idx = i + 1;
200 }
201 }
202 if (end_idx == 0)
203 return 1;
204 // Finally, to distribute the rest.
205 for (j = 0, k = 0; j < end_idx; j++)
206 {
207 if (left_loop_idx[j + left_start_idx] == UNASSIGNED)
208 {
209 left_loop_idx[j + left_start_idx] = k + left_start_idx;
210 // If the right one can be referenced past the previous pivot_idx, it is not right.
211 if (left_right_link[j + left_start_idx] >= pivot_idx + right_start_idx)
212 return 0;
213 right_loop_idx[left_right_link[j + left_start_idx]] = k + right_start_idx;
214 ++k;
215 if (k > pivot_idx)
216 return 0;
217 }
218 }
219 assert(k == pivot_idx);
220 return 1;
221}
222
223static void _ccv_nnc_loop_order_by(ccv_nnc_micro_loop_block_t* const block, int* const loop_idx, ccv_nnc_micro_loop_t* const loops)
224{
225 int i;
226 for (i = 0; i < block->loop_count; i++)
227 if (loop_idx[i] >= 0)
228 loops[loop_idx[i]] = block->loops[i];
229 else
230 loops[i] = block->loops[i];
231 for (i = 0; i < block->loop_count; i++)
232 {
233 // Essentially, we don't need to move statements or loop-carried variables, just the loop id and the start / end index.
234 block->loops[i].id = loops[i].id;
235 block->loops[i].start_index = loops[i].start_index;
236 block->loops[i].end_index = loops[i].end_index;
237 }
238}
239
240static void _ccv_nnc_expression_rename_carrieds(ccv_nnc_micro_loop_expression_t* const expression, const int start_idx)
241{
242 switch (expression->type)
243 {
244 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_ID:
245 assert(expression->id.type == CCV_NNC_MICRO_LOOP_CARRIED_ID);
246 expression->id.id += start_idx;
247 break;
248 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_TERNAY:
249 _ccv_nnc_expression_rename_carrieds(expression->ternary.pivot, start_idx);
250 _ccv_nnc_expression_rename_carrieds(expression->ternary.left, start_idx);
251 _ccv_nnc_expression_rename_carrieds(expression->ternary.right, start_idx);
252 break;
253 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_BINARY:
254 _ccv_nnc_expression_rename_carrieds(expression->binary.left, start_idx);
255 _ccv_nnc_expression_rename_carrieds(expression->binary.right, start_idx);
256 break;
257 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_UNARY:
258 _ccv_nnc_expression_rename_carrieds(expression->unary.x, start_idx);
259 break;
260 // We don't need to care about other expressions because loop-carried variables cannot participate in these operations.
261 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR:
262 break;
263 }
264}
265
266static void _ccv_nnc_loop_rename_carrieds(ccv_nnc_micro_loop_block_t* const block, const int start_idx)
267{
268 int i, j;
269 const int loop_count = block->loop_count;
270 ccv_nnc_micro_loop_t* const loops = block->loops;
271 for (i = 0; i < loop_count; i++)
272 {
273 for (j = 0; j < loops[i].carried_count; j++)
274 loops[i].carrieds[j].id.id += start_idx;
275 for (j = 0; j < loops[i].statement_count; j++)
276 switch (loops[i].statements[j].type)
277 {
278 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_ASSIGNMENT:
279 _ccv_nnc_expression_rename_carrieds(&loops[i].statements[j].compound_assignment.rvalue, start_idx);
280 break;
281 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_COMPOUND_ASSIGNMENT:
282 if (loops[i].statements[j].compound_assignment.lvalue.type == CCV_NNC_MICRO_LOOP_EXPR_TYPE_ID)
283 {
284 assert(loops[i].statements[j].compound_assignment.lvalue.id.type == CCV_NNC_MICRO_LOOP_CARRIED_ID);
285 loops[i].statements[j].compound_assignment.lvalue.id.id += start_idx;
286 }
287 _ccv_nnc_expression_rename_carrieds(&loops[i].statements[j].compound_assignment.rvalue, start_idx);
288 break;
289 }
290 }
291}
292
293static int _ccv_nnc_only_var_in_expression(const int id, const ccv_nnc_micro_loop_variable_t var, const ccv_nnc_micro_loop_expression_t* const expression, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
294{
295 switch (expression->type)
296 {
297 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR:
298 if (expression->variable.id.type == CCV_NNC_MICRO_TENSOR_ID && expression->variable.id.id == id)
299 {
300 if (var.index_count != expression->variable.index_count)
301 return 2;
302 int i;
303 for (i = 0; i < var.index_count; i++)
304 if (!_ccv_nnc_same_index_term(var.index[i], expression->variable.index[i], groups, axis_id_groups))
305 return 2;
306 return 1;
307 } else
308 return 0;
309 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_TERNAY: {
310 const int pivot = _ccv_nnc_only_var_in_expression(id, var, expression->ternary.pivot, groups, axis_id_groups);
311 const int left = _ccv_nnc_only_var_in_expression(id, var, expression->ternary.left, groups, axis_id_groups);
312 const int right = _ccv_nnc_only_var_in_expression(id, var, expression->ternary.right, groups, axis_id_groups);
313 if (pivot == 2 || left == 2 || right == 2)
314 return 2;
315 return (pivot || left || right);
316 }
317 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_BINARY: {
318 const int left = _ccv_nnc_only_var_in_expression(id, var, expression->binary.left, groups, axis_id_groups);
319 const int right = _ccv_nnc_only_var_in_expression(id, var, expression->binary.right, groups, axis_id_groups);
320 if (left == 2 || right == 2)
321 return 2;
322 return (left || right);
323 }
324 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_UNARY:
325 return _ccv_nnc_only_var_in_expression(id, var, expression->unary.x, groups, axis_id_groups);
326 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_ID:
327 assert(expression->id.type == CCV_NNC_MICRO_LOOP_CARRIED_ID);
328 return 0;
329 }
330 return 0;
331}
332
333static int _ccv_nnc_only_var_in_rvalue(const int id, const ccv_nnc_micro_loop_variable_t var, const ccv_nnc_micro_loop_statement_t statement, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
334{
335 switch (statement.type)
336 {
337 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_ASSIGNMENT:
338 return _ccv_nnc_only_var_in_expression(id, var, &statement.assignment.rvalue, groups, axis_id_groups);
339 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_COMPOUND_ASSIGNMENT:
340 return _ccv_nnc_only_var_in_expression(id, var, &statement.compound_assignment.rvalue, groups, axis_id_groups);
341 }
342 return 0;
343}
344
345static ccv_nnc_micro_loop_expression_t _ccv_nnc_expression_deep_copy(const ccv_nnc_micro_loop_expression_t* const expression)
346{
347 switch (expression->type)
348 {
349 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_TERNAY: {
350 ccv_nnc_micro_loop_expression_t copy = *expression;
351 copy.ternary.pivot = (ccv_nnc_micro_loop_expression_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_expression_t));
352 *copy.ternary.pivot = _ccv_nnc_expression_deep_copy(expression->ternary.pivot);
353 copy.ternary.left = (ccv_nnc_micro_loop_expression_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_expression_t));
354 *copy.ternary.left = _ccv_nnc_expression_deep_copy(expression->ternary.left);
355 copy.ternary.right = (ccv_nnc_micro_loop_expression_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_expression_t));
356 *copy.ternary.right = _ccv_nnc_expression_deep_copy(expression->ternary.right);
357 return copy;
358 }
359 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_BINARY: {
360 ccv_nnc_micro_loop_expression_t copy = *expression;
361 copy.binary.left = (ccv_nnc_micro_loop_expression_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_expression_t));
362 *copy.binary.left = _ccv_nnc_expression_deep_copy(expression->binary.left);
363 copy.binary.right = (ccv_nnc_micro_loop_expression_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_expression_t));
364 *copy.binary.right = _ccv_nnc_expression_deep_copy(expression->binary.right);
365 return copy;
366 }
367 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_UNARY: {
368 ccv_nnc_micro_loop_expression_t copy = *expression;
369 copy.unary.x = (ccv_nnc_micro_loop_expression_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_expression_t));
370 *copy.unary.x = _ccv_nnc_expression_deep_copy(expression->unary.x);
371 return copy;
372 }
373 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR: {
374 ccv_nnc_micro_loop_expression_t copy = *expression;
375 int i;
376 for (i = 0; i < copy.variable.index_count; i++)
377 copy.variable.index[i] = ccv_nnc_micro_loop_index_deep_copy(&copy.variable.index[i]);
378 return copy;
379 }
380 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_ID:
381 return *expression;
382 }
383 return *expression;
384}
385
386static void _ccv_nnc_replacing_id_in_expression(ccv_nnc_micro_loop_expression_t* const expression, const int id, ccv_nnc_micro_loop_expression_t rvalue, int* const count)
387{
388 switch (expression->type)
389 {
390 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR:
391 if (expression->variable.id.type == CCV_NNC_MICRO_TENSOR_ID && expression->variable.id.id == id)
392 {
393 ccv_nnc_micro_loop_variable_free(&expression->variable);
394 if (*count == 0) // First time, just assign to expression.
395 *expression = rvalue;
396 else // Otherwise, need to make deep copy of it.
397 *expression = _ccv_nnc_expression_deep_copy(&rvalue);
398 ++(*count);
399 }
400 break;
401 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_TERNAY:
402 _ccv_nnc_replacing_id_in_expression(expression->ternary.pivot, id, rvalue, count);
403 _ccv_nnc_replacing_id_in_expression(expression->ternary.left, id, rvalue, count);
404 _ccv_nnc_replacing_id_in_expression(expression->ternary.right, id, rvalue, count);
405 break;
406 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_BINARY:
407 _ccv_nnc_replacing_id_in_expression(expression->binary.left, id, rvalue, count);
408 _ccv_nnc_replacing_id_in_expression(expression->binary.right, id, rvalue, count);
409 break;
410 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_UNARY:
411 _ccv_nnc_replacing_id_in_expression(expression->unary.x, id, rvalue, count);
412 break;
413 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_ID:
414 assert(expression->id.type == CCV_NNC_MICRO_LOOP_CARRIED_ID);
415 break;
416 }
417}
418
419static void _ccv_nnc_replacing_id_in_rvalue(ccv_nnc_micro_loop_statement_t* const statement, const int id, ccv_nnc_micro_loop_expression_t rvalue, int* const count)
420{
421 switch (statement->type)
422 {
423 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_ASSIGNMENT:
424 _ccv_nnc_replacing_id_in_expression(&statement->assignment.rvalue, id, rvalue, count);
425 break;
426 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_COMPOUND_ASSIGNMENT:
427 // Not going to be in lvalue (which is the carried variable only).
428 _ccv_nnc_replacing_id_in_expression(&statement->compound_assignment.rvalue, id, rvalue, count);
429 break;
430 }
431}
432
433typedef struct {
434 int flag;
435 int merge_to;
436 ccv_array_t* writes;
437 ccv_array_t* reads;
438} ccv_nnc_micro_loop_block_dependency_t;
439
440typedef struct {
441 int flag;
442 ccv_array_t* writes;
443 ccv_array_t* reads;
444} ccv_nnc_micro_tensor_dependency_t;
445
446static void _ccv_nnc_micro_block_dependencies_from_rvalue(const ccv_nnc_micro_loop_expression_t* const rvalue, const int i, ccv_nnc_micro_loop_block_dependency_t* const block_dependencies, ccv_nnc_micro_tensor_dependency_t* const tensor_dependencies)
447{
448 switch (rvalue->type)
449 {
450 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR:
451 if (rvalue->variable.id.type == CCV_NNC_MICRO_TENSOR_ID)
452 {
453 if (!block_dependencies[i].reads)
454 block_dependencies[i].reads = ccv_array_new(sizeof(int), 1, 0);
455 const int id = rvalue->variable.id.id;
456 ccv_array_add_unique_int(block_dependencies[i].reads, id);
457 if (!tensor_dependencies[id].reads)
458 tensor_dependencies[id].reads = ccv_array_new(sizeof(int), 1, 0);
459 ccv_array_add_unique_int(tensor_dependencies[id].reads, i);
460 }
461 break;
462 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_TERNAY:
463 _ccv_nnc_micro_block_dependencies_from_rvalue(rvalue->ternary.pivot, i, block_dependencies, tensor_dependencies);
464 _ccv_nnc_micro_block_dependencies_from_rvalue(rvalue->ternary.left, i, block_dependencies, tensor_dependencies);
465 _ccv_nnc_micro_block_dependencies_from_rvalue(rvalue->ternary.right, i, block_dependencies, tensor_dependencies);
466 break;
467 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_BINARY:
468 _ccv_nnc_micro_block_dependencies_from_rvalue(rvalue->binary.left, i, block_dependencies, tensor_dependencies);
469 _ccv_nnc_micro_block_dependencies_from_rvalue(rvalue->binary.right, i, block_dependencies, tensor_dependencies);
470 break;
471 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_UNARY:
472 _ccv_nnc_micro_block_dependencies_from_rvalue(rvalue->unary.x, i, block_dependencies, tensor_dependencies);
473 break;
474 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_ID:
475 assert(rvalue->id.type == CCV_NNC_MICRO_LOOP_CARRIED_ID);
476 break;
477 }
478}
479
480static void _ccv_nnc_micro_block_dependencies(const ccv_nnc_micro_loop_block_t* const blocks, const int block_size, const int var_count, ccv_nnc_micro_loop_block_dependency_t** const block_dependencies_ref, ccv_nnc_micro_tensor_dependency_t** const tensor_dependencies_ref)
481{
482 ccv_nnc_micro_loop_block_dependency_t* const block_dependencies = (ccv_nnc_micro_loop_block_dependency_t*)cccalloc(block_size, sizeof(ccv_nnc_micro_loop_block_dependency_t));
483 ccv_nnc_micro_tensor_dependency_t* const tensor_dependencies = (ccv_nnc_micro_tensor_dependency_t*)cccalloc(var_count, sizeof(ccv_nnc_micro_tensor_dependency_t));
484 int i, j, k;
485 for (i = 0; i < block_size; i++)
486 {
487 block_dependencies[i].merge_to = i;
488 const ccv_nnc_micro_loop_t* const loops = blocks[i].loops;
489 const int loop_count = blocks[i].loop_count;
490 for (j = 0; j < loop_count; j++)
491 {
492 const ccv_nnc_micro_loop_statement_t* const statements = loops[j].statements;
493 const int statement_count = loops[j].statement_count;
494 for (k = 0; k < statement_count; k++)
495 switch (statements[k].type)
496 {
497 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_ASSIGNMENT: {
498 assert(statements[k].assignment.lvalue.id.type == CCV_NNC_MICRO_TENSOR_ID);
499 const int id = statements[k].assignment.lvalue.id.id;
500 if (!block_dependencies[i].writes)
501 block_dependencies[i].writes = ccv_array_new(sizeof(int), 1, 0);
502 ccv_array_add_unique_int(block_dependencies[i].writes, id);
503 if (!tensor_dependencies[id].writes)
504 tensor_dependencies[id].writes = ccv_array_new(sizeof(int), 1, 0);
505 ccv_array_add_unique_int(tensor_dependencies[id].writes, i);
506 _ccv_nnc_micro_block_dependencies_from_rvalue(&statements[k].assignment.rvalue, i, block_dependencies, tensor_dependencies);
507 break;
508 }
509 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_COMPOUND_ASSIGNMENT: {
510 if (statements[k].compound_assignment.lvalue.type == CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR)
511 {
512 assert(statements[k].compound_assignment.lvalue.id.type == CCV_NNC_MICRO_TENSOR_ID);
513 const int id = statements[k].compound_assignment.lvalue.id.id;
514 if (!block_dependencies[i].writes)
515 block_dependencies[i].writes = ccv_array_new(sizeof(int), 1, 0);
516 ccv_array_add_unique_int(block_dependencies[i].writes, id);
517 if (!tensor_dependencies[id].writes)
518 tensor_dependencies[id].writes = ccv_array_new(sizeof(int), 1, 0);
519 ccv_array_add_unique_int(tensor_dependencies[id].writes, i);
520 if (!block_dependencies[i].reads)
521 block_dependencies[i].reads = ccv_array_new(sizeof(int), 1, 0);
522 ccv_array_add_unique_int(block_dependencies[i].reads, id);
523 if (!tensor_dependencies[id].reads)
524 tensor_dependencies[id].reads = ccv_array_new(sizeof(int), 1, 0);
525 ccv_array_add_unique_int(tensor_dependencies[id].reads, i);
526 }
527 _ccv_nnc_micro_block_dependencies_from_rvalue(&statements[k].compound_assignment.rvalue, i, block_dependencies, tensor_dependencies);
528 break;
529 }
530 }
531 }
532 }
533 *block_dependencies_ref = block_dependencies;
534 *tensor_dependencies_ref = tensor_dependencies;
535}
536
537static void _ccv_nnc_micro_dependencies_free(ccv_nnc_micro_loop_block_dependency_t* const block_dependencies, const int block_size, ccv_nnc_micro_tensor_dependency_t* const tensor_dependencies, const int var_count)
538{
539 int i;
540 for (i = 0; i < block_size; i++)
541 {
542 if (block_dependencies[i].writes)
543 ccv_array_free(block_dependencies[i].writes);
544 if (block_dependencies[i].reads)
545 ccv_array_free(block_dependencies[i].reads);
546 }
547 ccfree(block_dependencies);
548 for (i = 0; i < var_count; i++)
549 {
550 if (tensor_dependencies[i].writes)
551 ccv_array_free(tensor_dependencies[i].writes);
552 if (tensor_dependencies[i].reads)
553 ccv_array_free(tensor_dependencies[i].reads);
554 }
555 ccfree(tensor_dependencies);
556}
557
558static int _ccv_nnc_tensor_reads_in_y_from_writes_after_x(const ccv_nnc_micro_loop_block_dependency_t* const block_dependencies, const ccv_nnc_micro_tensor_dependency_t* const tensor_dependencies, const int x, const int y)
559{
560 int i, j;
561 int flag = 0;
562 for (i = 0; !flag && i < block_dependencies[y].reads->rnum; i++)
563 {
564 const int read_idx = *(int*)ccv_array_get(block_dependencies[y].reads, i);
565 if (tensor_dependencies[read_idx].writes)
566 for (j = 0; !flag && j < tensor_dependencies[read_idx].writes->rnum; j++)
567 {
568 int block_idx = *(int*)ccv_array_get(tensor_dependencies[read_idx].writes, j);
569 while (block_idx != block_dependencies[block_idx].merge_to)
570 block_idx = block_dependencies[block_idx].merge_to;
571 if (!block_dependencies[block_idx].flag) // Not in use, continue.
572 continue;
573 assert(block_idx <= y);
574 // If block_idx is strictly between x and y (and equal to neither), we cannot merge.
575 if (block_idx > x && block_idx != y)
576 flag = block_idx;
577 }
578 }
579 return flag;
580}
581
582static int _ccv_nnc_tensor_writes_in_x_reads_before_y(const ccv_nnc_micro_loop_block_dependency_t* const block_dependencies, const ccv_nnc_micro_tensor_dependency_t* const tensor_dependencies, const int x, const int y)
583{
584 int i, j;
585 int flag = 0;
586 for (i = 0; !flag && i < block_dependencies[x].writes->rnum; i++)
587 {
588 const int write_idx = *(int*)ccv_array_get(block_dependencies[x].writes, i);
589 if (tensor_dependencies[write_idx].reads)
590 for (j = 0; !flag && j < tensor_dependencies[write_idx].reads->rnum; j++)
591 {
592 int block_idx = *(int*)ccv_array_get(tensor_dependencies[write_idx].reads, j);
593 while (block_idx != block_dependencies[block_idx].merge_to)
594 block_idx = block_dependencies[block_idx].merge_to;
595 if (!block_dependencies[block_idx].flag) // Not in use, continue.
596 continue;
597 assert(block_idx >= x);
598 // If block_idx is strictly between x and y (and equal to neither), we cannot merge.
599 if (block_idx < y && block_idx != x)
600 flag = block_idx;
601 }
602 }
603 return flag;
604}
605
606static void _ccv_nnc_tensor_remove_dead_store(const ccv_nnc_micro_tensor_dependency_t* const tensor_dependency, const int tensor_idx, ccv_array_t* const blocks)
607{
608 int i, j, k, l;
609 if (tensor_dependency->writes)
610 for (i = 0; i < tensor_dependency->writes->rnum; i++)
611 {
612 const int write_idx = *(int*)ccv_array_get(tensor_dependency->writes, i);
613 ccv_nnc_micro_loop_block_t* const block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, write_idx);
614 int flag = 0;
615 for (j = 0; j < block->loop_count; j++)
616 {
617 ccv_nnc_micro_loop_statement_t* const statements = block->loops[j].statements;
618 for (k = 0, l = 0; k < block->loops[j].statement_count; k++)
619 {
620 // It cannot be a compound assignment; in that case, this tensor would be read, and
621 // it would be in active use (anything "read" in an active block will be marked as in use).
622 assert(!(statements[k].type == CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_COMPOUND_ASSIGNMENT &&
623 	statements[k].compound_assignment.lvalue.id.type == CCV_NNC_MICRO_TENSOR_ID &&
624 	statements[k].compound_assignment.lvalue.id.id == tensor_idx));
625 if (statements[k].type == CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_ASSIGNMENT &&
626 statements[k].assignment.lvalue.id.type == CCV_NNC_MICRO_TENSOR_ID &&
627 statements[k].assignment.lvalue.id.id == tensor_idx)
628 {
629 // This is a dead store, prepare to remove.
630 ccv_nnc_micro_loop_statement_free(&statements[k]);
631 } else {
632 statements[l] = statements[k];
633 ++l;
634 }
635 }
636 if (l < block->loops[j].statement_count)
637 {
638 if (l > 0)
639 block->loops[j].statements = (ccv_nnc_micro_loop_statement_t*)ccrealloc(block->loops[j].statements, sizeof(ccv_nnc_micro_loop_statement_t) * l);
640 else {
641 ccfree(block->loops[j].statements);
642 block->loops[j].statements = 0;
643 }
644 block->loops[j].statement_count = 0;
645 }
646 if (block->loops[j].statement_count > 0)
647 flag = 1;
648 }
649 if (!flag) // No statement for this block, remove this whole block.
650 {
651 ccv_nnc_micro_loops_free(block->loops, block->loop_count);
652 ccfree(block->loops);
653 block->loops = 0;
654 block->loop_count = 0;
655 }
656 }
657}
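
A toy version of the compaction idiom above (hypothetical types, not the ccv structs): statements whose target is the dead tensor are dropped and survivors are shifted down in place, exactly the k/l two-cursor loop used in the function.

#include <stdio.h>

// stmt_t.target stands in for assignment.lvalue.id.id.
typedef struct { int target; } stmt_t;

static int remove_dead_stores(stmt_t* const stmts, const int count, const int dead)
{
	int k, l = 0;
	for (k = 0; k < count; k++)
		if (stmts[k].target != dead)
			stmts[l++] = stmts[k]; // Survivor: shift down.
	return l; // New statement count.
}

int main(void)
{
	stmt_t stmts[3] = { { 5 }, { 9 }, { 5 } };
	printf("%d\n", remove_dead_stores(stmts, 3, 5)); // Prints 1: only {9} survives.
	return 0;
}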
658
659static void _ccv_nnc_loop_merging(ccv_nnc_micro_loop_block_dependency_t* const block_dependencies, const ccv_nnc_micro_tensor_dependency_t* const tensor_dependencies, ccv_array_t* const blocks, const int max_loop_count, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
660{
661 int i, j;
662 int left_loop_idx[max_loop_count];
663 int right_loop_idx[max_loop_count];
664 ccv_nnc_micro_loop_t loops[max_loop_count];
665 // Merge loops from blocks.
666 for (i = 0; i < blocks->rnum - 1; i++)
667 {
668 ccv_nnc_micro_loop_block_t* const left_block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, i);
669 if (left_block->loop_count == 0)
670 continue;
671 for (j = i + 1; j < blocks->rnum; j++)
672 {
673 // We always merge from right block to left block. Thus, the right block will always be
674 // in the original form.
675 ccv_nnc_micro_loop_block_t* const right_block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, j);
676 if (right_block->loop_count == 0)
677 continue;
678 int merge_to_right = 0;
679 // First check whether between left and right, there are any blocks that the right block
680 // depends on. If that is the case, we cannot merge the right block into the left block.
681 if (j > i + 1 && block_dependencies[j].reads)
682 {
683 const int block_idx = _ccv_nnc_tensor_reads_in_y_from_writes_after_x(block_dependencies, tensor_dependencies, i, j);
684 // Cannot merge because we have dependencies in between. Merging will violate that
685 // dependency relationship.
686 if (block_idx)
687 {
688 // Now check whether left can be merged into right instead. If so, we are lucky.
689 if (_ccv_nnc_tensor_writes_in_x_reads_before_y(block_dependencies, tensor_dependencies, i, j))
690 continue;
691 merge_to_right = 1;
692 }
693 }
694 // This method not only compares whether the two blocks have the same loops, but also produces
695 // indexes describing where each loop should move so the start / end indexes line up. For example, if:
696 // left_loop_idx[2] = 3
697 // right_loop_idx[0] = 3
698 // then the loop at index 2 on the left is the same as the loop at index 0 on the right,
699 // and to match exactly, they both need to move to index 3.
700 if (_ccv_nnc_same_loop(left_block, right_block, groups, axis_id_groups, left_loop_idx, right_loop_idx))
701 {
702 // Make sure if we have extra loop, they are on the left.
703 if (right_block->loop_count > left_block->loop_count)
704 {
705 ccv_nnc_micro_loop_block_t t;
706 CCV_SWAP(*left_block, *right_block, t);
707 }
708 assert(left_block->loop_count == right_block->loop_count || left_block->loop_count == right_block->loop_count + 1);
709 _ccv_nnc_loop_order_by(left_block, left_loop_idx, loops);
710 _ccv_nnc_loop_order_by(right_block, right_loop_idx, loops);
711 const int left_start_idx = left_block->loop_count - right_block->loop_count;
712 if (left_block->carried_count > 0)
713 _ccv_nnc_loop_rename_carrieds(right_block, left_block->carried_count);
714 left_block->carried_count += right_block->carried_count;
715 int k;
716 for (k = 0; k < right_block->loop_count; k++) // Merge loops.
717 {
718 const int left_idx = left_start_idx + k;
719 if (right_block->loops[k].carried_count > 0)
720 {
721 if (left_block->loops[left_idx].carried_count > 0)
722 {
723 left_block->loops[left_idx].carrieds = (ccv_nnc_micro_loop_carried_t*)ccrealloc(left_block->loops[left_idx].carrieds, sizeof(ccv_nnc_micro_loop_carried_t) * (left_block->loops[left_idx].carried_count + right_block->loops[k].carried_count));
724 memcpy(left_block->loops[left_idx].carrieds + left_block->loops[left_idx].carried_count, right_block->loops[k].carrieds, sizeof(ccv_nnc_micro_loop_carried_t) * right_block->loops[k].carried_count);
725 ccfree(right_block->loops[k].carrieds);
726 } else
727 left_block->loops[left_idx].carrieds = right_block->loops[k].carrieds;
728 left_block->loops[left_idx].carried_count += right_block->loops[k].carried_count;
729 right_block->loops[k].carrieds = 0;
730 right_block->loops[k].carried_count = 0;
731 }
732 if (right_block->loops[k].statement_count > 0)
733 {
734 if (left_block->loops[left_idx].statement_count > 0)
735 {
736 left_block->loops[left_idx].statements = (ccv_nnc_micro_loop_statement_t*)ccrealloc(left_block->loops[left_idx].statements, sizeof(ccv_nnc_micro_loop_statement_t) * (left_block->loops[left_idx].statement_count + right_block->loops[k].statement_count));
737 memcpy(left_block->loops[left_idx].statements + left_block->loops[left_idx].statement_count, right_block->loops[k].statements, sizeof(ccv_nnc_micro_loop_statement_t) * right_block->loops[k].statement_count);
738 ccfree(right_block->loops[k].statements);
739 } else
740 left_block->loops[left_idx].statements = right_block->loops[k].statements;
741 left_block->loops[left_idx].statement_count += right_block->loops[k].statement_count;
742 right_block->loops[k].statements = 0;
743 right_block->loops[k].statement_count = 0;
744 }
745 }
746 // Once merged, free the loop.
747 ccfree(right_block->loops);
748 right_block->loops = 0;
749 right_block->loop_count = 0;
750 int x = i, y = j;
751 if (merge_to_right) // If this is merge to right.
752 {
753 ccv_nnc_micro_loop_block_t t;
754 CCV_SWAP(*left_block, *right_block, t);
755 x = j, y = i;
756 }
757 // Merge all reads and writes tensors into block dependency.
758 if (block_dependencies[y].writes && block_dependencies[y].writes->rnum)
759 {
760 if (!block_dependencies[x].writes)
761 block_dependencies[x].writes = ccv_array_new(sizeof(int), 1, 0);
762 for (k = 0; k < block_dependencies[y].writes->rnum; k++)
763 ccv_array_push(block_dependencies[x].writes, ccv_array_get(block_dependencies[y].writes, k));
764 }
765 if (block_dependencies[y].reads && block_dependencies[y].reads->rnum)
766 {
767 if (!block_dependencies[x].reads)
768 block_dependencies[x].reads = ccv_array_new(sizeof(int), 1, 0);
769 for (k = 0; k < block_dependencies[y].reads->rnum; k++)
770 ccv_array_push(block_dependencies[x].reads, ccv_array_get(block_dependencies[y].reads, k));
771 }
772 // Merged, mark the proper merging dependency.
773 block_dependencies[y].merge_to = x;
774 if (merge_to_right) // If this is merge to right, now left is empty, break.
775 break;
776 }
777 }
778 }
779}
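
As a toy illustration of the in-between dependency check above (hypothetical block and tensor numbering, not the ccv structures): merging block y into block x is refused when some block strictly between them writes a tensor that y reads, since the merge would hoist that read above its producer.

#include <stdio.h>

// writes[b] holds the single tensor block b writes (-1 for none), a
// simplification of the per-block read/write arrays kept above.
static int conflicting_block(const int* const writes, const int x, const int y, const int read)
{
	int w;
	for (w = x + 1; w < y; w++)
		if (writes[w] == read)
			return w; // Block w must run between x and y; cannot merge.
	return 0;
}

int main(void)
{
	const int writes[3] = { -1, 7, -1 }; // Block 1 writes tensor 7.
	// Block 2 reads tensor 7, so merging block 2 into block 0 is illegal:
	printf("conflicting block: %d\n", conflicting_block(writes, 0, 2, 7)); // Prints 1.
	return 0;
}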
780
781static void _ccv_nnc_var_subst(ccv_nnc_micro_tensor_t* const vars, const int var_count, const ccv_nnc_micro_io_t* const inputs, const int input_size, const ccv_nnc_micro_io_t* const outputs, const int output_size, ccv_array_t* const blocks, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
782{
783 int i, j;
784 // These are simple programs, so we are going to loop over all blocks to see whether a non-output, non-input
785 // var is only written / read in one loop. If that is the case, we are going to remove that var.
786 // We have to do this replacement from bottom to top though.
787 for (i = 0; i < var_count; i++)
788 {
789 int flag = 0;
790 for (j = 0; !flag && j < input_size; j++)
791 flag = (inputs[j]->id == i);
792 for (j = 0; !flag && j < output_size; j++)
793 flag = (outputs[j]->id == i);
794 if (flag) // This is in outputs or inputs.
795 continue;
796 int count_var = 0;
797 ccv_nnc_micro_loop_variable_t lvalue;
798 ccv_nnc_micro_loop_expression_t rvalue;
799 int block_idx, loop_idx, statement_idx;
800 for (j = 0; j < blocks->rnum; j++)
801 {
802 const ccv_nnc_micro_loop_block_t* const block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, j);
803 int k, l;
804 const int loop_count = block->loop_count;
805 const ccv_nnc_micro_loop_t* const loops = block->loops;
806 int var_per_block = 0;
807 for (k = 0; k < loop_count; k++)
808 {
809 int flag = 0;
810 const int statement_count = loops[k].statement_count;
811 ccv_nnc_micro_loop_statement_t* const statements = loops[k].statements;
812 for (l = 0; l < statement_count; l++)
813 if (statements[l].type == CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_ASSIGNMENT &&
814 statements[l].assignment.lvalue.id.type == CCV_NNC_MICRO_TENSOR_ID &&
815 statements[l].assignment.lvalue.id.id == i)
816 {
817 lvalue = statements[l].assignment.lvalue;
818 if (_ccv_nnc_only_var_in_rvalue(i, lvalue, statements[l], groups, axis_id_groups))
819 flag = 2;
820 else {
821 // If the variable does not show up on the right side, we can continue.
822 rvalue = statements[l].assignment.rvalue;
823 block_idx = j;
824 loop_idx = k;
825 statement_idx = l;
826 ++flag;
827 }
828 } else if (statements[l].type == CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_COMPOUND_ASSIGNMENT &&
829 statements[l].compound_assignment.lvalue.id.type == CCV_NNC_MICRO_TENSOR_ID &&
830 statements[l].compound_assignment.lvalue.id.id == i) {
831 // This is compound assignment, automatically increase by 2.
832 flag += 2;
833 }
834 if (flag > 1) // More than one assignment for this id is no good; we cannot remove it.
835 {
836 var_per_block += flag;
837 continue;
838 }
839 for (l = 0; l < statement_count; l++)
840 flag = ccv_max(flag, _ccv_nnc_only_var_in_rvalue(i, lvalue, statements[l], groups, axis_id_groups));
841 // If flag == 2, it found the var used with a different index. That is bad news.
842 var_per_block += flag;
843 }
844 count_var += var_per_block;
845 }
846 // If this is used in more than one place (written multiple times, accessed with a different index, or used in different blocks),
847 // I cannot get rid of it.
848 if (count_var != 1)
849 continue;
850 // Otherwise, now loop again and prepare to get rid of it.
851 ccv_nnc_micro_loop_block_t* const block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, block_idx);
852 ccv_nnc_micro_loop_statement_t* statements = block->loops[loop_idx].statements;
853 ccv_nnc_micro_loop_statement_t statement = statements[statement_idx];
854 // First, remove the assignment.
855 if (statement_idx < block->loops[loop_idx].statement_count - 1)
856 memmove(statements + statement_idx, statements + statement_idx + 1, sizeof(ccv_nnc_micro_loop_statement_t) * (block->loops[loop_idx].statement_count - statement_idx - 1));
857 --block->loops[loop_idx].statement_count;
858 const int statement_count = block->loops[loop_idx].statement_count;
859 statements = block->loops[loop_idx].statements = (ccv_nnc_micro_loop_statement_t*)ccrealloc(statements, sizeof(ccv_nnc_micro_loop_statement_t) * statement_count);
860 int k = 0;
861 for (j = 0; j < statement_count; j++)
862 _ccv_nnc_replacing_id_in_rvalue(&statements[j], i, rvalue, &k);
863 if (k == 0) // If nothing to replace, free up everything.
864 ccv_nnc_micro_loop_statement_free(&statement);
865 else
866 ccv_nnc_micro_loop_statement_lvalue_free(&statement);
867 // No need to allocate for this var. It is not used, only useful for shape computation.
868 vars[i].no_alloc = 1;
869 }
870}
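// A minimal sketch of the substitution this performs, with hypothetical
// tensors a, b and c (not from this file):
//   before: for (i) { b[i] = a[i] * a[i]; c[i] = b[i] + 1; }
//   after:  for (i) { c[i] = (a[i] * a[i]) + 1; } // b gets no_alloc = 1
// The temporary b qualifies because it is assigned exactly once, read back
// with the same index in the same block, and is neither an input nor an output.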
871
872static int _ccv_nnc_index_binary_size(const ccv_nnc_micro_loop_index_term_t index)
873{
874 switch (index.type)
875 {
876 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_NONE:
877 return 0;
878 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL:
879 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
880 return 1;
881 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY:
882 if (index.binary->op == CCV_NNC_MICRO_BINARY_OP_PLUS || index.binary->op == CCV_NNC_MICRO_BINARY_OP_MINUS)
883 return _ccv_nnc_index_binary_size(index.binary->left) + _ccv_nnc_index_binary_size(index.binary->right);
884 else
885 return 1;
886 }
887 return 0;
888}
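// For example, assuming the index ASTs sketched here: (i + j) - 5 has binary
// size 3 (terms i, j and 5), while i * 2 has binary size 1, because a * node
// is not flattened and counts as a single opaque term.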
889
890typedef struct {
891 int sign:7;
892 int ignore:1;
893 ccv_nnc_micro_loop_index_term_t term;
894} ccv_nnc_micro_loop_binary_term_t;
895
896static void _ccv_nnc_index_term_flatten(ccv_nnc_micro_loop_binary_term_t* const binary_terms, const ccv_nnc_micro_loop_index_term_t index, const int sign, int* const i)
897{
898 switch (index.type)
899 {
900 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_NONE: // No need to occupy.
901 break;
902 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL:
903 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
904 binary_terms[*i].term = index;
905 binary_terms[*i].sign = sign;
906 binary_terms[*i].ignore = 0;
907 ++(*i);
908 break;
909 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY:
910 if (index.binary->op == CCV_NNC_MICRO_BINARY_OP_PLUS || index.binary->op == CCV_NNC_MICRO_BINARY_OP_MINUS)
911 {
912 _ccv_nnc_index_term_flatten(binary_terms, index.binary->left, sign, i);
913 if (index.binary->op == CCV_NNC_MICRO_BINARY_OP_MINUS) // Switch sign.
914 _ccv_nnc_index_term_flatten(binary_terms, index.binary->right, sign == CCV_NNC_MICRO_BINARY_OP_PLUS ? CCV_NNC_MICRO_BINARY_OP_MINUS : CCV_NNC_MICRO_BINARY_OP_PLUS, i);
915 else
916 _ccv_nnc_index_term_flatten(binary_terms, index.binary->right, sign, i);
917 } else {
918 binary_terms[*i].term = index;
919 binary_terms[*i].sign = sign;
920 binary_terms[*i].ignore = 0;
921 ++(*i);
922 }
923 break;
924 }
925}
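// A worked example of the flattening, with hypothetical ids i, j, k:
//   (i + j) - (k - 1)  flattens to  [+i, +j, -k, +1]
// Note the sign parameter reuses the binary op enum: descending into the right
// subtree of a - node flips CCV_NNC_MICRO_BINARY_OP_PLUS to
// CCV_NNC_MICRO_BINARY_OP_MINUS and vice versa, which is how -(k - 1) becomes
// -k and +1.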
926
927 // Returns 0 if we cannot decide, -1 if false, 1 if true.
928static int _ccv_nnc_index_less_than_or_equal_to(const ccv_nnc_micro_loop_index_term_t left, const ccv_nnc_micro_loop_index_term_t right, const ccv_nnc_micro_tensor_t* const vars, const int var_count, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
929{
930 // Special case 1.
931 if (left.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL && right.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL)
932 return left.immediate_value <= right.immediate_value ? 1 : -1;
933 // Special case 2.
934 if (left.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL && left.immediate_value == 0 && right.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID && right.id.type == CCV_NNC_MICRO_AXIS_SIZE_ID)
935 return 1;
936 // Special case 3.
937 if (left.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID && left.id.type == CCV_NNC_MICRO_AXIS_SIZE_ID && right.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL && right.immediate_value == 0)
938 return -1;
939 // Now, we only have one variable in both left and right; we need to flatten the binary tree (if possible) and reduce it to a constant if possible.
940 // We can only flatten if it is + / - at the moment.
941 const int left_binary_size = _ccv_nnc_index_binary_size(left);
942 assert(left_binary_size >= 1);
943 const int right_binary_size = _ccv_nnc_index_binary_size(right);
944 assert(right_binary_size >= 1);
945 ccv_nnc_micro_loop_binary_term_t* const left_binary_terms = (ccv_nnc_micro_loop_binary_term_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_binary_term_t) * (left_binary_size + right_binary_size));
946 ccv_nnc_micro_loop_binary_term_t* const right_binary_terms = left_binary_terms + left_binary_size;
947 int i, j;
948 i = 0;
949 _ccv_nnc_index_term_flatten(left_binary_terms, left, CCV_NNC_MICRO_BINARY_OP_PLUS, &i);
950 assert(i == left_binary_size);
951 i = 0;
952 _ccv_nnc_index_term_flatten(right_binary_terms, right, CCV_NNC_MICRO_BINARY_OP_PLUS, &i);
953 assert(i == right_binary_size);
954 // Matching signs in left terms.
955 for (i = 0; i < left_binary_size - 1; i++)
956 for (j = i + 1; j < left_binary_size; j++)
957 if (!left_binary_terms[i].ignore && !left_binary_terms[j].ignore &&
958 _ccv_nnc_same_index_term(left_binary_terms[i].term, left_binary_terms[j].term, groups, axis_id_groups) &&
959 left_binary_terms[i].sign != left_binary_terms[j].sign)
960 {
961 left_binary_terms[i].ignore = -1;
962 left_binary_terms[j].ignore = -1;
963 }
964 // Matching signs in right terms.
965 for (i = 0; i < right_binary_size - 1; i++)
966 for (j = i + 1; j < right_binary_size; j++)
967 if (!right_binary_terms[i].ignore && !right_binary_terms[j].ignore &&
968 _ccv_nnc_same_index_term(right_binary_terms[i].term, right_binary_terms[j].term, groups, axis_id_groups) &&
969 right_binary_terms[i].sign != right_binary_terms[j].sign)
970 {
971 right_binary_terms[i].ignore = -1;
972 right_binary_terms[j].ignore = -1;
973 }
974 // Matching left to right.
975 for (i = 0; i < left_binary_size; i++)
976 for (j = 0; j < right_binary_size; j++)
977 // If they are the same, we can ignore now.
978 if (!left_binary_terms[i].ignore && !right_binary_terms[j].ignore &&
979 _ccv_nnc_same_index_term(left_binary_terms[i].term, right_binary_terms[j].term, groups, axis_id_groups) &&
980 left_binary_terms[i].sign == right_binary_terms[j].sign)
981 {
982 left_binary_terms[i].ignore = -1;
983 right_binary_terms[j].ignore = -1;
984 }
985 // After reduction, we should only have immediate values left; otherwise we cannot make progress.
986 int left_val = 0;
987 for (i = 0; i < left_binary_size; i++)
988 if (!left_binary_terms[i].ignore)
989 {
990 if (left_binary_terms[i].term.type != CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL)
991 {
992 ccfree(left_binary_terms);
993 return 0;
994 } else
995 left_val += left_binary_terms[i].sign == CCV_NNC_MICRO_BINARY_OP_PLUS ? left_binary_terms[i].term.immediate_value : -left_binary_terms[i].term.immediate_value;
996 }
997 int right_val = 0;
998 for (i = 0; i < right_binary_size; i++)
999 if (!right_binary_terms[i].ignore)
1000 {
1001 if (right_binary_terms[i].term.type != CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL)
1002 {
1003 ccfree(left_binary_terms);
1004 return 0;
1005 } else
1006 right_val += right_binary_terms[i].sign == CCV_NNC_MICRO_BINARY_OP_PLUS ? right_binary_terms[i].term.immediate_value : -right_binary_terms[i].term.immediate_value;
1007 }
1008 ccfree(left_binary_terms);
1009 return left_val <= right_val ? 1 : -1;
1010}
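// To illustrate the three-valued result: comparing i + 1 <= i + 2 flattens to
// [+i, +1] vs. [+i, +2]; the +i terms match across sides and cancel, leaving
// the immediate values 1 <= 2, so the function returns 1. Comparing i <= j for
// two unrelated loop ids leaves symbolic terms that are not immediate values,
// so it returns 0 and the caller has to keep the bound check.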
1011
1012 // If this index term refers to an axis size that actually has an expression, refer to that instead (as for the reindex operation).
1013static ccv_nnc_micro_loop_index_term_t _ccv_nnc_micro_index_shape_merging(const ccv_nnc_micro_loop_index_term_t index, const ccv_nnc_micro_tensor_t* const vars, const int var_count, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
1014{
1015 ccv_nnc_micro_loop_index_term_t result = index;
1016 for (;;)
1017 {
1018 if (!(result.type == CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID && result.id.type == CCV_NNC_MICRO_AXIS_SIZE_ID))
1019 return result;
1020 int root = groups[result.id.id];
1021 while (groups[root] != root)
1022 root = groups[root];
1023 if (vars[root].shape == 0)
1024 return result;
1025 assert(result.id.d >= 0 && result.id.d < vars[root].dimensions);
1026 result = vars[root].shape[result.id.d];
1027 }
1028}
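// A hypothetical trace: suppose the term names axis 0 of var 3, groups chases
// 3 to its root 1, and vars[1].shape[0] holds the expression dX * 2 recorded
// by a reindex. The loop rewrites the term to dX * 2 and repeats until the
// term is no longer an axis-size id or the root var carries no shape
// expression.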
1029
1030static int _ccv_nnc_micro_low_high_bound_from_index(const ccv_nnc_micro_loop_index_term_t index, ccv_nnc_micro_loop_index_term_t* const low_ref, ccv_nnc_micro_loop_index_term_t* const high_ref, const ccv_nnc_micro_loop_t* const loops, const int loop_count, const ccv_nnc_micro_tensor_t* const vars, const int var_count, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
1031{
1032 switch (index.type)
1033 {
1034 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_NONE:
1035 *low_ref = (ccv_nnc_micro_loop_index_term_t){
1036 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL,
1037 .immediate_value = 0
1038 };
1039 *high_ref = (ccv_nnc_micro_loop_index_term_t){
1040 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL,
1041 .immediate_value = 0
1042 };
1043 return 1;
1044 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
1045 if (index.id.type == CCV_NNC_MICRO_LOOP_ID)
1046 {
1047 int loop_idx = -1;
1048 int i;
1049 for (i = 0; loop_idx < 0 && i < loop_count; i++)
1050 if (loops[i].id.id == index.id.id)
1051 loop_idx = i;
1052 assert(loop_idx >= 0);
1053 const ccv_nnc_micro_loop_index_term_t start_index = _ccv_nnc_micro_index_shape_merging(loops[loop_idx].start_index, vars, var_count, groups, axis_id_groups);
1054 const ccv_nnc_micro_loop_index_term_t end_index = _ccv_nnc_micro_index_shape_merging(loops[loop_idx].end_index, vars, var_count, groups, axis_id_groups);
1055 *low_ref = ccv_nnc_micro_loop_index_deep_copy(&start_index);
1056 *high_ref = ccv_nnc_micro_loop_index_deep_copy(&end_index);
1057 } else {
1058 *low_ref = index;
1059 *high_ref = index;
1060 }
1061 return 1;
1062 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL:
1063 *low_ref = index;
1064 *high_ref = index;
1065 return 1;
1066 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY: {
1067 // Get low, high from both left and right, and then construct new low / high.
1068 ccv_nnc_micro_loop_index_term_t left_low, left_high;
1069 if (!_ccv_nnc_micro_low_high_bound_from_index(index.binary->left, &left_low, &left_high, loops, loop_count, vars, var_count, groups, axis_id_groups))
1070 return 0;
1071 ccv_nnc_micro_loop_index_term_t right_low, right_high;
1072 if (!_ccv_nnc_micro_low_high_bound_from_index(index.binary->right, &right_low, &right_high, loops, loop_count, vars, var_count, groups, axis_id_groups))
1073 {
1074 ccv_nnc_micro_loop_index_free(&left_low);
1075 ccv_nnc_micro_loop_index_free(&left_high);
1076 return 0;
1077 }
1078 // If left is not a range, or right is not a range, it is simple: just copy over.
1079 if (_ccv_nnc_same_index_term(left_low, left_high, groups, axis_id_groups) || _ccv_nnc_same_index_term(right_low, right_high, groups, axis_id_groups))
1080 {
1081 *low_ref = (ccv_nnc_micro_loop_index_term_t){
1082 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1083 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1084 };
1085 low_ref->binary->op = index.binary->op;
1086 low_ref->binary->left = left_low;
1087 low_ref->binary->right = right_low;
1088 *high_ref = (ccv_nnc_micro_loop_index_term_t){
1089 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1090 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1091 };
1092 high_ref->binary->op = index.binary->op;
1093 high_ref->binary->left = left_high;
1094 high_ref->binary->right = right_high;
1095 return 1;
1096 }
1097 // Cannot handle -, because the lower bound can go negative; similar for /. We can only handle + and *.
1098 if (!(index.binary->op == CCV_NNC_MICRO_BINARY_OP_PLUS || index.binary->op == CCV_NNC_MICRO_BINARY_OP_MUL) ||
1099 // If lower bound is not a non-negative integer, we cannot compute interesting low / high bound, abort.
1100 (left_low.type != CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL || left_low.immediate_value < 0) ||
1101 (right_low.type != CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL || right_low.immediate_value < 0))
1102 {
1103 ccv_nnc_micro_loop_index_free(&left_low);
1104 ccv_nnc_micro_loop_index_free(&left_high);
1105 ccv_nnc_micro_loop_index_free(&right_low);
1106 ccv_nnc_micro_loop_index_free(&right_high);
1107 return 0;
1108 }
1109 *low_ref = (ccv_nnc_micro_loop_index_term_t){
1110 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL,
1111 .immediate_value = index.binary->op == CCV_NNC_MICRO_BINARY_OP_PLUS ? left_low.immediate_value + right_low.immediate_value : left_low.immediate_value * right_low.immediate_value,
1112 };
1113 // The higher bound is not inclusive, hence we need to subtract an extra 1 for this.
1114 if (index.binary->op == CCV_NNC_MICRO_BINARY_OP_PLUS)
1115 {
1116 // (left - 1) + (right - 1) + 1
1117 ccv_nnc_micro_loop_index_term_t sum = {
1118 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1119 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1120 };
1121 sum.binary->op = CCV_NNC_MICRO_BINARY_OP_PLUS;
1122 sum.binary->left = left_high;
1123 sum.binary->right = right_high;
1124 *high_ref = (ccv_nnc_micro_loop_index_term_t){
1125 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1126 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1127 };
1128 high_ref->binary->op = CCV_NNC_MICRO_BINARY_OP_MINUS;
1129 high_ref->binary->left = sum;
1130 high_ref->binary->right = (ccv_nnc_micro_loop_index_term_t){
1131 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL,
1132 .immediate_value = 1
1133 };
1134 } else {
1135 // (left - 1) * (right - 1) + 1
1136 ccv_nnc_micro_loop_index_term_t prod = {
1137 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1138 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1139 };
1140 prod.binary->op = CCV_NNC_MICRO_BINARY_OP_MUL;
1141 prod.binary->left = left_high;
1142 prod.binary->right = right_high;
1143 ccv_nnc_micro_loop_index_term_t minus_left = {
1144 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1145 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1146 };
1147 minus_left.binary->op = CCV_NNC_MICRO_BINARY_OP_MINUS;
1148 minus_left.binary->left = prod;
1149 minus_left.binary->right = left_high;
1150 ccv_nnc_micro_loop_index_term_t minus_right = {
1151 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1152 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1153 };
1154 minus_right.binary->op = CCV_NNC_MICRO_BINARY_OP_MINUS;
1155 minus_right.binary->left = minus_left;
1156 minus_right.binary->right = right_high;
1157 *high_ref = (ccv_nnc_micro_loop_index_term_t){
1158 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY,
1159 .binary = (ccv_nnc_micro_loop_index_binary_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_index_binary_t))
1160 };
1161 high_ref->binary->op = CCV_NNC_MICRO_BINARY_OP_PLUS;
1162 high_ref->binary->left = minus_right;
1163 high_ref->binary->right = (ccv_nnc_micro_loop_index_term_t){
1164 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL,
1165 .immediate_value = 2
1166 };
1167 }
1168 return 1;
1169 }
1170 }
1171 return 0;
1172}
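// The exclusive upper bounds check out numerically: with i in [0, N) and
// j in [0, M), the largest values are N - 1 and M - 1, so
//   i + j lies in [0, (N - 1) + (M - 1) + 1) = [0, N + M - 1)
//   i * j lies in [0, (N - 1) * (M - 1) + 1) = [0, N * M - N - M + 2)
// which is exactly why the PLUS case subtracts 1 from the sum of the highs,
// and the MUL case subtracts both highs from the product and adds 2.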
1173
1174static void _ccv_nnc_micro_check_bound_for_variable(ccv_nnc_micro_loop_variable_t* const variable, const ccv_nnc_micro_loop_t* const loops, const int loop_count, const ccv_nnc_micro_tensor_t* const vars, const int var_count, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
1175{
1176 if (variable->id.type != CCV_NNC_MICRO_TENSOR_ID)
1177 return;
1178 int i, j;
1179 assert(variable->id.id >= 0 && variable->id.id < var_count);
1180 ccv_nnc_micro_loop_index_term_t index_zero = {
1181 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL,
1182 .immediate_value = 0
1183 };
1184 for (i = 0; i < variable->index_count; i++)
1185 {
1186 const ccv_nnc_micro_loop_index_term_t shape = _ccv_nnc_micro_index_shape_merging((ccv_nnc_micro_loop_index_term_t){
1187 .type = CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID,
1188 .id = {
1189 .type = CCV_NNC_MICRO_AXIS_SIZE_ID,
1190 .id = variable->id.id,
1191 .d = i
1192 }
1193 }, vars, var_count, groups, axis_id_groups);
1194 switch (variable->index[i].type)
1195 {
1196 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_ID:
1197 // For loop id, we can check the range to see if it is within the shape.
1198 if (variable->index[i].id.type == CCV_NNC_MICRO_LOOP_ID)
1199 {
1200 int loop_idx = -1;
1201 for (j = 0; loop_idx < 0 && j < loop_count; j++)
1202 if (loops[j].id.id == variable->index[i].id.id)
1203 loop_idx = j;
1204 assert(loop_idx >= 0);
1205 const ccv_nnc_micro_loop_index_term_t start_index = _ccv_nnc_micro_index_shape_merging(loops[loop_idx].start_index, vars, var_count, groups, axis_id_groups);
1206 const ccv_nnc_micro_loop_index_term_t end_index = _ccv_nnc_micro_index_shape_merging(loops[loop_idx].end_index, vars, var_count, groups, axis_id_groups);
1207 if (_ccv_nnc_index_less_than_or_equal_to(index_zero, start_index, vars, var_count, groups, axis_id_groups) == 1 &&
1208 _ccv_nnc_index_less_than_or_equal_to(end_index, shape, vars, var_count, groups, axis_id_groups) == 1)
1209 variable->no_check_bound[i] = 1;
1210 else
1211 variable->no_check_bound[i] = 0;
1212 } else // If it is anything other than loop id, we have to check the bound.
1213 variable->no_check_bound[i] = 0;
1214 break;
1215 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_BINARY: {
1216 // Compute higher / lower bounds along the expression.
1217 ccv_nnc_micro_loop_index_term_t low, high;
1218 // If we cannot find the low / high bounds, mark no_check_bound[i] = 0.
1219 if (!_ccv_nnc_micro_low_high_bound_from_index(variable->index[i], &low, &high, loops, loop_count, vars, var_count, groups, axis_id_groups))
1220 {
1221 variable->no_check_bound[i] = 0;
1222 break;
1223 }
1224 if (_ccv_nnc_index_less_than_or_equal_to(index_zero, low, vars, var_count, groups, axis_id_groups) == 1 &&
1225 _ccv_nnc_index_less_than_or_equal_to(high, shape, vars, var_count, groups, axis_id_groups) == 1)
1226 variable->no_check_bound[i] = 1;
1227 else
1228 variable->no_check_bound[i] = 0;
1229 ccv_nnc_micro_loop_index_free(&low);
1230 ccv_nnc_micro_loop_index_free(&high);
1231 break;
1232 }
1233 case CCV_NNC_MICRO_LOOP_INDEX_TYPE_VAL:
1234 // If the index is a constant integer bigger than 0, we need to check the bound (there is no assertion that the end index is larger than anything other than 0).
1235 if (variable->index[i].immediate_value == 0)
1236 variable->no_check_bound[i] = 1;
1237 else
1238 variable->no_check_bound[i] = 0;
1239 break;
1240 }
1241 }
1242}
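// Two worked cases, assuming a loop i in [0, N) indexing an axis of size N.
// Index i: the bounds are [0, N), both 0 <= 0 and N <= N resolve to 1, so
// no_check_bound[i] = 1 and code generation can drop the guard. Index i + 1:
// the high bound becomes N + 1; in the comparison N + 1 <= N the N terms
// cancel, leaving 1 <= 0, so the result is -1 and the bound check stays.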
1243
1244static void _ccv_nnc_micro_check_bound_for_expression(ccv_nnc_micro_loop_expression_t* const expression, const ccv_nnc_micro_loop_t* const loops, const int loop_count, const ccv_nnc_micro_tensor_t* const vars, const int var_count, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
1245{
1246 switch (expression->type)
1247 {
1248 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR:
1249 _ccv_nnc_micro_check_bound_for_variable(&expression->variable, loops, loop_count, vars, var_count, groups, axis_id_groups);
1250 break;
1251 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_TERNAY:
1252 _ccv_nnc_micro_check_bound_for_expression(expression->ternary.pivot, loops, loop_count, vars, var_count, groups, axis_id_groups);
1253 _ccv_nnc_micro_check_bound_for_expression(expression->ternary.left, loops, loop_count, vars, var_count, groups, axis_id_groups);
1254 _ccv_nnc_micro_check_bound_for_expression(expression->ternary.right, loops, loop_count, vars, var_count, groups, axis_id_groups);
1255 break;
1256 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_BINARY:
1257 _ccv_nnc_micro_check_bound_for_expression(expression->binary.left, loops, loop_count, vars, var_count, groups, axis_id_groups);
1258 _ccv_nnc_micro_check_bound_for_expression(expression->binary.right, loops, loop_count, vars, var_count, groups, axis_id_groups);
1259 break;
1260 case CCV_NNC_MICRO_LOOP_EXPR_TYPE_UNARY:
1261 _ccv_nnc_micro_check_bound_for_expression(expression->unary.x, loops, loop_count, vars, var_count, groups, axis_id_groups);
1262 break;
1263 }
1264}
1265
1266static void _ccv_nnc_micro_check_bound_for_block(ccv_nnc_micro_loop_block_t* const block, const ccv_nnc_micro_tensor_t* const vars, const int var_count, const int* const groups, khash_t(ccv_nnc_axis_id_group)* const axis_id_groups)
1267{
1268 int i, j;
1269 for (i = 0; i < block->loop_count; i++)
1270 {
1271 const int statement_count = block->loops[i].statement_count;
1272 ccv_nnc_micro_loop_statement_t* const statements = block->loops[i].statements;
1273 for (j = 0; j < statement_count; j++)
1274 {
1275 switch (statements[j].type)
1276 {
1277 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_ASSIGNMENT:
1278 _ccv_nnc_micro_check_bound_for_variable(&statements[j].assignment.lvalue, block->loops, block->loop_count, vars, var_count, groups, axis_id_groups);
1279 _ccv_nnc_micro_check_bound_for_expression(&statements[j].assignment.rvalue, block->loops, block->loop_count, vars, var_count, groups, axis_id_groups);
1280 break;
1281 case CCV_NNC_MICRO_LOOP_STATEMENT_TYPE_COMPOUND_ASSIGNMENT:
1282 if (statements[j].compound_assignment.lvalue.type == CCV_NNC_MICRO_LOOP_EXPR_TYPE_VAR)
1283 _ccv_nnc_micro_check_bound_for_variable(&statements[j].compound_assignment.lvalue.variable, block->loops, block->loop_count, vars, var_count, groups, axis_id_groups);
1284 _ccv_nnc_micro_check_bound_for_expression(&statements[j].compound_assignment.rvalue, block->loops, block->loop_count, vars, var_count, groups, axis_id_groups);
1285 break;
1286 }
1287 }
1288 }
1289}
1290
1291void ccv_nnc_micro_program_simplify(ccv_nnc_micro_program_t* const program, const ccv_nnc_micro_io_t* const inputs, const int input_size, const ccv_nnc_micro_io_t* const outputs, const int output_size, const ccv_array_t* const equal_assertions)
1292{
1293 // Nothing to simplify for.
1294 if (program->function_count < 1)
  1. Assuming field 'function_count' is >= 1
1295 return;
1296 // Only one block, nothing to simplify for.
1297 if (program->function_count == 1 && program->functions[0].block_count == 1)
  2. Assuming field 'function_count' is not equal to 1
1298 return;
1299 if (input_size == 0 || output_size == 0)
  3. Assuming 'input_size' is not equal to 0
  4. Assuming 'output_size' is not equal to 0
  5. Taking false branch
1300 return;
1301 // Union-find to group all variables with the same shape.
1302 ccv_nnc_micro_tensor_t* const vars = program->vars;
1303 const int var_count = program->var_count;
1304 int* const groups = (int*)ccmalloc(sizeof(int) * var_count);
1305 int i, j;
1306 for (i = 0; i < var_count; i++)
  6. Assuming 'i' is >= 'var_count'
  7. Loop condition is false. Execution continues on line 1309
1307 groups[i] = i;
1308 // If there is no shape, they should match their input.
1309 for (i = 0; i < var_count; i++)
  7.1. 'i' is >= 'var_count'
  8. Loop condition is false. Execution continues on line 1317
1310 if (vars[i].input >= 0 && !vars[i].shape)
1311 {
1312 int root = vars[i].input;
1313 while (groups[root] != root)
1314 root = groups[root];
1315 groups[i] = root;
1316 }
1317 for (i = 0; i < var_count; i++)
  8.1. 'i' is >= 'var_count'
  9. Loop condition is false. Execution continues on line 1329
1318 {
1319 // If this is an input (no other tensor as its input), we skip.
1320 if (vars[i].input < 0)
1321 continue;
1322 int root = i;
1323 while (groups[root] != root)
1324 root = groups[root];
1326 // If the sibling exists and we haven't visited it yet, mark it as being in the same group as us.
1326 if (vars[i].sibling >= 0 && vars[i].sibling < i && groups[vars[i].sibling] < 0)
1327 groups[vars[i].sibling] = root;
1328 }
1329 for (i = var_count - 1; i > 0; i--)
  10. Assuming 'i' is <= 0
  11. Loop condition is false. Execution continues on line 1343
1330 {
1331 // Now matching the shape.
1332 if (vars[i].input < 0 || !vars[i].shape)
1333 continue;
1334 int root = i;
1335 while (groups[root] != root)
1336 root = groups[root];
1337 for (j = i - 1; j >= 0; j--)
1338 if (vars[j].shape && vars[j].dimensions == vars[i].dimensions &&
1339 _ccv_nnc_same_shape(vars[j].shape, vars[i].shape, vars[i].dimensions))
1340 groups[j] = root;
1341 }
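// A small trace of the grouping with hypothetical var numbering: if var 2
// takes var 0 as input and var 4 takes var 2, the input pass above links
// groups[2] and groups[4] through to root 0, so vars 0, 2 and 4 share one
// shape group and later comparisons only need to consult the root.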
1342 // Group equal assertions on axis together.
1343 khash_t(ccv_nnc_axis_id_group)* const axis_id_groups = kh_init(ccv_nnc_axis_id_group);
  12. Calling 'kh_init_ccv_nnc_axis_id_group'
  14. Returning from 'kh_init_ccv_nnc_axis_id_group'
1344 for (i = 0; i < equal_assertions->rnum; i++)
  15. Assuming 'i' is < field 'rnum'
  16. Loop condition is true. Entering loop body
1345 {
1346 const ccv_nnc_micro_id_equal_assertion_t* const equal_assertion = (ccv_nnc_micro_id_equal_assertion_t*)ccv_array_get(equal_assertions, i);
1347 ccv_nnc_micro_id_t left = equal_assertion->left;
1348 while (groups[left.id] != left.id)
  17. Loop condition is false. Execution continues on line 1350
1349 left.id = groups[left.id];
1350 int left_root = MICRO_ID_TO_INT(left);
1351 khiter_t k;
1352 for (;;) {
  18. Loop condition is true. Entering loop body
1353 k = kh_get(ccv_nnc_axis_id_group, axis_id_groups, left_root);
1354 if (k == kh_end(axis_id_groups))
  18.1. 'k' is equal to field 'n_buckets'
  19. Taking true branch
1355 break;
  20. Execution continues on line 1358
1356 left_root = kh_val(axis_id_groups, k);
1357 }
1358 ccv_nnc_micro_id_t right = equal_assertion->right;
1359 while (groups[right.id] != right.id)
  21. Loop condition is false. Execution continues on line 1361
1360 right.id = groups[right.id];
1361 int right_root = MICRO_ID_TO_INT(right);
1362 for (;;) {
  22. Loop condition is true. Entering loop body
1363 k = kh_get(ccv_nnc_axis_id_group, axis_id_groups, right_root);
1364 if (k == kh_end(axis_id_groups))
  22.1. 'k' is equal to field 'n_buckets'
  23. Taking true branch
1365 break;
1366 right_root = kh_val(axis_id_groups, k);
1367 }
1368 if (left_root != right_root) // k is the right root at the moment.
  24. Execution continues on line 1368
  25. Assuming 'left_root' is not equal to 'right_root'
  26. Taking true branch
1369 {
1370 int ret;
1371 k = kh_put(ccv_nnc_axis_id_group, axis_id_groups, right_root, &ret);
  27. Calling 'kh_put_ccv_nnc_axis_id_group'
1372 assert(ret != 0);
1373 kh_val(axis_id_groups, k) = left_root;
1374 }
1375 }
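// How the axis aliasing is keyed, assuming the axis index fits in the low
// 8 bits as MICRO_ID_TO_INT implies: axis d of tensor id packs into
// (id << 8) | d. An assertion that axis (id 2, d 1) equals axis (id 5, d 0)
// stores axis_id_groups[(5 << 8) | 0] = (2 << 8) | 1, and lookups then chase
// values through the map until a key is absent, yielding the canonical
// representative that _ccv_nnc_same_index_term compares against.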
1376 // First, flatten all functions into blocks.
1377 ccv_array_t* const blocks = ccv_array_new(sizeof(ccv_nnc_micro_loop_block_t), 0, 0);
1378 ccv_nnc_micro_function_t* const functions = program->functions;
1379 const int function_count = program->function_count;
1380 int max_loop_count = 0;
1381 for (i = 0; i < function_count; i++)
1382 {
1383 const int block_count = functions[i].block_count;
1384 ccv_nnc_micro_loop_block_t* const function_blocks = block_count == 1 ? &functions[i].one_block : functions[i].blocks;
1385 for (j = 0; j < block_count; j++)
1386 {
1387 max_loop_count = ccv_max(function_blocks[j].loop_count, max_loop_count);
1388 ccv_array_push(blocks, &function_blocks[j]);
1389 }
1390 }
1391 // Next, find dependencies between these function blocks and mark those that are dependencies for the final outputs.
1392 // We need to build our connections between blocks <-> r/w vars.
1393 ccv_nnc_micro_loop_block_dependency_t* block_dependencies;
1394 ccv_nnc_micro_tensor_dependency_t* tensor_dependencies;
1395 const int block_size = blocks->rnum;
1396 _ccv_nnc_micro_block_dependencies((ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, 0), block_size, var_count, &block_dependencies, &tensor_dependencies);
1397 ccv_array_t* const in_use = ccv_array_new(sizeof(int), output_size, 0);
1398 // Use the dependencies to mark blocks / vars that are in use.
1399 for (i = 0; i < output_size; i++)
1400 {
1401 tensor_dependencies[outputs[i]->id].flag = 1; // Mark them as in use.
1402 ccv_array_push(in_use, &outputs[i]->id);
1403 }
1404 for (i = 0; i < input_size; i++)
1405 tensor_dependencies[inputs[i]->id].flag = 1; // Mark inputs as in use so we don't go past them.
1406 for (i = 0; i < in_use->rnum; i++)
1407 {
1408 const int tensor_idx = *(int*)ccv_array_get(in_use, i);
1409 if (tensor_dependencies[tensor_idx].writes)
1410 for (j = 0; j < tensor_dependencies[tensor_idx].writes->rnum; j++)
1411 {
1412 const int block_idx = *(int*)ccv_array_get(tensor_dependencies[tensor_idx].writes, j);
1413 block_dependencies[block_idx].flag = 1;
1414 int k;
1415 if (block_dependencies[block_idx].reads)
1416 for (k = 0; k < block_dependencies[block_idx].reads->rnum; k++)
1417 {
1418 const int read_idx = *(int*)ccv_array_get(block_dependencies[block_idx].reads, k);
1419 if (!tensor_dependencies[read_idx].flag)
1420 {
1421 tensor_dependencies[read_idx].flag = 1;
1422 ccv_array_push(in_use, &read_idx);
1423 }
1424 }
1425 }
1426 }
1427 ccv_array_free(in_use);
1428 for (i = 0; i < block_size; i++)
1429 if (!block_dependencies[i].flag)
1430 {
1431 ccv_nnc_micro_loop_block_t* const block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, i);
1432 ccv_nnc_micro_loops_free(block->loops, block->loop_count);
1433 ccfree(block->loops);
1434 block->loops = 0;
1435 block->loop_count = 0;
1436 }
1437 for (i = 0; i < var_count; i++)
1438 if (!tensor_dependencies[i].flag) // If this tensor is not visited, there is no need to alloc.
1439 {
1440 _ccv_nnc_tensor_remove_dead_store(&tensor_dependencies[i], i, blocks);
1441 vars[i].no_alloc = 1;
1442 }
1443 _ccv_nnc_loop_merging(block_dependencies, tensor_dependencies, blocks, max_loop_count, groups, axis_id_groups);
1444 _ccv_nnc_micro_dependencies_free(block_dependencies, block_size, tensor_dependencies, var_count);
1445 // Culling out empty blocks.
1446 for (i = 0, j = 0; i < blocks->rnum; i++)
1447 {
1448 const ccv_nnc_micro_loop_block_t* const block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, i);
1449 if (block->loop_count > 0)
1450 {
1451 *(ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, j) = *block;
1452 ++j;
1453 }
1454 }
1455 // Now we moved everything, set the proper block size.
1456 ccv_array_resize(blocks, j);
1457 // Substitute variables.
1458 _ccv_nnc_var_subst(vars, var_count, inputs, input_size, outputs, output_size, blocks, groups, axis_id_groups);
1459 // Mark whether we need to check bound for a particular variable or not.
1460 for (i = 0; i < blocks->rnum; i++)
1461 {
1462 ccv_nnc_micro_loop_block_t* const block = (ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, i);
1463 _ccv_nnc_micro_check_bound_for_block(block, vars, var_count, groups, axis_id_groups);
1464 }
1465 free(groups);
1466 kh_destroy(ccv_nnc_axis_id_group, axis_id_groups);
1467 // Reallocate the function array down to a single function.
1468 for (i = 0; i < function_count; i++)
1469 if (functions[i].block_count > 1)
1470 ccfree(functions[i].blocks);
1471 program->functions = (ccv_nnc_micro_function_t*)ccrealloc(program->functions, sizeof(ccv_nnc_micro_function_t));
1472 program->functions[0].block_count = blocks->rnum;
1473 if (blocks->rnum > 1)
1474 {
1475 program->functions[0].blocks = (ccv_nnc_micro_loop_block_t*)ccmalloc(sizeof(ccv_nnc_micro_loop_block_t) * blocks->rnum);
1476 memcpy(program->functions[0].blocks, ccv_array_get(blocks, 0), sizeof(ccv_nnc_micro_loop_block_t) * blocks->rnum);
1477 } else
1478 program->functions[0].one_block = *(ccv_nnc_micro_loop_block_t*)ccv_array_get(blocks, 0);
1479 program->function_count = 1;
1480 ccv_array_free(blocks);
1481}
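// In brief, the simplification runs five phases over the flattened blocks:
// (1) union-find on vars by shape plus the axis-id hash for asserted-equal
// axes, (2) dependency marking with dead block / dead store elimination,
// (3) loop merging, (4) single-use temporary substitution via
// _ccv_nnc_var_subst, and (5) bound-check elision, before repacking the
// result into a single function.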