Bug Summary

File: nnc/ccv_nnc_symbolic_graph_compile.c
Warning: line 3721, column 4
Passed-by-value struct argument contains uninitialized data (e.g., field: 'd')
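
This class of report fires when a struct is passed by value and at least one of its fields was never written before the call. A minimal sketch of the pattern, for orientation only — the struct, field, and function names below are hypothetical and are not the actual code at line 3721:

    typedef struct {
        int index;
        int d; // never written below
    } example_t;

    static void consume(example_t e) { (void)e; } // takes the struct by value

    static void trigger(void)
    {
        example_t e;
        e.index = 0; // 'd' stays uninitialized
        consume(e);  // analyzer: passed-by-value struct argument contains uninitialized data (field: 'd')
    }

Zero-initializing the whole struct (e.g. example_t e = {0}; or a memset) before the partial assignment is the usual way to silence this warning.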

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_symbolic_graph_compile.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/13.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/buildslave/public_html/analyze/2021-11-15-195359-102457-1 -x c ccv_nnc_symbolic_graph_compile.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#ifdef HAVE_CUDA
6#include "gpu/ccv_nnc_compat.h"
7#endif
8#include "_ccv_nnc_graph.h"
9#include "_ccv_nnc_symbolic_graph.h"
10
11// MARK - Level-3 API
12
13typedef struct {
14 int flags;
15 int type;
16 int pin_mem; // This memory needs to be pinned.
17 int ref; // Reference to another tensor block. Start with 1.
18 int bypass_ref; // Copy over the bypass_ref from tensor symbol underneath. Start with 1.
19 int companion_ref; // Reference to another block with which this one shares the same memory region. Start with 1. The current crude implementation requires the two to mutually be companions. Because there are two, we take the one where companion_ref <= i as the primary and the one where companion_ref > i as the secondary. For the allocation algorithm, we use the primary throughout.
20 int unfoldable_except_ref; // Reference to a tensor block that can be the exception to unfoldable (as output). Start with 1.
21 ccv_array_t* r_refs; // If this is referenced by another block, the array point back to these blocks. Start with 1.
22 uint64_t size; // The size of the tensor expected.
23 int p_refs[2]; // Reference to the parent tensor block, at max there will be only two. Start with 1.
24 ccv_array_t* dup_p_refs; // Reference to the parent tensor block from the duplicated tensor blocks. It could be many. Start with 0.
25 ccv_array_t* head; // The head nodes (it could be multiple if from the graph, one cannot determine which is the first).
26 ccv_array_t* tail; // The tail nodes (it could be multiple if from the graph, one cannot determine which is the last).
27} ccv_nnc_tensor_block_t; // Tensor Arena Block
28
29#define IS_PRIMARY_COMPANION(idx, block) ((idx) < (uint32_t)((block).companion_ref - 1))
30
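
companion_ref, like the other *_ref fields above, is stored 1-based so that 0 can mean "none"; that is why IS_PRIMARY_COMPANION subtracts 1 before comparing. The cast to uint32_t doubles as the "no companion" check: when companion_ref is 0, companion_ref - 1 wraps to UINT32_MAX and the comparison is trivially true. A tiny sketch of the same 1-based decoding written out longhand (illustrative only, these helpers are not part of the file):

    // Illustration of the 1-based *_ref convention used throughout this file.
    static int has_companion(const ccv_nnc_tensor_block_t block)
    {
        return block.companion_ref != 0; // 0 encodes "no companion"
    }

    static int companion_index(const ccv_nnc_tensor_block_t block)
    {
        // Only meaningful when has_companion(block); converts back to 0-based.
        return block.companion_ref - 1;
    }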
31enum {
32 UNASSIGNED = 0x1,
33 ALIAS = 0x2,
34 READ_ONLY = 0x4,
35 WRITE_ONLY = 0x8,
36 READ_WRITE = 0xc,
37 ANONYMOUS = 0x10, // Mark this block as anonymous (thus, not reference to any specific tensor).
38 UNFOLDABLE_AS_INPUT = 0x20, // If this block is used as input, it cannot be folded into any output blocks.
39 UNFOLDABLE_AS_OUTPUT = 0x40, // If this block is used as output, it cannot be folded into any input blocks.
40};
41
42#define TENSOR_EXPECT_ORDINARY(t) ((t.flags & 0x3) == 0)
43#define TENSOR_EXPECT_SET_ORDINARY(t) (t.flags = (t.flags & ~0x3))
44#define TENSOR_EXPECT_UNASSIGNED(t) ((t.flags & 0x3) == UNASSIGNED)
45#define TENSOR_EXPECT_SET_UNASSIGNED(t) (t.flags = ((t.flags & ~0x3) | UNASSIGNED))
46#define TENSOR_EXPECT_UNSET_UNASSIGNED(t) (t.flags = (t.flags & ~0x1))
47#define TENSOR_EXPECT_ALIAS(t) ((t.flags & 0x3) == ALIAS)
48#define TENSOR_EXPECT_COMPUTABLE(t) (!TENSOR_EXPECT_ALIAS(t) && !TENSOR_EXPECT_UNASSIGNED(t))
49#define TENSOR_READ_WRITE(t) (t.flags & 0xc)
50#define TENSOR_SET_READ_WRITE(t, rw) (t.flags = ((t.flags & ~0xc) | rw))
51#define TENSOR_SET_ANONYMOUS(t) (t.flags = ((t.flags & ~0x10) | ANONYMOUS))
52#define TENSOR_IS_ANONYMOUS(t) (t.flags & ANONYMOUS)
53#define TENSOR_SET_UNFOLDABLE_AS_INPUT(t) (t.flags = (t.flags | UNFOLDABLE_AS_INPUT))
54#define TENSOR_IS_UNFOLDABLE_AS_INPUT(t) (t.flags & UNFOLDABLE_AS_INPUT)
55#define TENSOR_SET_UNFOLDABLE_AS_OUTPUT(t) (t.flags = (t.flags | UNFOLDABLE_AS_OUTPUT))
56#define TENSOR_IS_UNFOLDABLE_AS_OUTPUT(t) (t.flags & UNFOLDABLE_AS_OUTPUT)
57
58#define TENSOR_REQUIRE_INIT(flags) (((flags) & CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS) || ((flags) & CCV_NNC_TENSOR_SYMBOL_INIT_ONES))
59
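
The flag helpers above treat flags as a small bit field: bits 0-1 carry the UNASSIGNED/ALIAS state, bits 2-3 (0xc) carry the access mode, and ANONYMOUS/UNFOLDABLE_* are independent marker bits. A short usage sketch, assuming the struct and macros above (plus assert.h) are in scope:

    ccv_nnc_tensor_block_t block = {0};
    TENSOR_EXPECT_SET_UNASSIGNED(block);      // bits 0-1 now encode UNASSIGNED
    TENSOR_SET_READ_WRITE(block, READ_ONLY);  // bits 2-3 carry the access mode
    TENSOR_SET_UNFOLDABLE_AS_INPUT(block);    // independent marker bit
    assert(TENSOR_EXPECT_UNASSIGNED(block));
    assert(!TENSOR_EXPECT_COMPUTABLE(block)); // unassigned blocks are never computable
    assert(TENSOR_READ_WRITE(block) == READ_ONLY);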
60// Holds additional information about the exe nodes.
61typedef struct {
62 int flags;
63} ccv_nnc_graph_exec_flag_t;
64
65enum {
66 CCV_NNC_GRAPH_EXEC_ATTR_CASE_OF_NO_BYPASS_IO = 0x1, // Need to insert additional IO transfer for case..of statement.
67};
68
69typedef struct {
70 int index;
71 int oc;
72 int type;
73 uint64_t size;
74} ccv_nnc_tensor_opt_t;
75
76// We first sort the same type together (because they won't be reused at all).
77// And then we sort by size; after that, sort by oc.
78#define more_than(i1, i2, aux) (((i1).size > (i2).size) || ((i1).size == (i2).size && (i1).oc >= (i2).oc))
79static CCV_IMPLEMENT_QSORT(_ccv_nnc_tensor_opt_sort_by_size_and_oc, ccv_nnc_tensor_opt_t, more_than)
80#undef more_than
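
The more_than predicate orders candidates by descending size and, on equal size, by descending oc; the grouping by type mentioned in the comment is handled separately by the current_type filter in the selection loop further down. An equivalent standalone qsort()-style comparator, shown only to make the ordering explicit (the file itself relies on the CCV_IMPLEMENT_QSORT macro):

    #include <stdlib.h>

    static int tensor_opt_cmp(const void* pa, const void* pb)
    {
        const ccv_nnc_tensor_opt_t* const a = (const ccv_nnc_tensor_opt_t*)pa;
        const ccv_nnc_tensor_opt_t* const b = (const ccv_nnc_tensor_opt_t*)pb;
        if (a->size != b->size)
            return (a->size > b->size) ? -1 : 1; // bigger size first
        if (a->oc != b->oc)
            return (a->oc > b->oc) ? -1 : 1;     // then bigger overlap count first
        return 0;
    }
    // e.g. qsort(opt->data, opt->rnum, sizeof(ccv_nnc_tensor_opt_t), tensor_opt_cmp);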
81
82// If b has items overlap with a, a is still after b (inclusive).
83static int _ccv_nnc_tensor_block_a_after_b_inclusively(const ccv_sparse_matrix_t* const exec_dep, const ccv_array_t* const a, const ccv_array_t* const b)
84{
85 assert(a);
86 assert(b);
87 int x, y;
88 for (x = 0; x < b->rnum; x++)
89 {
90 const int p = *(int*)ccv_array_get(b, x);
91 int flag = 0;
92 // In extreme cases where a is a superset of b, then a is still after b, we are good.
93 for (y = 0; !flag && y < a->rnum; y++)
94 {
95 const int q = *(int*)ccv_array_get(a, y);
96 flag = (p == q);
97 }
98 if (!flag)
99 for (y = 0; y < a->rnum; y++)
100 {
101 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, *(int*)ccv_array_get(a, y), p);
102 if (!cell.i32 || cell.i32[0] == 0)
103 return 0;
104 }
105 }
106 // If b->rnum == 0, a is after b for sure.
107 // Otherwise, if a->rnum == 0, we don't check any, but if b->rnum > 0, then we cannot say a is after b.
108 // If both a->rnum > 0 and b->rnum > 0, the above logic should have checked them all.
109 return (a->rnum > 0 || b->rnum == 0);
110}
111
112static int _ccv_nnc_tensor_block_a_after_b_exclusively(const ccv_sparse_matrix_t* const exec_dep, const ccv_array_t* const a, const ccv_array_t* const b)
113{
114 assert(a);
115 assert(b);
116 int x, y, max_hop = 0;
117 for (x = 0; x < a->rnum; x++)
118 for (y = 0; y < b->rnum; y++)
119 {
120 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, *(int*)ccv_array_get(a, x), *(int*)ccv_array_get(b, y));
121 if (!cell.i32 || cell.i32[0] == 0)
122 return 0;
123 max_hop = ccv_max(cell.i32[0], max_hop);
124 }
125 // We've entered this nested-for loop, therefore, it must be verifiably, deterministically after b now.
126 // The max hop also denotes if that is the case, how many hops, maximally speaking, we need to get from a to b.
127 return max_hop;
128}
129
130// If every a's head is deterministically after b's tail
131static int _ccv_nnc_tensor_block_head_after_tail(const ccv_sparse_matrix_t* const exec_dep, const ccv_nnc_tensor_block_t a, const ccv_nnc_tensor_block_t b)
132{
133 return _ccv_nnc_tensor_block_a_after_b_exclusively(exec_dep, a.head, b.tail);
134}
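
exec_dep is a sparse reachability matrix over execution nodes: cell (i, j) stores a positive hop count when node i verifiably executes after node j, and is empty otherwise. The inclusive variant above tolerates shared nodes between a and b, while the exclusive variant demands that every head of a strictly follow every tail of b, returning the largest hop count it encountered. The interference test used later in this file boils down to the sketch below, assuming exec_dep, tensor_blocks, i and j are in scope (mirroring the overlap-count loop around source lines 218-232):

    const int i_hop_j = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[i], tensor_blocks[j]);
    const int j_hop_i = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[j], tensor_blocks[i]);
    // If neither block strictly follows the other, their lifetimes overlap and
    // they interfere, i.e. they cannot share the same memory region.
    const int interfere = (i_hop_j == 0 && j_hop_i == 0);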
135
136typedef struct {
137 ccv_array_t** alloc_dep;
138 int vt_block_size;
139 int buffer_size;
140 int block_size;
141 int* vt_blocks; // A reference to the block, because blocks only contains available block (thus, doesn't consider alias etc.). -1 means no block pointed to. Starts at 0.
142 struct {
143 int type; // The type from tensor blocks.
144 int pin_mem; // Whether this is pinned memory.
145 int flags; // The flags (currently for READ_ONLY or not).
146 uint64_t size; // The size of the buffer allocated.
147 int p_refs[2]; // Reference to the upper level block, Starts at 1. Only index 0 is valid throughout, I do use two in the code as a temporary placeholder.
148 ccv_array_t* dup_p_refs; // Reference to the parent tensor block from the duplicated tensor blocks. From buffer, it can point to multiple because it can be associated with multiple tensor blocks that points to different outputs (for example, in 1st unroll, pointing to one block while in 2nd unroll, pointing to another). Start with 0.
149 }* buffers;
150 struct {
151 int buffer_ref; // A reference for block to which buffer to use. Starts at 0.
152 int block_ref; // A reference to which block in the given tensor_block to use.
153 uint64_t offset; // The offset of this block.
154 }* blocks;
155} ccv_nnc_tensor_alloc_prep_t;
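
Taken together, the three arrays describe the final layout: vt_blocks maps a tensor-block index to its entry in blocks (or -1), each blocks[] entry names a buffers[] slot plus a byte offset inside it, and buffers[] carries the per-buffer size and attributes. A hedged sketch of resolving one tensor block's placement, assuming a fully populated alloc_prep (compare the assignment code near source line 560 below):

    const int block_idx = alloc_prep->vt_blocks[i];
    if (block_idx >= 0) // -1 means tensor block i has no allocation entry
    {
        const int buffer_ref = alloc_prep->blocks[block_idx].buffer_ref;
        const uint64_t offset = alloc_prep->blocks[block_idx].offset;
        // Tensor block i occupies [offset, offset + size) inside buffer buffer_ref,
        // whose total capacity is alloc_prep->buffers[buffer_ref].size.
    }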
156
157typedef struct ccv_nnc_symbolic_graph_prep_s {
158 int flags;
159 int while_count_tensor; // This graph will generate a while count tensor. If this is set to 1, we reserve tensor_metadata at 0 for this.
160 int p_idx; // Reference to the index in its parent graph's sub-graph array, Starts at 1.
161 int exec_idx;
162 int unroll_count; // How many times this graph is unrolled before we can have proper assignment.
163 int tensor_symbol_info_size;
164 int exec_symbol_info_size;
165 int tensor_block_size;
166 int sub_prep_size;
167 ccv_nnc_tensor_block_t* tensor_blocks;
168 ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
169 ccv_nnc_graph_exec_flag_t* exec_flags;
170 ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
171 int* dup_tensor_block_ref;
172 ccv_nnc_graph_visit_t* visit;
173 ccv_nnc_tensor_alloc_prep_t* alloc_prep;
174 struct ccv_nnc_symbolic_graph_prep_s* p;
175 struct ccv_nnc_symbolic_graph_prep_s** sub_preps; // The preps of its sub-graphs.
176 // Structures that don't require to be freed after deallocation.
177 const ccv_nnc_symbolic_graph_t* symbolic_graph; // Constant because I cannot modify it.
178 ccv_nnc_graph_t* graph; // Materialized graph, not managed by prep after created.
179 ccv_nnc_tensor_arena_t* tensor_arena; // Tensor arena, not managed by prep as well.
180 ccv_array_t* dup_breakpoints; // The noop breakpoints, used to extend the inputs life-cycle for while expr.
181} ccv_nnc_symbolic_graph_prep_t;
182
183typedef struct {
184 int oc;
185 ccv_array_t* itf;
186} ccv_nnc_tensor_block_adjacent_t;
187
188static ccv_nnc_tensor_alloc_prep_t* _ccv_nnc_tensor_alloc_prep_new(const ccv_sparse_matrix_t* const exec_dep, const ccv_nnc_tensor_block_t* const tensor_blocks, const int tensor_block_size)
189{
190 // Compute how many dis-continuous buffers are needed.
191 // We prefer to have several dis-continuous buffers instead of one big buffer because
192 // in this way, we can rely on system memory allocators (jemalloc, tcmalloc, or CUDA's allocator)
193 // to fully utilize memory.
194 int i, j, k;
195 ccv_array_t** const alloc_dep = (ccv_array_t**)cccalloc(tensor_block_size, sizeof(ccv_array_t*));
196 int allocable_tensor_size = 0, available_tensor_size = 0;
197 for (i = 0; i < tensor_block_size; i++)
198 if (!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i]))
199 {
200 // Tensors that we need the header info.
201 ++available_tensor_size;
202 if (!TENSOR_EXPECT_ALIAS(tensor_blocks[i]))
203 // Tensors that we actually need to allocate (exclude the alias).
204 ++allocable_tensor_size;
205 }
206 ccv_sparse_matrix_t* const tensor_df = ccv_sparse_matrix_new(tensor_block_size, tensor_block_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
207 ccv_sparse_matrix_t* const tensor_dt = ccv_sparse_matrix_new(tensor_block_size, tensor_block_size, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
208 ccv_nnc_tensor_block_adjacent_t* const adj = (ccv_nnc_tensor_block_adjacent_t*)cccalloc(tensor_block_size, sizeof(ccv_nnc_tensor_block_adjacent_t));
209 // Overlap count.
210 for (i = 0; i < tensor_block_size; i++)
211 if (TENSOR_EXPECT_COMPUTABLE(tensor_blocks[i]))
212 for (j = i + 1; j < tensor_block_size; j++)
213 if (TENSOR_EXPECT_COMPUTABLE(tensor_blocks[j]))
214 {
215 // Check to see if they interfere (default to yes).
216 // If any of the i's head is deterministically later than j's tail
217 // or any of the i's tail is deterministically earlier than j's head, they don't interfere.
218 const int i_hop_j = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[i], tensor_blocks[j]);
219 if (i_hop_j > 0)
220 {
221 ccv_set_sparse_matrix_cell(tensor_dt, i, j, &i_hop_j);
222 ccv_set_sparse_matrix_cell(tensor_df, j, i, &i_hop_j);
223 }
224 const int j_hop_i = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[j], tensor_blocks[i]);
225 if (j_hop_i > 0)
226 {
227 ccv_set_sparse_matrix_cell(tensor_dt, j, i, &j_hop_i);
228 ccv_set_sparse_matrix_cell(tensor_df, i, j, &j_hop_i);
229 }
230 // It cannot be that both i can hop to j and j can hop to i.
231 assert(!(i_hop_j > 0 && j_hop_i > 0));
232 if (!i_hop_j && !j_hop_i && tensor_blocks[i].type == tensor_blocks[j].type)
233 {
234 if (!adj[i].itf)
235 adj[i].itf = ccv_array_new(sizeof(int), 1, 0);
236 ccv_array_push(adj[i].itf, &j);
237 ++adj[i].oc;
238 if (!adj[j].itf)
239 adj[j].itf = ccv_array_new(sizeof(int), 1, 0);
240 ccv_array_push(adj[j].itf, &i);
241 ++adj[j].oc;
242 }
243 }
244 int* const buf = (int*)ccmalloc(sizeof(int) * tensor_block_size);
245 int* const assigned = (int*)cccalloc(tensor_block_size, sizeof(int));
246 uint64_t* const allocated_offset = (uint64_t*)cccalloc(tensor_block_size, sizeof(uint64_t));
247 uint64_t* const allocated_size = (uint64_t*)cccalloc(tensor_block_size, sizeof(uint64_t));
248 int num_assigned = 0;
249 // I can do a bit optimization here to assign out const tensor first, but heck, this just works for now.
250 // Allocation graph (assuming there is a source node, and a destination node, which is 0, and (tensor_block_size + 1)
251 // The first channel denotes the bytes available for allocation,
252 // the second channel denotes the offset available for the allocation,
253 ccv_sparse_matrix_t* alloc = ccv_sparse_matrix_new(tensor_block_size + 2, tensor_block_size + 2, CCV_64S | CCV_C2, CCV_SPARSE_ROW_MAJOR, 0);
254 ccv_array_t* opt = ccv_array_new(sizeof(ccv_nnc_tensor_opt_t), 1, 0);
255 for (j = 0; j < allocable_tensor_size;)
256 {
257 // Find the one with largest overlap (in case overlap is the same, larger size), and it is not assigned.
258 uint64_t max_size = 0;
259 ccv_array_clear(opt);
260 int current_type = 0; // Deal with one type at a time.
261 for (i = 0; i < tensor_block_size; i++)
262 if (tensor_blocks[i].size >= max_size &&
263 TENSOR_EXPECT_COMPUTABLE(tensor_blocks[i]) && !assigned[i] &&
264 IS_PRIMARY_COMPANION(i, tensor_blocks[i]) &&
265 (!current_type || tensor_blocks[i].type == current_type))
266 {
267 ccv_nnc_tensor_opt_t a = {
268 .size = tensor_blocks[i].size,
269 .index = i,
270 .oc = adj[i].oc,
271 .type = tensor_blocks[i].type,
272 };
273 assert(a.type);
274 current_type = a.type; // Now we know the primary type we should deal with.
275 if (tensor_blocks[i].companion_ref)
276 {
277 const int companion_ref = tensor_blocks[i].companion_ref - 1;
278 a.size = ccv_max(a.size, tensor_blocks[companion_ref].size);
279 a.oc += adj[companion_ref].oc;
280 }
281 // In case we have a tie, take them all in the array.
282 if (a.size > max_size)
283 ccv_array_clear(opt), max_size = a.size;
284 ccv_array_push(opt, &a);
285 }
286 assert(opt->rnum > 0);
287 // Order opt array by the oc because type and size should be equal at this point.
288 _ccv_nnc_tensor_opt_sort_by_size_and_oc((ccv_nnc_tensor_opt_t*)opt->data, opt->rnum, 0);
289 // Go through opt array again, this time, it is ordered by size, therefore, if we found a place to insert, we are good.
290 int min_y = 0, min_x = tensor_block_size + 1, min_i = -1, min_hop = exec_dep->rows * 3;
291 uint64_t min_val[2] = {
292 0, 0
293 };
294 for (i = 0; i < opt->rnum; i++)
295 {
296 ccv_nnc_tensor_opt_t a = *(ccv_nnc_tensor_opt_t*)ccv_array_get(opt, i);
297 // Now, determine the order between a and c. After this, we can always check whether y
298 // can hop to the earliest one and if the latest one can hop to x.
299 // The earliest one will be called p and the latest one will be called q.
300 int p = a.index;
301 int q = a.index;
302 if (tensor_blocks[a.index].companion_ref)
303 {
304 const int companion_ref = tensor_blocks[a.index].companion_ref - 1;
305 const ccv_numeric_data_t b_hop_p = ccv_get_sparse_matrix_cell(tensor_dt, p, companion_ref);
306 if (b_hop_p.i32 && b_hop_p.i32[0] > 0)
307 p = companion_ref;
308 else {
309 const ccv_numeric_data_t q_hop_b = ccv_get_sparse_matrix_cell(tensor_dt, companion_ref, q);
310 if (q_hop_b.i32 && q_hop_b.i32[0] > 0)
311 q = companion_ref;
312 else { // Otherwise, b is in between p and q.
313 const ccv_numeric_data_t p_hop_b = ccv_get_sparse_matrix_cell(tensor_dt, companion_ref, p);
314 const ccv_numeric_data_t b_hop_q = ccv_get_sparse_matrix_cell(tensor_dt, q, companion_ref);
315 assert(p_hop_b.i32 && p_hop_b.i32[0] > 0 && b_hop_q.i32 && b_hop_q.i32[0] > 0);
316 }
317 }
318 }
319 assert(tensor_blocks[q].type == tensor_blocks[p].type);
320 const int type = tensor_blocks[p].type;
321 // y is always earlier than x, but this is hard to assert now.
322 // If this edge satisfy the requirement, now we need to find the ones with tightest possible bounds.
323 // Thus, the hop between y and x (through a) should be smallest ones.
324 // We optimized this by first find all allocated nodes that comes to p, and all allocated nodes that
325 // out of q. For these nodes, we try to verify whether they form a connection (by checking against
326 // alloc sparse matrix). If they do, try to see whether we can insert with tightest bound.
327 int y_size = 0;
328 int* const y_buf = buf;
329#define for_block(y, val) do { \
330 if (((int*)val)[0] > 0 && assigned[y] && tensor_blocks[y].type == type) \
331 y_buf[y_size++] = y + 1; \
332 } while(0)
333 ccv_sparse_matrix_vector_t* const y_vector = ccv_get_sparse_matrix_vector(tensor_dt, p);
334 if (y_vector)
335 CCV_SPARSE_VECTOR_FOREACH(tensor_dt, y_vector, for_block);
336#undef for_block
337 assert(y_size <= tensor_block_size);
338 int x_size = 0;
339 int* const x_buf = buf + y_size;
340#define for_block(x, val) do { \
341 if (((int*)val)[0] > 0 && assigned[x] && tensor_blocks[x].type == type) \
342 x_buf[x_size++] = x + 1; \
343 } while(0)
344 ccv_sparse_matrix_vector_t* const x_vector = ccv_get_sparse_matrix_vector(tensor_df, q);
345 if (x_vector)
346 CCV_SPARSE_VECTOR_FOREACH(tensor_df, x_vector, for_block);
347#undef for_block
348 assert(y_size + x_size <= tensor_block_size);
349 int x, y;
350 for (y = 0; y < y_size; y++)
351 {
352 const ccv_numeric_data_t val = ccv_get_sparse_matrix_cell(alloc, y_buf[y], tensor_block_size + 1);
353 if (val.u64 && val.u64[0] >= a.size)
354 {
355 const ccv_numeric_data_t y_hop_p = ccv_get_sparse_matrix_cell(tensor_dt, p, y_buf[y] - 1);
356 assert(y_hop_p.i32 && y_hop_p.i32[0] > 0);
357 const int hop = exec_dep->rows + y_hop_p.i32[0];
358 if (hop < min_hop)
359 min_y = y_buf[y], min_x = tensor_block_size + 1, min_hop = hop,
360 min_val[0] = val.u64[0], min_val[1] = val.u64[1];
361 }
362 }
363 for (x = 0; x < x_size; x++)
364 {
365 const ccv_numeric_data_t val = ccv_get_sparse_matrix_cell(alloc, 0, x_buf[x]);
366 if (val.u64 && val.u64[0] >= a.size)
367 {
368 const ccv_numeric_data_t q_hop_x = ccv_get_sparse_matrix_cell(tensor_dt, x_buf[x] - 1, q);
369 assert(q_hop_x.i32 && q_hop_x.i32[0] > 0);
370 const int hop = exec_dep->rows + q_hop_x.i32[0];
371 if (hop < min_hop)
372 min_y = 0, min_x = x_buf[x], min_hop = hop,
373 min_val[0] = val.u64[0], min_val[1] = val.u64[1];
374 }
375 }
376 for (y = 0; y < y_size; y++)
377 {
378 ccv_sparse_matrix_vector_t* const y_vector = ccv_get_sparse_matrix_vector(alloc, y_buf[y]);
379 if (y_vector)
380 for (x = 0; x < x_size; x++)
381 {
382 const ccv_numeric_data_t val = ccv_get_sparse_matrix_cell_from_vector(alloc, y_vector, x_buf[x]);
383 if (val.u64 && val.u64[0] >= a.size)
384 {
385 const ccv_numeric_data_t y_hop_p = ccv_get_sparse_matrix_cell(tensor_dt, p, y_buf[y] - 1);
386 const ccv_numeric_data_t q_hop_x = ccv_get_sparse_matrix_cell(tensor_dt, x_buf[x] - 1, q);
387 assert(y_hop_p.i32 && y_hop_p.i32[0] > 0);
388 assert(q_hop_x.i32 && q_hop_x.i32[0] > 0);
389 const int hop = y_hop_p.i32[0] + q_hop_x.i32[0];
390 if (hop < min_hop)
391 min_y = y_buf[y], min_x = x_buf[x], min_hop = hop,
392 min_val[0] = val.u64[0], min_val[1] = val.u64[1];
393 }
394 }
395 }
396 // If I found a place, stop, and exit.
397 if (min_y > 0 || min_x < tensor_block_size + 1)
398 {
399 min_i = i;
400 break;
401 }
402 }
403 // If I cannot find a place, then start a new connection between min_y and min_x (a new assignment group).
404 // and default to largest size available.
405 ccv_nnc_tensor_opt_t a = *(ccv_nnc_tensor_opt_t*)ccv_array_get(opt, ccv_max(0, min_i));
406 if (min_i == -1)
407 {
408 allocated_size[num_assigned] = a.size;
409 ++num_assigned;
410 }
411 int assign_group = num_assigned;
412 if (min_y > 0)
413 {
414 assign_group = assigned[min_y - 1];
415 // The y and x should belong to the same assigned group.
416 assert(min_x == tensor_block_size + 1 || assigned[min_x - 1] == assign_group);
417 } else if (min_x < tensor_block_size + 1)
418 assign_group = assigned[min_x - 1];
419 // If min_y is source and min_x is destination, we don't need to do anything, otherwise, decrease the weight on that edge.
420 if (min_y != 0 || min_x != tensor_block_size + 1)
421 {
422 uint64_t val[2] = {
423 min_val[0], min_val[1]
424 };
425 assert(val[0] >= a.size);
426 val[0] -= a.size;
427 val[1] = val[1] + a.size; // Move the offset to the next one.
428 ccv_set_sparse_matrix_cell(alloc, min_y, min_x, val);
429 }
430 int strings[3];
431 strings[0] = a.index + 1;
432 int string_size = 1;
433 // Assign out the designated companion if it exists.
434 if (tensor_blocks[a.index].companion_ref)
435 {
436 const int companion_ref = tensor_blocks[a.index].companion_ref - 1;
437 assert(tensor_blocks[a.index].type == tensor_blocks[companion_ref].type);
438 const ccv_numeric_data_t b_hop_p = ccv_get_sparse_matrix_cell(tensor_dt, strings[0] - 1, companion_ref);
439 if (b_hop_p.i32 && b_hop_p.i32[0] > 0)
440 {
441 for (i = 0; i < string_size; i++)
442 strings[i + 1] = strings[i];
443 strings[0] = companion_ref + 1;
444 } else {
445 const ccv_numeric_data_t q_hop_b = ccv_get_sparse_matrix_cell(tensor_dt, companion_ref, strings[string_size - 1] - 1);
446 if (q_hop_b.i32 && q_hop_b.i32[0] > 0)
447 strings[string_size] = companion_ref + 1;
448 else {
449 // Because b_hop_p is 0, q_hop_b is nil, p != q, and b must in between p and q. Therefore, I must have 2 allocations.
450 assert(string_size == 2);
451 strings[2] = strings[1];
452 strings[1] = companion_ref + 1;
453 }
454 }
455 ++string_size;
456 }
457 // Assign out and update oc.
458 for (i = 0; i < string_size; i++)
459 {
460 const int index = strings[i] - 1;
461 // Assign out the selected one.
462 assigned[index] = assign_group;
463 // The offset for this one, should be either 0 (started a new group, when min_i == -1), or the offset on this edge.
464 allocated_offset[index] = min_val[1];
465 if (adj[index].itf)
466 for (k = 0; k < adj[index].itf->rnum; k++)
467 {
468 const int d = *(int*)ccv_array_get(adj[index].itf, k);
469 if (!assigned[d] && TENSOR_EXPECT_COMPUTABLE(tensor_blocks[d]))
470 --adj[d].oc;
471 }
472 }
473 uint64_t val[2] = {
474 a.size, min_val[1]
475 };
476 uint64_t consumed_size = 0;
477 // Go over from min_y to string_size (excluding min_x).
478 for (i = 0; i < string_size; i++)
479 {
480 const uint64_t size = tensor_blocks[strings[i] - 1].size;
481 assert(size <= a.size);
482 // Update consumed size if it is bigger than "size".
483 if (size > consumed_size)
484 {
485 val[0] = size - consumed_size;
486 ccv_set_sparse_matrix_cell(alloc, min_y, strings[i], val);
487 consumed_size = size;
488 val[1] = min_val[1] + consumed_size;
489 }
490 // If it consumed all the flow, break out.
491 if (consumed_size == a.size)
492 break;
493 }
494 for (i = 0; i < string_size; i++)
495 {
496 const uint64_t i_size = tensor_blocks[strings[i] - 1].size;
497 uint64_t val[2] = {
498 i_size, min_val[1]
499 };
500 uint64_t consumed_size = 0;
501 for (k = i + 1; k < string_size; k++)
502 {
503 const uint64_t size = ccv_min(i_size, tensor_blocks[strings[k] - 1].size);
504 // Update consumed size if it is bigger than "size".
505 if (size > consumed_size)
506 {
507 val[0] = size - consumed_size;
508 ccv_set_sparse_matrix_cell(alloc, strings[i], strings[k], val);
509 consumed_size = size;
510 val[1] = min_val[1] + consumed_size;
511 }
512 // If it consumed all the flow, break out.
513 if (consumed_size == i_size)
514 break;
515 }
516 val[0] = i_size - consumed_size;
517 // Still have residual, flow it to min_x.
518 if (val[0] > 0)
519 ccv_set_sparse_matrix_cell(alloc, strings[i], min_x, val);
520 }
521 j += string_size;
522 }
523 ccfree(buf);
524 ccv_array_free(opt);
525 ccv_matrix_free(tensor_df);
526 ccv_matrix_free(tensor_dt);
527#define for_block(y, x, val) do { \
528 if (((uint64_t*)val)[0] > 0 && y > 0 && x < tensor_block_size + 1) \
529 { \
530 if (!alloc_dep[x - 1]) \
531 alloc_dep[x - 1] = ccv_array_new(sizeof(int), 1, 0); \
532 ccv_array_add_unique_int(alloc_dep[x - 1], y - 1); \
533 } \
534 } while (0)
535 CCV_SPARSE_FOREACH(alloc, for_block);
536#undef for_block
537 ccv_matrix_free(alloc);
538 for (i = 0; i < tensor_block_size; i++)
539 if (adj[i].itf)
540 ccv_array_free(adj[i].itf);
541 ccfreefree(adj);
542 ccv_nnc_tensor_alloc_prep_t* alloc_prep = (ccv_nnc_tensor_alloc_prep_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_alloc_prep_t) + sizeof(alloc_prep->blocks[0]) * available_tensor_size + sizeof(alloc_prep->buffers[0]) * num_assigned + sizeof(int) * tensor_block_size);
543 alloc_prep->alloc_dep = alloc_dep;
544 alloc_prep->vt_block_size = tensor_block_size;
545 alloc_prep->buffer_size = num_assigned;
546 alloc_prep->block_size = available_tensor_size;
547 alloc_prep->blocks = (void*)(alloc_prep + 1); // From the biggest structs to smaller ones.
548 alloc_prep->buffers = (void*)(alloc_prep->blocks + available_tensor_size);
549 alloc_prep->vt_blocks = (int*)(alloc_prep->buffers + num_assigned);
550 memset(alloc_prep->buffers, 0, sizeof(alloc_prep->buffers[0]) * num_assigned);
551 for (i = 0; i < num_assigned; i++)
552 alloc_prep->buffers[i].size = allocated_size[i];
553 ccfreefree(allocated_size);
554 j = 0;
555 // Assigning out the tensors (in case of sharing tensors / in-place ops).
556 for (i = 0; i < tensor_block_size; i++)
557 if (!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i])((tensor_blocks[i].flags & 0x3) == UNASSIGNED))
558 {
559 alloc_prep->blocks[j].block_ref = i;
560 if (!TENSOR_EXPECT_ALIAS(tensor_blocks[i])((tensor_blocks[i].flags & 0x3) == ALIAS))
561 {
562 alloc_prep->vt_blocks[i] = j;
563 // Also, set its allocations.
564 assert(assigned[i] > 0)((void) sizeof ((assigned[i] > 0) ? 1 : 0), __extension__ (
{ if (assigned[i] > 0) ; else __assert_fail ("assigned[i] > 0"
, "ccv_nnc_symbolic_graph_compile.c", 564, __extension__ __PRETTY_FUNCTION__
); }))
;
565 const int buffer_ref = alloc_prep->blocks[j].buffer_ref = assigned[i] - 1;
566 alloc_prep->blocks[j].offset = allocated_offset[i];
567 if (!alloc_prep->buffers[buffer_ref].type)
568 alloc_prep->buffers[buffer_ref].type = tensor_blocks[i].type;
569 alloc_prep->buffers[buffer_ref].pin_mem = alloc_prep->buffers[buffer_ref].pin_mem || tensor_blocks[i].pin_mem;
570 alloc_prep->buffers[buffer_ref].flags |= TENSOR_READ_WRITE(tensor_blocks[i])(tensor_blocks[i].flags & 0xc);
571 assert(allocated_offset[i] + tensor_blocks[i].size <= alloc_prep->buffers[buffer_ref].size)((void) sizeof ((allocated_offset[i] + tensor_blocks[i].size <=
alloc_prep->buffers[buffer_ref].size) ? 1 : 0), __extension__
({ if (allocated_offset[i] + tensor_blocks[i].size <= alloc_prep
->buffers[buffer_ref].size) ; else __assert_fail ("allocated_offset[i] + tensor_blocks[i].size <= alloc_prep->buffers[buffer_ref].size"
, "ccv_nnc_symbolic_graph_compile.c", 571, __extension__ __PRETTY_FUNCTION__
); }))
;
572 } else {
573 alloc_prep->vt_blocks[i] = -1;
574 alloc_prep->blocks[j].buffer_ref = -1;
575 alloc_prep->blocks[j].offset = 0;
576 }
577 ++j;
578 } else
579 alloc_prep->vt_blocks[i] = -1;
580 ccfreefree(allocated_offset);
581 ccfreefree(assigned);
582 return alloc_prep;
583}
584
585static void _ccv_nnc_tensor_alloc_prep_free(ccv_nnc_tensor_alloc_prep_t* alloc_prep)
586{
587 int i;
588 for (i = 0; i < alloc_prep->vt_block_size; i++)
589 if (alloc_prep->alloc_dep[i])
590 ccv_array_free(alloc_prep->alloc_dep[i]);
591 for (i = 0; i < alloc_prep->buffer_size; i++)
592 if (alloc_prep->buffers[i].dup_p_refs)
593 ccv_array_free(alloc_prep->buffers[i].dup_p_refs);
594 ccfreefree(alloc_prep->alloc_dep);
595 ccfreefree(alloc_prep);
596}
597
598// Simple allocator from ccv_array_t.
599static int _ccv_nnc_tensor_metadata_pos_new(ccv_array_t* const tensor_metadata, const size_t size)
600{
601 int pos = tensor_metadata->rnum;
602 int rsize = (size + 15) / 16;
603 ccv_array_resize(tensor_metadata, pos + rsize);
604 return (pos << 1) + 1;
605}
606
607static ccv_nnc_tensor_t* _ccv_nnc_tensor_metadata_get(const ccv_array_t* const tensor_metadata, const int pos)
608{
609 assert((pos >> 1) < tensor_metadata->rnum)((void) sizeof (((pos >> 1) < tensor_metadata->rnum
) ? 1 : 0), __extension__ ({ if ((pos >> 1) < tensor_metadata
->rnum) ; else __assert_fail ("(pos >> 1) < tensor_metadata->rnum"
, "ccv_nnc_symbolic_graph_compile.c", 609, __extension__ __PRETTY_FUNCTION__
); }))
;
610 return (ccv_nnc_tensor_t*)ccv_array_get(tensor_metadata, pos >> 1)((void*)(((char*)((tensor_metadata)->data)) + (size_t)(tensor_metadata
)->rsize * (size_t)(pos >> 1)))
;
611}
612
613#define CCV_NNC_IS_METADATA_POS(ptr)((uintptr_t)(ptr) & 1) ((uintptr_t)(ptr) & 1)
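The metadata array above acts as a simple bump allocator: a "position" is an element index shifted left by one with the low bit set, so it can never be confused with a real (at least 2-byte aligned) tensor pointer stored in the same slot. The following is a minimal standalone sketch of that tagging scheme; the names (metadata_pool_t, pool_alloc, pool_get, IS_POS) are hypothetical and only illustrate the idea, not the library API.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical illustration of the low-bit tagged "position" scheme used by
 * _ccv_nnc_tensor_metadata_pos_new / _ccv_nnc_tensor_metadata_get above. */
typedef struct {
	char* data; /* backing storage, 16-byte elements */
	int rnum;   /* number of 16-byte elements in use */
	int cap;
} metadata_pool_t;

/* Reserve `size` bytes (rounded up to 16-byte elements) and return a tagged
 * position: (element index << 1) | 1. The low bit marks it as a position. */
static int pool_alloc(metadata_pool_t* pool, size_t size)
{
	const int pos = pool->rnum;
	const int rsize = (int)((size + 15) / 16);
	if (pool->rnum + rsize > pool->cap) {
		pool->cap = (pool->rnum + rsize) * 2;
		pool->data = realloc(pool->data, (size_t)pool->cap * 16);
	}
	pool->rnum += rsize;
	return (pos << 1) | 1;
}

/* Resolve a tagged position back to a pointer. Valid only until the next
 * pool_alloc, because realloc may move the backing storage. */
static void* pool_get(const metadata_pool_t* pool, int pos)
{
	assert(pos & 1); /* must be a tagged position, not a raw pointer */
	return pool->data + (size_t)(pos >> 1) * 16;
}

#define IS_POS(ptr) ((uintptr_t)(ptr) & 1) /* mirrors CCV_NNC_IS_METADATA_POS */

int main(void)
{
	metadata_pool_t pool = { 0 };
	const int pos = pool_alloc(&pool, 24); /* takes two 16-byte elements */
	memset(pool_get(&pool, pos), 0, 24);
	void* stored = (void*)(intptr_t)pos;   /* a position stored in a pointer slot */
	assert(IS_POS(stored));                /* distinguishable from a real pointer */
	free(pool.data);
	return 0;
}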
614
615static ccv_nnc_tensor_t* _ccv_nnc_tensor_metadata_rewire(const ccv_array_t* const tensor_metadata, ccv_nnc_tensor_t* const vt_tensor)
616{
617 // If the low bit is not 1, this is not a position (but a normal tensor pointer), just return directly.
618 if (!CCV_NNC_IS_METADATA_POS(vt_tensor)((uintptr_t)(vt_tensor) & 1))
619 return vt_tensor;
620 ccv_nnc_tensor_t* const tensor = _ccv_nnc_tensor_metadata_get(tensor_metadata, (int)(intptr_t)vt_tensor);
621 if (tensor->alias_ref && CCV_NNC_IS_METADATA_POS(tensor->alias_ref)((uintptr_t)(tensor->alias_ref) & 1))
622 {
623 const int alias_ref = tensor->alias_ref;
624 tensor->alias_ref = (uintptr_t)_ccv_nnc_tensor_metadata_get(tensor_metadata, (int)tensor->alias_ref);
625 _ccv_nnc_tensor_metadata_rewire(tensor_metadata, (ccv_nnc_tensor_t*)(intptr_t)alias_ref);
626 }
627 if (CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
628 {
629 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
630 int i;
631 const int count = mv->kind + mv->repeat;
632 for (i = 0; i < count; i++)
633 {
634 if (CCV_NNC_IS_METADATA_POS(CCV_NNC_MULTIVIEW_DATA(mv)[i])((uintptr_t)(((mv)->_heap_data ? (mv)->_heap_data : (mv
)->_inline_data)[i]) & 1)
)
635 {
636 const int pos = (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i];
637 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i] = _ccv_nnc_tensor_metadata_get(tensor_metadata, (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
638 _ccv_nnc_tensor_metadata_rewire(tensor_metadata, (ccv_nnc_tensor_t*)(intptr_t)pos);
639 }
640 }
641 // No need to recursively do parent pointer, otherwise we are in deep rewire.
642 if (mv->p && CCV_NNC_IS_METADATA_POS(mv->p)((uintptr_t)(mv->p) & 1))
643 mv->p = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, (int)(intptr_t)mv->p);
644 if (mv->sp)
645 for (i = 0; i < mv->sp->rnum; i++)
646 {
647 ccv_nnc_tensor_t** const tensor = (ccv_nnc_tensor_t**)ccv_array_get(mv->sp, i)((void*)(((char*)((mv->sp)->data)) + (size_t)(mv->sp
)->rsize * (size_t)(i)))
;
648 if (CCV_NNC_IS_METADATA_POS(*tensor)((uintptr_t)(*tensor) & 1))
649 {
650 const int pos = (int)(intptr_t)*tensor;
651 *tensor = _ccv_nnc_tensor_metadata_get(tensor_metadata, pos);
652 assert(!CCV_IS_TENSOR_MULTIVIEW(*tensor))((void) sizeof ((!((*(int*)(*tensor)) & CCV_TENSOR_MULTIVIEW
)) ? 1 : 0), __extension__ ({ if (!((*(int*)(*tensor)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("!CCV_IS_TENSOR_MULTIVIEW(*tensor)",
"ccv_nnc_symbolic_graph_compile.c", 652, __extension__ __PRETTY_FUNCTION__
); }))
;
653 _ccv_nnc_tensor_metadata_rewire(tensor_metadata, (ccv_nnc_tensor_t*)(intptr_t)pos);
654 }
655 }
656 }
657 return tensor;
658}
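Once the metadata array stops growing, the stored positions have to be turned back into real pointers, which is what the rewire pass above does recursively over aliases, multiview slots and synchronization lists. Below is a toy sketch of the same two-phase pattern on a made-up node type; the names here are illustrative assumptions, not the library's types.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Toy node whose `next` field may hold either a real pointer or a tagged
 * position into a metadata pool (low bit set), as in the rewire pass above. */
typedef struct node_s {
	struct node_s* next;
} node_t;

#define IS_POS(ptr) ((uintptr_t)(ptr) & 1)

/* Resolve a tagged position against a (now stable) base address. */
static node_t* resolve(char* base, int pos)
{
	return (node_t*)(base + (size_t)(pos >> 1) * 16);
}

/* Walk the structure and replace every tagged position with the pointer it
 * denotes. Safe only once the pool will no longer be reallocated. */
static node_t* rewire(char* base, node_t* n)
{
	if (IS_POS(n))
		n = resolve(base, (int)(intptr_t)n);
	if (n->next && IS_POS(n->next))
		n->next = rewire(base, n->next);
	return n;
}

int main(void)
{
	union { char bytes[64]; void* align; } pool = { { 0 } };
	char* const base = pool.bytes;
	node_t* const a = (node_t*)(base + 0);  /* element 0 -> position (0 << 1) | 1 = 1 */
	node_t* const b = (node_t*)(base + 16); /* element 1 -> position (1 << 1) | 1 = 3 */
	a->next = (node_t*)(intptr_t)3;         /* store b as a tagged position */
	b->next = 0;
	node_t* const r = rewire(base, (node_t*)(intptr_t)1);
	assert(r == a && a->next == b);         /* positions replaced by pointers */
	return 0;
}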
659
660typedef struct {
661 const uint8_t* ptr;
662 int pos;
663} ccv_nnc_tensor_block_pos_t;
664
665static int _ccv_nnc_tensor_multiview_find_pos(ccv_array_t* const tensor_metadata, const ccv_nnc_tensor_param_t params, const ccv_nnc_symbolic_graph_prep_t* const *const preps, const int block_ref, const int* const ch, const int idx, const ccv_nnc_symbolic_graph_prep_t* prep)
666{
667 int i;
668 int unref_block_ref = block_ref;
669 while (prep->tensor_blocks[unref_block_ref].ref)
670 unref_block_ref = prep->tensor_blocks[unref_block_ref].ref - 1;
671 int vt_ref = prep->alloc_prep->vt_blocks[unref_block_ref];
672 assert(vt_ref >= 0)((void) sizeof ((vt_ref >= 0) ? 1 : 0), __extension__ ({ if
(vt_ref >= 0) ; else __assert_fail ("vt_ref >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 672, __extension__ __PRETTY_FUNCTION__); }))
;
673 assert(unref_block_ref == prep->alloc_prep->blocks[vt_ref].block_ref)((void) sizeof ((unref_block_ref == prep->alloc_prep->blocks
[vt_ref].block_ref) ? 1 : 0), __extension__ ({ if (unref_block_ref
== prep->alloc_prep->blocks[vt_ref].block_ref) ; else __assert_fail
("unref_block_ref == prep->alloc_prep->blocks[vt_ref].block_ref"
, "ccv_nnc_symbolic_graph_compile.c", 673, __extension__ __PRETTY_FUNCTION__
); }))
;
674 const int buffer_ref = prep->alloc_prep->blocks[vt_ref].buffer_ref;
675 uint64_t offset = prep->alloc_prep->blocks[vt_ref].offset;
676 int p_ref = prep->alloc_prep->buffers[buffer_ref].p_refs[0] - 1;
677 for (i = idx - 1; i >= 0; i--)
678 {
679 assert(p_ref >= 0)((void) sizeof ((p_ref >= 0) ? 1 : 0), __extension__ ({ if
(p_ref >= 0) ; else __assert_fail ("p_ref >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 679, __extension__ __PRETTY_FUNCTION__); }))
;
680 const ccv_nnc_symbolic_graph_prep_t* const graph_prep = preps[i];
681 const int unroll_count = graph_prep->unroll_count;
682 if (ch[i]) // Prefer the dup side of things.
683 p_ref = graph_prep->dup_tensor_block_ref[p_ref * unroll_count + ch[i] - 1];
684 int unref_p_ref = p_ref;
685 while (graph_prep->tensor_blocks[unref_p_ref].ref)
686 unref_p_ref = graph_prep->tensor_blocks[unref_p_ref].ref - 1;
687 vt_ref = graph_prep->alloc_prep->vt_blocks[unref_p_ref];
688 const int buffer_ref = graph_prep->alloc_prep->blocks[vt_ref].buffer_ref;
689 offset += graph_prep->alloc_prep->blocks[vt_ref].offset;
690 // If the buffer already exists, prefer that.
691 const uint8_t* ptr = graph_prep->tensor_arena->buffers[buffer_ref].ptr;
692 if (ptr)
693 {
694 // If I have any remaining path that is not covered from 0, I cannot possibly
695 // have any pointer from buffer (that can only happen if it is not dup).
696 for (--i; i >= 0; i--)
697 if (ch[i] != 0)
698 return 0;
699 // Try to find the created tensor block pos in the array, just linear scan.
700 const int tv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_metadata, sizeof(ccv_nnc_tensor_t));
701 ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, tv_pos);
702 *tv = ccv_nnc_tensor(graph_prep->tensor_arena->buffers[buffer_ref].ptr + offset, params, 0);
703 return tv_pos;
704 }
705 p_ref = graph_prep->alloc_prep->buffers[buffer_ref].p_refs[0] - 1;
706 }
707 return 0;
708}
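The search above climbs from the current prep toward the root, following parent refs and accumulating block offsets until it reaches a buffer that already owns a real pointer (the real code additionally gives up when the remaining path still selects a duplicate). A stripped-down sketch of just the offset accumulation, with hypothetical simplified structures:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical flattened level: each level maps the child block into a parent
 * block at some offset, and may or may not already own a concrete pointer. */
typedef struct {
	uint64_t offset; /* offset of the child block inside this level's buffer */
	uint8_t* ptr;    /* non-NULL once a real allocation exists at this level */
} level_t;

/* Walk from the innermost level outward, summing offsets, and stop at the
 * first level that has a real pointer; return NULL if none is found. */
static uint8_t* resolve_ptr(const level_t* levels, int depth, uint64_t base_offset)
{
	uint64_t offset = base_offset;
	int i;
	for (i = depth - 1; i >= 0; i--) {
		offset += levels[i].offset;
		if (levels[i].ptr)
			return levels[i].ptr + offset;
	}
	return 0;
}

int main(void)
{
	static uint8_t arena[1024];
	const level_t levels[2] = { { 128, arena }, { 32, 0 } }; /* only the root has memory */
	uint8_t* const p = resolve_ptr(levels, 2, 16);
	printf("resolved at byte %ld\n", (long)(p - arena)); /* 16 + 32 + 128 = 176 */
	return 0;
}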
709
710 // Descend from the root to the prep level, and compose the multiview from there.
711static int _ccv_nnc_tensor_multiview_down_find_pos(ccv_array_t* const tensor_metadata, const ccv_nnc_tensor_param_t params, const int preserve, const int assign_update, const ccv_nnc_symbolic_graph_prep_t* const *const preps, const ccv_nnc_symbolic_graph_prep_t* const graph_prep, const int block_ref, int* ch, const int idx, int* const pos_ref)
712{
713 assert(pos_ref)((void) sizeof ((pos_ref) ? 1 : 0), __extension__ ({ if (pos_ref
) ; else __assert_fail ("pos_ref", "ccv_nnc_symbolic_graph_compile.c"
, 713, __extension__ __PRETTY_FUNCTION__); }))
;
714 int i;
715 const ccv_nnc_symbolic_graph_prep_t* const prep = preps[idx];
716 const int unroll_count = prep->unroll_count;
717 if (prep == graph_prep)
718 {
719 const int data_pos = _ccv_nnc_tensor_multiview_find_pos(tensor_metadata, params, preps, block_ref, ch, idx, prep);
720 if (!data_pos)
721 return -1;
722 // Based on ch, go all the way back to find the exact pointer to compose.
723 if (// !assign_update && // If I plan to receive the assign update, we don't need to have multiple receivers. Just one tensor to receive the update is enough.
724 prep->dup_tensor_block_ref &&
725 prep->dup_tensor_block_ref[block_ref * unroll_count] >= 0 &&
726 prep->dup_tensor_block_ref[block_ref * unroll_count] != block_ref)
727 {
728 int pos[unroll_count + 1];
729 pos[0] = data_pos;
730 for (i = 0; i < unroll_count; i++)
731 pos[i + 1] = _ccv_nnc_tensor_multiview_find_pos(tensor_metadata, params, preps, prep->dup_tensor_block_ref[block_ref * unroll_count + i], ch, idx, prep);
732 const int mv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_metadata, sizeof(ccv_nnc_tensor_multiview_t));
733 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, mv_pos);
734 ccv_nnc_tensor_t* data[unroll_count + 1];
735 for (i = 0; i < unroll_count + 1; i++)
736 data[i] = _ccv_nnc_tensor_metadata_get(tensor_metadata, pos[i]);
737 ccv_nnc_tensor_multiview(data, CCV_NNC_MULTIVIEW_K0N, unroll_count + 1, prep->graph, mv);
738 for (i = 0; i < unroll_count + 1; i++)
739 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i] = (ccv_nnc_tensor_t*)(intptr_t)pos[i];
740 *pos_ref = mv_pos;
741 } else {
742 *pos_ref = data_pos;
743 }
744 if (preserve)
745 {
746 // If we need to preserve, this needs to be more complicated. At loop 0, I need to access the newly assigned tv;
747 // at any other loop, it should be the same. Thus, for this case, I will create a mv tensor as follows:
748 // a mv of K11, so that when the loop is 0 it unwraps to mv->data[0], otherwise it unwraps to mv->data[1].
749 // mv->data[0] (thin_mv) is a K01, which points to the assigned tv (using 1 as a placeholder here until the parent
750 // arena is allocated).
751 // mv->data[1] (prev_mv_pos) is a K01 or K02, depending on whether above we passed a raw pointer directly or
752 // a mv structure. If we pass a mv structure, we just pass it here. If we pass a raw pointer, we need to wrap
753 // it into a K01 structure.
754 // Why didn't we wrap it directly as mv->data[0] pointing to the assigned tv pointer and mv->data[1] pointing
755 // to the raw pointer (as ptr_ref) with K11? The reason is we don't know whether the assigned tv points to one
756 // memory region, or is managed by a multi-view tensor, which could point to different memory regions.
757 int prev_mv_pos = *pos_ref;
758 if (prev_mv_pos == -1)
759 {
760 prev_mv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_metadata, sizeof(ccv_nnc_tensor_multiview_t));
761 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, prev_mv_pos);
762 ccv_nnc_tensor_t* const tv = _ccv_nnc_tensor_metadata_get(tensor_metadata, data_pos);
763 ccv_nnc_tensor_multiview((ccv_nnc_tensor_t*[]){
764 tv,
765 }, CCV_NNC_MULTIVIEW_K0N, 1, prep->graph, mv);
766 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0] = (ccv_nnc_tensor_t*)(intptr_t)data_pos;
767 }
768 const int mv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_metadata, sizeof(ccv_nnc_tensor_multiview_t));
769 ccv_nnc_tensor_multiview_t* const prev_mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, prev_mv_pos);
770 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, mv_pos);
771 ccv_nnc_tensor_multiview((ccv_nnc_tensor_t*[]){
772 CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10)),
773 (ccv_nnc_tensor_t*)prev_mv,
774 }, CCV_NNC_MULTIVIEW_K1N, 1, prep->graph, mv);
775 prev_mv->p = (void*)(intptr_t)mv_pos;
776 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0] = CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10));
777 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[1] = (ccv_nnc_tensor_t*)(intptr_t)prev_mv_pos;
778 *pos_ref = mv_pos;
779 }
780 return 0;
781 }
782 ch[idx] = 0;
783 int pos[unroll_count + 1];
784 pos[0] = 0;
785 const int retval = _ccv_nnc_tensor_multiview_down_find_pos(tensor_metadata, params, preserve, assign_update, preps, graph_prep, block_ref, ch, idx + 1, pos);
786 assert(retval == 0)((void) sizeof ((retval == 0) ? 1 : 0), __extension__ ({ if (
retval == 0) ; else __assert_fail ("retval == 0", "ccv_nnc_symbolic_graph_compile.c"
, 786, __extension__ __PRETTY_FUNCTION__); }))
;
787 for (i = 0; i < unroll_count; i++)
788 {
789 ch[idx] = i + 1;
790 pos[i + 1] = 0;
791 const int dup_retval = _ccv_nnc_tensor_multiview_down_find_pos(tensor_metadata, params, preserve, assign_update, preps, graph_prep, block_ref, ch, idx + 1, pos + i + 1);
792 if (dup_retval < 0)
793 {
794 assert(i == 0)((void) sizeof ((i == 0) ? 1 : 0), __extension__ ({ if (i == 0
) ; else __assert_fail ("i == 0", "ccv_nnc_symbolic_graph_compile.c"
, 794, __extension__ __PRETTY_FUNCTION__); }))
;
795 break;
796 }
797 }
798 // If current prep has no dup.
799 if (i == 0)
800 {
801 *pos_ref = pos[0];
802 return 0;
803 }
804 ccv_nnc_tensor_t* data[unroll_count + 1];
805 // Compose to a new multiview.
806 for (i = 0; i < unroll_count + 1; i++)
807 { assert(pos[i] > 0)((void) sizeof ((pos[i] > 0) ? 1 : 0), __extension__ ({ if
(pos[i] > 0) ; else __assert_fail ("pos[i] > 0", "ccv_nnc_symbolic_graph_compile.c"
, 807, __extension__ __PRETTY_FUNCTION__); }))
; }
808 const int mv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_metadata, sizeof(ccv_nnc_tensor_multiview_t));
809 for (i = 0; i < unroll_count + 1; i++)
810 data[i] = _ccv_nnc_tensor_metadata_get(tensor_metadata, pos[i]);
811 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, mv_pos);
812 ccv_nnc_tensor_multiview(data, CCV_NNC_MULTIVIEW_K0N, unroll_count + 1, prep->graph, mv);
813 for (i = 0; i < unroll_count + 1; i++)
814 if (data[i] != CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10)) && CCV_IS_TENSOR_MULTIVIEW(data[i])((*(int*)(data[i])) & CCV_TENSOR_MULTIVIEW))
815 ((ccv_nnc_tensor_multiview_t*)data[i])->p = (void*)(intptr_t)mv_pos;
816 for (i = 0; i < unroll_count + 1; i++)
817 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i] = (ccv_nnc_tensor_t*)(intptr_t)pos[i];
818 *pos_ref = mv_pos;
819 return 0;
820}
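The descent above records, in ch[], which copy was taken at every graph level before reaching the target prep: 0 for the original block, 1..unroll_count for the unrolled duplicates. Here is a hedged, minimal sketch of that selection-recording recursion detached from the tensor machinery (all names are made up for illustration; the real code stops at the matching prep and composes multiview tensors instead of printing).

#include <stdio.h>

/* Enumerate every path through a stack of nested graph levels, recording in
 * ch[level] which copy was chosen, mirroring how the down-find recursion
 * varies ch[idx] while recursing to idx + 1. */
static void walk(int* ch, int level, int depth, const int* unroll)
{
	int i;
	if (level == depth) {
		printf("path:");
		for (i = 0; i < depth; i++)
			printf(" %d", ch[i]);
		printf("\n");
		return;
	}
	for (i = 0; i <= unroll[level]; i++) {
		ch[level] = i; /* 0 = original block, 1..unroll = duplicate */
		walk(ch, level + 1, depth, unroll);
	}
}

int main(void)
{
	const int unroll[3] = { 1, 0, 2 }; /* per-level unroll counts (hypothetical) */
	int ch[3] = { 0, 0, 0 };
	walk(ch, 0, 3, unroll);
	return 0;
}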
821
822static int _ccv_nnc_is_symbolic_graph_exec_input_or_output(const int p_ref, const ccv_nnc_graph_exec_symbol_info_t *const node)
823{
824 int i;
825 int is_input = 0;
826 assert(node)((void) sizeof ((node) ? 1 : 0), __extension__ ({ if (node) ;
else __assert_fail ("node", "ccv_nnc_symbolic_graph_compile.c"
, 826, __extension__ __PRETTY_FUNCTION__); }))
;
827 for (i = 0; i < node->input_size && !is_input; i++)
828 if (p_ref == node->inputs[i])
829 is_input = 1;
830 int is_output = 0;
831 for (i = 0; i < node->output_size && !is_output; i++)
832 if (p_ref == node->outputs[i])
833 is_output = 1;
834 // Prefer to treat it as an output if it is both an input and an output.
835 if (is_output)
836 return 1;
837 if (is_input)
838 return -1;
839 return 0;
840}
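The helper above encodes its answer as a tri-state: 1 means output (which wins when the ref is both), -1 means input only, 0 means neither. A small usage sketch of that convention with a toy node type (the struct and values are hypothetical, not how the library builds exec symbol info):

#include <assert.h>

/* Toy stand-in for the tri-state convention used above. */
typedef struct {
	const int* inputs;
	int input_size;
	const int* outputs;
	int output_size;
} toy_node_t;

static int classify(int ref, const toy_node_t* node)
{
	int i, is_input = 0, is_output = 0;
	for (i = 0; i < node->input_size; i++)
		if (ref == node->inputs[i])
			is_input = 1;
	for (i = 0; i < node->output_size; i++)
		if (ref == node->outputs[i])
			is_output = 1;
	return is_output ? 1 : (is_input ? -1 : 0);
}

int main(void)
{
	const int ins[] = { 3, 5 }, outs[] = { 5, 7 };
	const toy_node_t node = { ins, 2, outs, 2 };
	assert(classify(5, &node) == 1);  /* both input and output: treated as output */
	assert(classify(3, &node) == -1); /* input only */
	assert(classify(9, &node) == 0);  /* neither */
	return 0;
}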
841
842static int _ccv_nnc_tensor_block_check_preserve(const ccv_nnc_symbolic_graph_prep_t* const graph_prep, const int block_ref)
843{
844 // No need to check whether to preserve if this is not a while loop.
845 if (!(graph_prep->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
846 return 0;
847 assert(block_ref >= 0 && block_ref < graph_prep->tensor_symbol_info_size)((void) sizeof ((block_ref >= 0 && block_ref < graph_prep
->tensor_symbol_info_size) ? 1 : 0), __extension__ ({ if (
block_ref >= 0 && block_ref < graph_prep->tensor_symbol_info_size
) ; else __assert_fail ("block_ref >= 0 && block_ref < graph_prep->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_compile.c", 847, __extension__ __PRETTY_FUNCTION__
); }))
;
848 // If it is unassigned, no need to preserve.
849 if (TENSOR_EXPECT_UNASSIGNED(graph_prep->tensor_blocks[block_ref])((graph_prep->tensor_blocks[block_ref].flags & 0x3) ==
UNASSIGNED)
)
850 return 0;
851 const int p_ref = graph_prep->tensor_blocks[block_ref].p_refs[0] - 1;
852 // If p is not input, no need to preserve at all.
853 if (-1 != _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref, graph_prep->p->exec_symbol_info + (graph_prep->exec_idx - 1)))
854 return 0;
855 const int vt_ref = graph_prep->alloc_prep->vt_blocks[block_ref];
856 assert(vt_ref >= 0)((void) sizeof ((vt_ref >= 0) ? 1 : 0), __extension__ ({ if
(vt_ref >= 0) ; else __assert_fail ("vt_ref >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 856, __extension__ __PRETTY_FUNCTION__); }))
;
857 assert(block_ref == graph_prep->alloc_prep->blocks[vt_ref].block_ref)((void) sizeof ((block_ref == graph_prep->alloc_prep->blocks
[vt_ref].block_ref) ? 1 : 0), __extension__ ({ if (block_ref ==
graph_prep->alloc_prep->blocks[vt_ref].block_ref) ; else
__assert_fail ("block_ref == graph_prep->alloc_prep->blocks[vt_ref].block_ref"
, "ccv_nnc_symbolic_graph_compile.c", 857, __extension__ __PRETTY_FUNCTION__
); }))
;
858 const int buffer_ref = graph_prep->alloc_prep->blocks[vt_ref].buffer_ref;
859 // If the buffer is a truly read-only one, no need to preserve.
860 if (TENSOR_READ_WRITE(graph_prep->alloc_prep->buffers[buffer_ref])(graph_prep->alloc_prep->buffers[buffer_ref].flags &
0xc)
== READ_ONLY)
861 return 0;
862 /* This needs a detailed explanation: what does preserve mean?
863 * For a parameterized loop, such as while { y = x + 1 } (y => x), if tensor x is
864 * also used outside of the while loop, we cannot reuse the memory region of x inside
865 * the loop, otherwise we would destroy x when computing y = x + 1 (assuming
866 * y uses the same memory region as x). The way to work around this is to use a different
867 * memory region for y = x + 1, but for the first iteration, have x point to the
868 * original. During the allocation process, the way to identify whether x should preserve
869 * its value or not is by looking up its parent tensor. If the symbol (tensor_block)'s input
870 * parent tensor is the same as the memory region it plans to use in the buffer, then we are
871 * good (buffer.p_refs[0] == p_refs[0]). A buffer can only point to one parent tensor, and
872 * it is the input tensor whenever that is possible. A tensor block can point to two parent
873 * tensors: one is the input tensor, the other is the output tensor. p_refs[0] should be the input
874 * tensor whenever that is possible. */
875 if (graph_prep->alloc_prep->buffers[buffer_ref].p_refs[0] - 1 == p_ref)
876 return 0;
877 // Otherwise, return 1 because we now need to preserve.
878 return 1;
879}
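Stripped of the graph bookkeeping, the preserve decision above reduces to a predicate over a handful of facts about the block and its buffer. The following is a simplified restatement under assumed, flattened inputs; it is a sketch of the logic only, not the library's data layout.

#include <assert.h>

/* Hypothetical flattened view of the inputs to the preserve decision above. */
typedef struct {
	int in_while_loop;    /* graph is a CCV_NNC_GRAPH_EXEC_P_WHILE body */
	int assigned;         /* tensor block is not UNASSIGNED */
	int p_ref_is_input;   /* parent ref is an input (and not an output) */
	int buffer_read_only; /* owning buffer is READ_ONLY */
	int buffer_p_ref;     /* buffer's parent ref (0-based), -1 if none */
	int block_p_ref;      /* block's parent ref (0-based) */
} preserve_facts_t;

/* Preserve is needed only when an input carried across loop iterations would
 * otherwise be clobbered by a writable buffer not backed by the same parent. */
static int need_preserve(const preserve_facts_t f)
{
	if (!f.in_while_loop || !f.assigned || !f.p_ref_is_input || f.buffer_read_only)
		return 0;
	return f.buffer_p_ref != f.block_p_ref;
}

int main(void)
{
	preserve_facts_t f = { 1, 1, 1, 0, 2, 4 };
	assert(need_preserve(f) == 1); /* while loop, writable buffer, different parent */
	f.buffer_p_ref = 4;
	assert(need_preserve(f) == 0); /* same parent tensor backs the buffer: safe */
	return 0;
}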
880
881static int _ccv_nnc_tensor_block_check_force_broadcast(const ccv_nnc_symbolic_graph_prep_t* const graph_prep, const int block_ref)
882{
883 assert(block_ref >= 0 && block_ref < graph_prep->tensor_symbol_info_size)((void) sizeof ((block_ref >= 0 && block_ref < graph_prep
->tensor_symbol_info_size) ? 1 : 0), __extension__ ({ if (
block_ref >= 0 && block_ref < graph_prep->tensor_symbol_info_size
) ; else __assert_fail ("block_ref >= 0 && block_ref < graph_prep->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_compile.c", 883, __extension__ __PRETTY_FUNCTION__
); }))
;
884 // If it is unassigned, no need to preserve.
885 if (TENSOR_EXPECT_UNASSIGNED(graph_prep->tensor_blocks[block_ref])((graph_prep->tensor_blocks[block_ref].flags & 0x3) ==
UNASSIGNED)
)
886 return 0;
887 // Only tape vars need to force broadcast; otherwise we already share the same memory region.
888 if (!(graph_prep->tensor_symbol_info[block_ref].flags & CCV_NNC_TENSOR_SYMBOL_TAPE_VAR))
889 return 0;
890 const int p_ref = graph_prep->tensor_blocks[block_ref].p_refs[0] - 1;
891 // If p is not output, no need to broadcast at all.
892 if (1 != _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref, graph_prep->p->exec_symbol_info + (graph_prep->exec_idx - 1)))
893 return 0;
894 const int vt_ref = graph_prep->alloc_prep->vt_blocks[block_ref];
895 assert(vt_ref >= 0)((void) sizeof ((vt_ref >= 0) ? 1 : 0), __extension__ ({ if
(vt_ref >= 0) ; else __assert_fail ("vt_ref >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 895, __extension__ __PRETTY_FUNCTION__); }))
;
896 assert(block_ref == graph_prep->alloc_prep->blocks[vt_ref].block_ref)((void) sizeof ((block_ref == graph_prep->alloc_prep->blocks
[vt_ref].block_ref) ? 1 : 0), __extension__ ({ if (block_ref ==
graph_prep->alloc_prep->blocks[vt_ref].block_ref) ; else
__assert_fail ("block_ref == graph_prep->alloc_prep->blocks[vt_ref].block_ref"
, "ccv_nnc_symbolic_graph_compile.c", 896, __extension__ __PRETTY_FUNCTION__
); }))
;
897 const int buffer_ref = graph_prep->alloc_prep->blocks[vt_ref].buffer_ref;
898 // If the buffer is a truly read-only one, no need to broadcast.
899 if (TENSOR_READ_WRITE(graph_prep->alloc_prep->buffers[buffer_ref])(graph_prep->alloc_prep->buffers[buffer_ref].flags &
0xc)
== READ_ONLY)
900 return 0;
901 // Otherwise, return 1 because we now need to force broadcast for this tape var.
902 return 1;
903}
904
905static void _ccv_nnc_tensor_multiview_full_pos(ccv_nnc_tensor_multiview_t* const mv, ccv_nnc_tensor_t* const tensor)
906{
907 assert(CCV_IS_TENSOR_MULTIVIEW(mv))((void) sizeof ((((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW)) ?
1 : 0), __extension__ ({ if (((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(mv)", "ccv_nnc_symbolic_graph_compile.c"
, 907, __extension__ __PRETTY_FUNCTION__); }))
;
908 int i;
909 for (i = 0; i < mv->kind + mv->repeat; i++)
910 if (CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i] == CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10)))
911 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i] = tensor;
912 else if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
913 _ccv_nnc_tensor_multiview_full_pos((ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i], tensor);
914}
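The placeholder substitution above is a recursive fill over a small tree: every slot still holding the placeholder sentinel is replaced by the concrete tensor, and multiview children are descended into. A toy sketch of the same pattern follows; the types and sentinel value here are made up for illustration.

#include <assert.h>
#include <stdint.h>

typedef struct toy_s {
	int is_inner;
	struct toy_s* child[2];
} toy_t;

/* Sentinel value, mirroring the 0x10 CCV_NNC_TENSOR_PLACEHOLDER above. */
#define PLACEHOLDER ((toy_t*)(intptr_t)0x10)

/* Replace every placeholder slot with `leaf`, recursing into inner nodes. */
static void fill(toy_t* node, toy_t* leaf)
{
	int i;
	for (i = 0; i < 2; i++)
		if (node->child[i] == PLACEHOLDER)
			node->child[i] = leaf;
		else if (node->child[i] && node->child[i]->is_inner)
			fill(node->child[i], leaf);
}

int main(void)
{
	toy_t leaf = { 0, { 0, 0 } };
	toy_t inner = { 1, { PLACEHOLDER, 0 } };
	toy_t root = { 1, { &inner, PLACEHOLDER } };
	fill(&root, &leaf);
	assert(inner.child[0] == &leaf && root.child[1] == &leaf);
	return 0;
}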
915
916static void _ccv_nnc_tensor_multiview_full_pos_rewire(const ccv_array_t* const tensor_metadata, ccv_nnc_tensor_multiview_t* const mv)
917{
918 assert(CCV_IS_TENSOR_MULTIVIEW(mv))((void) sizeof ((((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW)) ?
1 : 0), __extension__ ({ if (((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(mv)", "ccv_nnc_symbolic_graph_compile.c"
, 918, __extension__ __PRETTY_FUNCTION__); }))
;
919 int i;
920 if (mv->sp)
921 for (i = 0; i < mv->sp->rnum; i++)
922 {
923 ccv_nnc_tensor_t** const tensor = (ccv_nnc_tensor_t**)ccv_array_get(mv->sp, i)((void*)(((char*)((mv->sp)->data)) + (size_t)(mv->sp
)->rsize * (size_t)(i)))
;
924 if (CCV_NNC_IS_METADATA_POS(*tensor)((uintptr_t)(*tensor) & 1))
925 {
926 const int pos = (int)(intptr_t)*tensor;
927 *tensor = _ccv_nnc_tensor_metadata_get(tensor_metadata, pos);
928 assert(!CCV_IS_TENSOR_MULTIVIEW(*tensor))((void) sizeof ((!((*(int*)(*tensor)) & CCV_TENSOR_MULTIVIEW
)) ? 1 : 0), __extension__ ({ if (!((*(int*)(*tensor)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("!CCV_IS_TENSOR_MULTIVIEW(*tensor)",
"ccv_nnc_symbolic_graph_compile.c", 928, __extension__ __PRETTY_FUNCTION__
); }))
;
929 _ccv_nnc_tensor_metadata_rewire(tensor_metadata, (ccv_nnc_tensor_t*)(intptr_t)pos);
930 }
931 }
932 for (i = 0; i < mv->kind + mv->repeat; i++)
933 {
934 if (CCV_NNC_IS_METADATA_POS((int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)[i])((uintptr_t)((int)(intptr_t)((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]) & 1)
)
935 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i] = _ccv_nnc_tensor_metadata_get(tensor_metadata, (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
936 if (CCV_NNC_IS_METADATA_POS((int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)[i]->alias_ref)((uintptr_t)((int)(intptr_t)((mv)->_heap_data ? (mv)->_heap_data
: (mv)->_inline_data)[i]->alias_ref) & 1)
)
937 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->alias_ref = (uintptr_t)_ccv_nnc_tensor_metadata_get(tensor_metadata, (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]->alias_ref);
938 if (CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(mv)[i])((*(int*)(((mv)->_heap_data ? (mv)->_heap_data : (mv)->
_inline_data)[i])) & CCV_TENSOR_MULTIVIEW)
)
939 _ccv_nnc_tensor_multiview_full_pos_rewire(tensor_metadata, (ccv_nnc_tensor_multiview_t*)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
940 }
941}
942
943static int _ccv_nnc_tensor_multiview_gen(ccv_array_t* const tensor_metadata, const int preserve, const int assign_update, const ccv_nnc_tensor_param_t params, const ccv_nnc_symbolic_graph_prep_t* const graph_prep, const ccv_nnc_tensor_arena_t* const tensor_arena, const int block_ref)
944{
945 // Go to the root of the graph.
946 const ccv_nnc_symbolic_graph_prep_t* prep = graph_prep;
947 int i;
948 for (i = 1; prep->p; i++)
949 prep = prep->p;
950 // Root graph should have no dup tensor blocks.
951 assert(!prep->dup_tensor_block_ref)((void) sizeof ((!prep->dup_tensor_block_ref) ? 1 : 0), __extension__
({ if (!prep->dup_tensor_block_ref) ; else __assert_fail (
"!prep->dup_tensor_block_ref", "ccv_nnc_symbolic_graph_compile.c"
, 951, __extension__ __PRETTY_FUNCTION__); }))
;
952 const int c = i;
953 const ccv_nnc_symbolic_graph_prep_t* preps[c];
954 prep = graph_prep;
955 preps[c - 1] = prep;
956 for (i = 0; prep->p; i++)
957 preps[c - 2 - i] = prep = prep->p;
958 int ch[c]; // Use dynamic allocation for the array. This is an array to record our selections when recursing from top to bottom.
959 memset(ch, 0, sizeof(int) * c);
960 int pos = 0;
961 _ccv_nnc_tensor_multiview_down_find_pos(tensor_metadata, params, preserve, assign_update, preps, graph_prep, block_ref, ch, 0, &pos);
962 assert(ch[c - 1] == 0)((void) sizeof ((ch[c - 1] == 0) ? 1 : 0), __extension__ ({ if
(ch[c - 1] == 0) ; else __assert_fail ("ch[c - 1] == 0", "ccv_nnc_symbolic_graph_compile.c"
, 962, __extension__ __PRETTY_FUNCTION__); }))
; // This should never be modified.
963 assert(pos > 0)((void) sizeof ((pos > 0) ? 1 : 0), __extension__ ({ if (pos
> 0) ; else __assert_fail ("pos > 0", "ccv_nnc_symbolic_graph_compile.c"
, 963, __extension__ __PRETTY_FUNCTION__); }))
;
964 return pos;
965}
966
967static int _ccv_nnc_tensor_multiview_preserve_gen(ccv_array_t* const tensor_metadata, const ccv_nnc_tensor_param_t params, const ccv_nnc_symbolic_graph_prep_t* const graph_prep, ccv_nnc_tensor_t* const tensor)
968{
969 const int mv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_metadata, sizeof(ccv_nnc_tensor_multiview_t));
970 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, mv_pos);
971 ccv_nnc_tensor_t* const tv = CCV_NNC_IS_METADATA_POS(tensor)((uintptr_t)(tensor) & 1) ? _ccv_nnc_tensor_metadata_get(tensor_metadata, (int)(intptr_t)tensor) : tensor;
972 ccv_nnc_tensor_multiview((ccv_nnc_tensor_t*[]){
973 CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10)),
974 tv,
975 }, CCV_NNC_MULTIVIEW_K1N, 1, graph_prep->graph, mv);
976 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0] = CCV_NNC_TENSOR_PLACEHOLDER((ccv_nnc_tensor_t*)(intptr_t)(0x10));
977 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[1] = tensor;
978 return mv_pos;
979}
980
981static int _ccv_nnc_tensor_flat_if_multiview(ccv_array_t* const tensor_metadata, const int pos)
982{
983 ccv_nnc_tensor_t* tensor_ptr = _ccv_nnc_tensor_metadata_get(tensor_metadata, pos);
984 const int is_multiview = CCV_IS_TENSOR_MULTIVIEW(tensor_ptr)((*(int*)(tensor_ptr)) & CCV_TENSOR_MULTIVIEW);
985 if (!is_multiview)
986 return pos;
987 while (CCV_IS_TENSOR_MULTIVIEW(tensor_ptr)((*(int*)(tensor_ptr)) & CCV_TENSOR_MULTIVIEW))
988 {
989 const ccv_nnc_tensor_multiview_t* const mv = (const ccv_nnc_tensor_multiview_t*)tensor_ptr;
990 tensor_ptr = _ccv_nnc_tensor_metadata_get(tensor_metadata, (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0]);
991 }
992 const ccv_nnc_tensor_t tensor = *tensor_ptr;
993 const int new_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_metadata, sizeof(ccv_nnc_tensor_t));
994 ccv_nnc_tensor_t* const new_tensor = _ccv_nnc_tensor_metadata_get(tensor_metadata, new_pos);
995 *new_tensor = ccv_nnc_tensor(tensor.data.u8, tensor.info, 0);
996 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_metadata, pos);
997 new_tensor->alias_ref = (uintptr_t)pos;
998 ccv_nnc_tensor_synchronize_to_multiview(mv, (ccv_nnc_tensor_t*)(intptr_t)new_pos);
999 return new_pos;
1000}
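Flattening above walks down a chain of multiviews to the first concrete tensor and registers a synchronized flat copy of it. The chain-following part in isolation looks like the sketch below, with toy types; the real code also allocates metadata for the new tensor and calls ccv_nnc_tensor_synchronize_to_multiview.

#include <assert.h>

/* Toy stand-in: a "view" either is concrete or delegates to its first child. */
typedef struct view_s {
	int is_multiview;
	struct view_s* first; /* data[0] in the real multiview */
	int payload;
} view_t;

/* Descend through nested multiviews until a concrete view is reached,
 * mirroring the while loop in _ccv_nnc_tensor_flat_if_multiview above. */
static view_t* flatten(view_t* v)
{
	while (v->is_multiview)
		v = v->first;
	return v;
}

int main(void)
{
	view_t leaf = { 0, 0, 42 };
	view_t mid = { 1, &leaf, 0 };
	view_t top = { 1, &mid, 0 };
	assert(flatten(&top)->payload == 42);
	return 0;
}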
1001
1002static ccv_nnc_tensor_arena_t* _ccv_nnc_tensor_arena_new(ccv_nnc_symbolic_graph_prep_t* const graph_prep, const ccv_nnc_symbolic_graph_compile_allocator_t allocator, const ccv_nnc_tensor_arena_t* const p_arena, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size)
1003{
1004 // All tensors are assigned out. Now, num_assigned is the number of discontinuous buffers.
1005 // Each tensor has its designation in the assigned array, and its offset in allocated_offset.
1006 const ccv_nnc_tensor_alloc_prep_t* const alloc_prep = graph_prep->alloc_prep;
1007 ccv_nnc_tensor_block_t* const tensor_blocks = graph_prep->tensor_blocks;
1008 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = graph_prep->tensor_symbol_info;
1009 const int tensor_symbol_info_size = graph_prep->tensor_symbol_info_size;
1010 const ccv_nnc_symbolic_graph_prep_t* const p_graph_prep = graph_prep->p;
1011 const ccv_nnc_tensor_alloc_prep_t* const p_alloc_prep = p_graph_prep ? p_graph_prep->alloc_prep : 0;
1012 const int* const dup_tensor_block_ref = graph_prep->dup_tensor_block_ref;
1013 const int unroll_count = graph_prep->unroll_count;
1014 int i, j;
1015 for (i = 0; i < tensor_symbol_info_size; i++)
1016 for (j = 0; TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i])((tensor_blocks[i].flags & 0x3) == UNASSIGNED) && j < unroll_count; j++)
1017 {
1018 const int dup_ref = dup_tensor_block_ref[i * unroll_count + j];
1019 if (dup_ref >= 0 && !TENSOR_EXPECT_UNASSIGNED(tensor_blocks[dup_ref])((tensor_blocks[dup_ref].flags & 0x3) == UNASSIGNED))
1020 TENSOR_EXPECT_UNSET_UNASSIGNED(tensor_blocks[i])(tensor_blocks[i].flags = (tensor_blocks[i].flags & ~0x1)
)
;
1021 }
1022 ccv_nnc_tensor_arena_t* tensor_arena = (ccv_nnc_tensor_arena_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_arena_t) + sizeof(tensor_arena->buffers[0]) * alloc_prep->buffer_size + sizeof(ccv_nnc_tensor_arena_t*) * graph_prep->sub_prep_size + sizeof(ccv_nnc_tensor_t*) * tensor_symbol_info_size + sizeof(int) * tensor_symbol_info_size);
1023 graph_prep->tensor_arena = tensor_arena;
1024 tensor_arena->graph_ref = (intptr_t)graph_prep->symbolic_graph;
1025 tensor_arena->buffers = (void*)(tensor_arena + 1);
1026 tensor_arena->buffer_size = alloc_prep->buffer_size;
1027 tensor_arena->vt_tensor_size = tensor_symbol_info_size;
1028 tensor_arena->sub_arenas = (ccv_nnc_tensor_arena_t**)(tensor_arena->buffers + alloc_prep->buffer_size);
1029 tensor_arena->vt_tensors = (ccv_nnc_tensor_t**)(tensor_arena->sub_arenas + graph_prep->sub_prep_size);
1030 tensor_arena->vt_alias_refs = (int*)(tensor_arena->vt_tensors + tensor_symbol_info_size);
1031 tensor_arena->pb_vt_tensors = 0;
1032 tensor_arena->vt_alias_r_refs_p = 0;
1033 tensor_arena->vt_alias_r_refs = 0;
1034 tensor_arena->vt_sizes = 0;
1035 tensor_arena->sub_arena_size = graph_prep->sub_prep_size;
1036 tensor_arena->tensor_metadata = ccv_array_new(16 /* align to 16 bytes */, 0, 0);
1037 tensor_arena->m_tensor_idx = ccv_array_new(sizeof(int), 0, 0);
1038 tensor_arena->allocator.context.free = allocator.context.free;
1039 tensor_arena->allocator.isa = allocator.isa;
1040 // Copy alias_ref info back to the tensor arena.
1041 for (i = 0; i < tensor_symbol_info_size; i++)
1042 tensor_arena->vt_alias_refs[i] = tensor_symbol_info[i].alias_ref;
1043 // Do the buffer copies.
1044 for (i = 0; i < alloc_prep->buffer_size; i++)
1045 tensor_arena->buffers[i].type = alloc_prep->buffers[i].type,
1046 tensor_arena->buffers[i].pin_mem = alloc_prep->buffers[i].pin_mem,
1047 tensor_arena->buffers[i].size = alloc_prep->buffers[i].size;
1048 if (graph_prep->while_count_tensor)
1049 {
1050 // If we need to have a while count tensor, allocate that first and set its pointer to point to the while_count variable.
1051 int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
1052 assert((0 << 1) + 1 == pos)((void) sizeof (((0 << 1) + 1 == pos) ? 1 : 0), __extension__
({ if ((0 << 1) + 1 == pos) ; else __assert_fail ("(0 << 1) + 1 == pos"
, "ccv_nnc_symbolic_graph_compile.c", 1052, __extension__ __PRETTY_FUNCTION__
); }))
; // pos must be 0 position.
1053 ccv_nnc_tensor_t* const tensor = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1054 *tensor = ccv_nnc_tensor_for_while_count(graph_prep->graph);
1055 }
1056 assert((p_arena && p_graph_prep) || (!p_arena && !p_graph_prep))((void) sizeof (((p_arena && p_graph_prep) || (!p_arena
&& !p_graph_prep)) ? 1 : 0), __extension__ ({ if ((p_arena
&& p_graph_prep) || (!p_arena && !p_graph_prep
)) ; else __assert_fail ("(p_arena && p_graph_prep) || (!p_arena && !p_graph_prep)"
, "ccv_nnc_symbolic_graph_compile.c", 1056, __extension__ __PRETTY_FUNCTION__
); }))
;
1057 if (p_arena && p_graph_prep)
1058 {
1059 // Don't need to allocate the actual buffer, just use the pointer from the above.
1060 PRINT(CCV_CLI_VERBOSE, "Buffer assignment for sub arena %p (parent %p)\n", tensor_arena, p_arena)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("Buffer assignment for sub arena %p (parent %p)\n",
tensor_arena, p_arena); fflush(stdout); } } while (0)
;
1061 for (i = 0; i < tensor_arena->buffer_size; i++)
1062 {
1063 const int p_ref = alloc_prep->buffers[i].p_refs[0] - 1;
1064 int unref_p_ref = p_ref;
1065 while (p_graph_prep->tensor_blocks[unref_p_ref].ref)
1066 unref_p_ref = p_graph_prep->tensor_blocks[unref_p_ref].ref - 1;
1067 assert(unref_p_ref >= 0)((void) sizeof ((unref_p_ref >= 0) ? 1 : 0), __extension__
({ if (unref_p_ref >= 0) ; else __assert_fail ("unref_p_ref >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 1067, __extension__ __PRETTY_FUNCTION__
); }))
;
1068 const int p_unroll_count = p_graph_prep->unroll_count;
1069 if (p_graph_prep->dup_tensor_block_ref &&
1070 p_graph_prep->dup_tensor_block_ref[p_ref * p_unroll_count] >= 0 &&
1071 p_graph_prep->dup_tensor_block_ref[p_ref * p_unroll_count] != p_ref)
1072 {
1073 // This condition means that in the parent graph, we point to multiple tensor blocks for the same
1074 // buffer; therefore, we cannot have one single pointer assigned in this case.
1075 // Later we will handle this by generating a ccv_nnc_tensor_multiview_t structure.
1076 tensor_arena->buffers[i].ptr = 0;
1077 PRINT(CCV_CLI_VERBOSE, "|-Cannot assign buffer %d, it points to multiple blocks (multi view tensor required)\n", i)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-Cannot assign buffer %d, it points to multiple blocks (multi view tensor required)\n"
, i); fflush(stdout); } } while (0)
;
1078 continue;
1079 }
1080 // Otherwise, find the actual buffer pointer.
1081 const int vt_ref = p_alloc_prep->vt_blocks[unref_p_ref];
1082 assert(vt_ref >= 0)((void) sizeof ((vt_ref >= 0) ? 1 : 0), __extension__ ({ if
(vt_ref >= 0) ; else __assert_fail ("vt_ref >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 1082, __extension__ __PRETTY_FUNCTION__); }))
;
1083 const int buffer_ref = p_alloc_prep->blocks[vt_ref].buffer_ref;
1084 if (!p_arena->buffers[buffer_ref].ptr)
1085 {
1086 // Pass it down as 0 ptr.
1087 tensor_arena->buffers[i].ptr = 0;
1088 PRINT(CCV_CLI_VERBOSE, "|-Cannot assign buffer %d, it points to multiple blocks (multi view tensor required)\n", i)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-Cannot assign buffer %d, it points to multiple blocks (multi view tensor required)\n"
, i); fflush(stdout); } } while (0)
;
1089 continue;
1090 }
1091 const uint64_t offset = p_alloc_prep->blocks[vt_ref].offset;
1092 tensor_arena->buffers[i].ptr = p_arena->buffers[buffer_ref].ptr + offset;
1093 PRINT(CCV_CLI_VERBOSE, "|-Assign block %d in parent arena to buffer %d with offset %lu\n", vt_ref, i, (unsigned long)offset)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-Assign block %d in parent arena to buffer %d with offset %lu\n"
, vt_ref, i, (unsigned long)offset); fflush(stdout); } } while
(0)
;
1094 }
1095 } else {
1096 // Now, allocate actual buffers.
1097 PRINT(CCV_CLI_VERBOSE, "Buffer allocation for arena %p\n", tensor_arena)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("Buffer allocation for arena %p\n", tensor_arena); fflush
(stdout); } } while (0)
;
1098 for (i = 0; i < tensor_arena->buffer_size; i++)
1099 {
1100 const int buffer_type = tensor_arena->buffers[i].type;
1101 const int memory_type = CCV_TENSOR_GET_MEMORY(buffer_type)((buffer_type) & 0x3);
1102#ifdef HAVE_CUDA1
1103 if (memory_type == CCV_TENSOR_GPU_MEMORY)
1104 {
1105 const int device_id = CCV_TENSOR_GET_DEVICE_ID(buffer_type)(((buffer_type) & 0xfff00) >> 8);
1106 if (allocator.isa && allocator.isa->alloc)
1107 tensor_arena->buffers[i].ptr = (uint8_t*)allocator.isa->alloc(buffer_type, 0, tensor_arena->buffers[i].size, allocator.context.alloc);
1108 else
1109 tensor_arena->buffers[i].ptr = (uint8_t*)cumalloc(device_id, tensor_arena->buffers[i].size);
1110 PRINT(CCV_CLI_VERBOSE, "|-Allocate buffer %d with ptr %p, size %lu\n", i, tensor_arena->buffers[i].ptr, (unsigned long)tensor_arena->buffers[i].size)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-Allocate buffer %d with ptr %p, size %lu\n", i, tensor_arena
->buffers[i].ptr, (unsigned long)tensor_arena->buffers[
i].size); fflush(stdout); } } while (0)
;
1111 } else {
1112 assert(memory_type == CCV_TENSOR_CPU_MEMORY)((void) sizeof ((memory_type == CCV_TENSOR_CPU_MEMORY) ? 1 : 0
), __extension__ ({ if (memory_type == CCV_TENSOR_CPU_MEMORY)
; else __assert_fail ("memory_type == CCV_TENSOR_CPU_MEMORY"
, "ccv_nnc_symbolic_graph_compile.c", 1112, __extension__ __PRETTY_FUNCTION__
); }))
;
1113 if (tensor_arena->buffers[i].pin_mem)
1114 tensor_arena->buffers[i].ptr = (uint8_t*)cuhostalloc(tensor_arena->buffers[i].size);
1115 else
1116 ccmemalignposix_memalign((void**)&tensor_arena->buffers[i].ptr, 16, tensor_arena->buffers[i].size);
1117 PRINT(CCV_CLI_VERBOSE, "|-Allocate buffer %d with ptr %p, size %lu\n", i, tensor_arena->buffers[i].ptr, (unsigned long)tensor_arena->buffers[i].size)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-Allocate buffer %d with ptr %p, size %lu\n", i, tensor_arena
->buffers[i].ptr, (unsigned long)tensor_arena->buffers[
i].size); fflush(stdout); } } while (0)
;
1118 }
1119#else
1120 assert(memory_type == CCV_TENSOR_CPU_MEMORY)((void) sizeof ((memory_type == CCV_TENSOR_CPU_MEMORY) ? 1 : 0
), __extension__ ({ if (memory_type == CCV_TENSOR_CPU_MEMORY)
; else __assert_fail ("memory_type == CCV_TENSOR_CPU_MEMORY"
, "ccv_nnc_symbolic_graph_compile.c", 1120, __extension__ __PRETTY_FUNCTION__
); }))
;
1121 ccmemalignposix_memalign((void**)&tensor_arena->buffers[i].ptr, 16, tensor_arena->buffers[i].size);
1122 PRINT(CCV_CLI_VERBOSE, "|-Allocate buffer %d with ptr %p, size %lu\n", i, tensor_arena->buffers[i].ptr, (unsigned long)tensor_arena->buffers[i].size)do { if ((CCV_CLI_VERBOSE & ccv_cli_get_output_levels()))
{ printf("|-Allocate buffer %d with ptr %p, size %lu\n", i, tensor_arena
->buffers[i].ptr, (unsigned long)tensor_arena->buffers[
i].size); fflush(stdout); } } while (0)
;
1123#endif
1124 assert(tensor_arena->buffers[i].ptr)((void) sizeof ((tensor_arena->buffers[i].ptr) ? 1 : 0), __extension__
({ if (tensor_arena->buffers[i].ptr) ; else __assert_fail
("tensor_arena->buffers[i].ptr", "ccv_nnc_symbolic_graph_compile.c"
, 1124, __extension__ __PRETTY_FUNCTION__); }))
;
1125 }
1126 }
1127 // Go over sub_preps and allocate arenas for them. Do it this early because
1128 // we may reference tensors from sub arenas; the reason we need to reference
1129 // tensors from sub arenas is that, for output tensors, the sub arena's tensor
1130 // will have automatic reference updates.
1131 for (i = 0; i < tensor_arena->sub_arena_size; i++)
1132 if (graph_prep->sub_preps[i])
1133 tensor_arena->sub_arenas[i] = _ccv_nnc_tensor_arena_new(graph_prep->sub_preps[i], allocator, tensor_arena, tensor_binds, tensor_bind_size);
1134 else
1135 tensor_arena->sub_arenas[i] = 0;
1136 memset(tensor_arena->vt_tensors, 0, sizeof(ccv_nnc_tensor_t*) * tensor_symbol_info_size);
1137 // Now that sub-arenas are all assigned, go over their outputs to assign out tensors from their outputs directly.
1138 ccv_nnc_tensor_t** sub_arena_out_tensors = tensor_arena->sub_arena_size ? (ccv_nnc_tensor_t**)cccalloccalloc(tensor_symbol_info_size, sizeof(ccv_nnc_tensor_t*)) : 0;
1139 for (i = 0; i < tensor_arena->sub_arena_size; i++)
1140 if (tensor_arena->sub_arenas[i])
1141 {
1142 assert(graph_prep->sub_preps[i])((void) sizeof ((graph_prep->sub_preps[i]) ? 1 : 0), __extension__
({ if (graph_prep->sub_preps[i]) ; else __assert_fail ("graph_prep->sub_preps[i]"
, "ccv_nnc_symbolic_graph_compile.c", 1142, __extension__ __PRETTY_FUNCTION__
); }))
;
1143 const int exec_idx = graph_prep->sub_preps[i]->exec_idx - 1;
1144 const ccv_nnc_graph_exec_symbol_info_t* const node = graph_prep->exec_symbol_info + exec_idx;
1145 if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
1146 for (j = 0; j < node->output_size; j++)
1147 {
1148 const int idx = node->outputs[j];
1149 const int s_idx = *(int*)ccv_array_get(tensor_symbol_info[idx].s_ref, i)((void*)(((char*)((tensor_symbol_info[idx].s_ref)->data)) +
(size_t)(tensor_symbol_info[idx].s_ref)->rsize * (size_t)
(i)))
- 1;
1150 assert(s_idx >= 0)((void) sizeof ((s_idx >= 0) ? 1 : 0), __extension__ ({ if
(s_idx >= 0) ; else __assert_fail ("s_idx >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 1150, __extension__ __PRETTY_FUNCTION__); }))
;
1151 ccv_nnc_tensor_t* sub_tensor = tensor_arena->sub_arenas[i]->vt_tensors[s_idx];
1152 assert(sub_arena_out_tensors[idx] == 0)((void) sizeof ((sub_arena_out_tensors[idx] == 0) ? 1 : 0), __extension__
({ if (sub_arena_out_tensors[idx] == 0) ; else __assert_fail
("sub_arena_out_tensors[idx] == 0", "ccv_nnc_symbolic_graph_compile.c"
, 1152, __extension__ __PRETTY_FUNCTION__); }))
;
1153 ccv_nnc_tensor_t* sub_alias = (ccv_nnc_tensor_t*)sub_tensor->alias_ref;
1154 // Only assign if it is a multiview tensor.
1155 if (CCV_IS_TENSOR_MULTIVIEW(sub_tensor)((*(int*)(sub_tensor)) & CCV_TENSOR_MULTIVIEW) ||
1156 (sub_alias && CCV_IS_TENSOR_MULTIVIEW(sub_alias)((*(int*)(sub_alias)) & CCV_TENSOR_MULTIVIEW)))
1157 sub_arena_out_tensors[idx] = sub_tensor;
1158 }
1159 }
1160 // Assigning out the tensors (in case of sharing tensors / in-place ops).
1161 for (i = 0; i < tensor_symbol_info_size; i++)
1162 if (TENSOR_EXPECT_COMPUTABLE(tensor_blocks[i])(!((tensor_blocks[i].flags & 0x3) == ALIAS) && !(
(tensor_blocks[i].flags & 0x3) == UNASSIGNED))
)
1163 {
1164 const int vt_ref = alloc_prep->vt_blocks[i];
1165 const int buffer_ref = vt_ref >= 0 ? alloc_prep->blocks[vt_ref].buffer_ref : -1;
1166 // Either we have dup_tensor_block_ref in the current layer, or we have it in a
1167 // previous layer; therefore, we cannot really find the buffer ptr.
1168 if ((!sub_arena_out_tensors || !sub_arena_out_tensors[i]) && // If it is already generated by the sub arena, it can be an ordinary out tensor. (What if the out tensor is not even generated by the sub graph when running? In that case, the behavior is undefined anyway.)
1169 ((graph_prep->dup_tensor_block_ref &&
1170 graph_prep->dup_tensor_block_ref[i * unroll_count] >= 0 &&
1171 graph_prep->dup_tensor_block_ref[i * unroll_count] != i) ||
1172 (buffer_ref >= 0 && !tensor_arena->buffers[buffer_ref].ptr)))
1173 {
1174 assert(graph_prep->p)((void) sizeof ((graph_prep->p) ? 1 : 0), __extension__ ({
if (graph_prep->p) ; else __assert_fail ("graph_prep->p"
, "ccv_nnc_symbolic_graph_compile.c", 1174, __extension__ __PRETTY_FUNCTION__
); }))
; // This must be in a sub-graph.
1175 // If this is an input tensor and it needs to be preserved, wait until we go through the inputs to preserve it.
1176 if (graph_prep->tensor_blocks[i].p_refs[0] && _ccv_nnc_tensor_block_check_preserve(graph_prep, i))
1177 continue;
1178 const int pos = _ccv_nnc_tensor_multiview_gen(tensor_arena->tensor_metadata, 0, tensor_symbol_info[i].assign_ref, tensor_symbol_info[i].info, graph_prep, tensor_arena, i);
1179 tensor_arena->vt_tensors[i] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1180 ccv_array_push(tensor_arena->m_tensor_idx, &pos);
1181 } else if (!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i])((tensor_blocks[i].flags & 0x3) == UNASSIGNED)) {
1182 // When we want to allocate, we don't really need to if it needs force broadcast, because we will handle that later.
1183 const uint64_t offset = alloc_prep->blocks[vt_ref].offset;
1184 // If already created, use the same tensor, and continue.
1185 // Having ptr.
1186 int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
1187 ccv_nnc_tensor_t* const tensor = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1188 // Also, set its allocations.
1189 // Since tensor view is bit compatible with tensor, we can just cast.
1190 *tensor = ccv_nnc_tensor(tensor_arena->buffers[buffer_ref].ptr + offset, tensor_symbol_info[i].info, 0);
1191 assert(offset + tensor_blocks[i].size <= tensor_arena->buffers[buffer_ref].size)((void) sizeof ((offset + tensor_blocks[i].size <= tensor_arena
->buffers[buffer_ref].size) ? 1 : 0), __extension__ ({ if (
offset + tensor_blocks[i].size <= tensor_arena->buffers
[buffer_ref].size) ; else __assert_fail ("offset + tensor_blocks[i].size <= tensor_arena->buffers[buffer_ref].size"
, "ccv_nnc_symbolic_graph_compile.c", 1191, __extension__ __PRETTY_FUNCTION__
); }))
;
1192 // If we need to force broadcast, we need to wrap it in a multiview.
1193 if (graph_prep->tensor_blocks[i].p_refs[0] &&
1194 _ccv_nnc_tensor_block_check_force_broadcast(graph_prep, i))
1195 {
1196 const int mv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_multiview_t));
1197 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, mv_pos);
1198 ccv_nnc_tensor_t* const tv = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1199 ccv_nnc_tensor_multiview((ccv_nnc_tensor_t*[]){
1200 tv,
1201 }, 0, 1, graph_prep->graph, mv);
1202 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1203 pos = mv_pos;
1204 ccv_array_push(tensor_arena->m_tensor_idx, &mv_pos);
1205 }
1206 tensor_arena->vt_tensors[i] = (ccv_nnc_tensor_t*)(intptr_t)pos; // Cast into vt_tensors for now, and later will rewire it.
1207 }
1208 }
1209 // Handle binded tensors. First handle cases without aliases.
1210 for (i = 0; i < tensor_bind_size; i++)
1211 {
1212 assert(tensor_binds[i].tensor)((void) sizeof ((tensor_binds[i].tensor) ? 1 : 0), __extension__
({ if (tensor_binds[i].tensor) ; else __assert_fail ("tensor_binds[i].tensor"
, "ccv_nnc_symbolic_graph_compile.c", 1212, __extension__ __PRETTY_FUNCTION__
); }))
;
1213 const ccv_nnc_tensor_symbol_t resolved_symbol = ccv_nnc_tensor_symbol_resolve(graph_prep->symbolic_graph, tensor_binds[i].symbol);
1214 if (resolved_symbol.d >= 0)
1215 {
1216 int d = resolved_symbol.d;
1217 if (TENSOR_EXPECT_ALIAS(tensor_blocks[d])((tensor_blocks[d].flags & 0x3) == ALIAS))
1218 continue;
1219 // This check is for in-place ops. Only in-place op could have unassigned but ref.
1220 // It has nothing to do with alias.
1221 while (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[d])((tensor_blocks[d].flags & 0x3) == UNASSIGNED) && tensor_blocks[d].ref)
1222 d = tensor_blocks[d].ref - 1;
1223 // For binded tensors, it shouldn't be assigned yet.
1224 // If it is assigned, the pointer should match the one from the binded tensor.
1225 // This can only happen if an enforced in-place tensor is binded twice. If that
1226 // happens, we need to make sure it is binded to the same location.
1227 assert(!tensor_arena->vt_tensors[d] || tensor_arena->vt_tensors[d]->data.u8 == tensor_binds[i].tensor->data.u8)((void) sizeof ((!tensor_arena->vt_tensors[d] || tensor_arena
->vt_tensors[d]->data.u8 == tensor_binds[i].tensor->
data.u8) ? 1 : 0), __extension__ ({ if (!tensor_arena->vt_tensors
[d] || tensor_arena->vt_tensors[d]->data.u8 == tensor_binds
[i].tensor->data.u8) ; else __assert_fail ("!tensor_arena->vt_tensors[d] || tensor_arena->vt_tensors[d]->data.u8 == tensor_binds[i].tensor->data.u8"
, "ccv_nnc_symbolic_graph_compile.c", 1227, __extension__ __PRETTY_FUNCTION__
); }))
;
1228 // See above assertion.
1229 if (tensor_arena->vt_tensors[d])
1230 continue;
1231 if (CCV_IS_TENSOR_VIEW(tensor_binds[i].tensor)((*(int*)(tensor_binds[i].tensor)) & CCV_TENSOR_VIEW))
1232 {
1233 int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_view_t));
1234 ccv_nnc_tensor_view_t* const tv = (ccv_nnc_tensor_view_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1235 ccv_nnc_tensor_view_t* const otv = (ccv_nnc_tensor_view_t*)tensor_binds[i].tensor;
1236 if (otv->off > 0) // If there is an offset, this has to be the same dimensionality, or smaller at each dimension.
1237 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
1238 { assert(tensor_symbol_info[d].info.dim[j] <= otv->info.dim[j])((void) sizeof ((tensor_symbol_info[d].info.dim[j] <= otv->
info.dim[j]) ? 1 : 0), __extension__ ({ if (tensor_symbol_info
[d].info.dim[j] <= otv->info.dim[j]) ; else __assert_fail
("tensor_symbol_info[d].info.dim[j] <= otv->info.dim[j]"
, "ccv_nnc_symbolic_graph_compile.c", 1238, __extension__ __PRETTY_FUNCTION__
); }))
; }
1239 if (ccv_nnc_dimension_count(otv->inc) > 0)
1240 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
1241 { assert(tensor_symbol_info[d].info.dim[j] <= otv->inc[j])((void) sizeof ((tensor_symbol_info[d].info.dim[j] <= otv->
inc[j]) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[d].
info.dim[j] <= otv->inc[j]) ; else __assert_fail ("tensor_symbol_info[d].info.dim[j] <= otv->inc[j]"
, "ccv_nnc_symbolic_graph_compile.c", 1241, __extension__ __PRETTY_FUNCTION__
); }))
; }
1242 else // if it doesn't have inc, it is OK for it to be, as a whole, smaller than or equal to the binded one.
1243 { assert(ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count(tensor_symbol_info[d].info))((void) sizeof ((ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count
(tensor_symbol_info[d].info)) ? 1 : 0), __extension__ ({ if (
ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count
(tensor_symbol_info[d].info)) ; else __assert_fail ("ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count(tensor_symbol_info[d].info)"
, "ccv_nnc_symbolic_graph_compile.c", 1243, __extension__ __PRETTY_FUNCTION__
); }))
; }
1244 memcpy(tv, otv, sizeof(ccv_nnc_tensor_view_t));
1245 memcpy(tv->info.dim, tensor_symbol_info[d].info.dim, sizeof(tv->info.dim));
1246 tensor_arena->vt_tensors[d] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1247 } else {
1248 int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
1249 ccv_nnc_tensor_t* const tv = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1250 *tv = ccv_nnc_tensor(tensor_binds[i].tensor->data.ptr, tensor_symbol_info[d].info, 0);
1251 tensor_arena->vt_tensors[d] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1252 }
1253 }
1254 }
1255 // Handle binded tensors. We handle alias here so it can reference to binded tensors.
1256 for (i = 0; i < tensor_bind_size; i++)
1257 {
1258 assert(tensor_binds[i].tensor)((void) sizeof ((tensor_binds[i].tensor) ? 1 : 0), __extension__
({ if (tensor_binds[i].tensor) ; else __assert_fail ("tensor_binds[i].tensor"
, "ccv_nnc_symbolic_graph_compile.c", 1258, __extension__ __PRETTY_FUNCTION__
); }))
;
1259 const ccv_nnc_tensor_symbol_t resolved_symbol = ccv_nnc_tensor_symbol_resolve(graph_prep->symbolic_graph, tensor_binds[i].symbol);
1260 if (resolved_symbol.d >= 0)
1261 {
1262 int d = resolved_symbol.d;
1263 if (TENSOR_EXPECT_ALIAS(tensor_blocks[d])((tensor_blocks[d].flags & 0x3) == ALIAS))
1264 d = tensor_symbol_info[d].alias_ref - 1; // Bind back to the original.
1265 // This check is for in-place ops. Only an in-place op could be unassigned but carry a ref.
1266 // It has nothing to do with aliases.
1267 while (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[d])((tensor_blocks[d].flags & 0x3) == UNASSIGNED) && tensor_blocks[d].ref)
1268 d = tensor_blocks[d].ref - 1;
1269 if (tensor_arena->vt_tensors[d])
1270 continue;
1271 // Assert original alias has no ofs. Otherwise our binding will be problematic.
1272 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
1273 { assert(tensor_symbol_info[resolved_symbol.d].ofs[j] == 0)((void) sizeof ((tensor_symbol_info[resolved_symbol.d].ofs[j]
== 0) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[resolved_symbol
.d].ofs[j] == 0) ; else __assert_fail ("tensor_symbol_info[resolved_symbol.d].ofs[j] == 0"
, "ccv_nnc_symbolic_graph_compile.c", 1273, __extension__ __PRETTY_FUNCTION__
); }))
; }
1274 if (CCV_IS_TENSOR_VIEW(tensor_binds[i].tensor)((*(int*)(tensor_binds[i].tensor)) & CCV_TENSOR_VIEW))
1275 {
1276 int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_view_t));
1277 ccv_nnc_tensor_view_t* const tv = (ccv_nnc_tensor_view_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1278 ccv_nnc_tensor_view_t* const otv = (ccv_nnc_tensor_view_t*)tensor_binds[i].tensor;
1279 if (otv->off > 0) // If there is an offset, this has to be the same dimensionality, or smaller at each dimension.
1280 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
1281 { assert(tensor_symbol_info[d].info.dim[j] <= otv->info.dim[j])((void) sizeof ((tensor_symbol_info[d].info.dim[j] <= otv->
info.dim[j]) ? 1 : 0), __extension__ ({ if (tensor_symbol_info
[d].info.dim[j] <= otv->info.dim[j]) ; else __assert_fail
("tensor_symbol_info[d].info.dim[j] <= otv->info.dim[j]"
, "ccv_nnc_symbolic_graph_compile.c", 1281, __extension__ __PRETTY_FUNCTION__
); }))
; }
1282 if (ccv_nnc_dimension_count(otv->inc) > 0)
1283 for (j = 0; j < CCV_NNC_MAX_DIM_ALLOC(12); j++)
1284 { assert(tensor_symbol_info[d].info.dim[j] <= otv->inc[j])((void) sizeof ((tensor_symbol_info[d].info.dim[j] <= otv->
inc[j]) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[d].
info.dim[j] <= otv->inc[j]) ; else __assert_fail ("tensor_symbol_info[d].info.dim[j] <= otv->inc[j]"
, "ccv_nnc_symbolic_graph_compile.c", 1284, __extension__ __PRETTY_FUNCTION__
); }))
; }
1285 else // If it doesn't have inc, it is OK for the whole to be smaller than or equal to the bound one.
1286 { assert(ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count(tensor_symbol_info[d].info))((void) sizeof ((ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count
(tensor_symbol_info[d].info)) ? 1 : 0), __extension__ ({ if (
ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count
(tensor_symbol_info[d].info)) ; else __assert_fail ("ccv_nnc_tensor_count(otv->info) >= ccv_nnc_tensor_count(tensor_symbol_info[d].info)"
, "ccv_nnc_symbolic_graph_compile.c", 1286, __extension__ __PRETTY_FUNCTION__
); }))
; }
1287 memcpy(tv, otv, sizeof(ccv_nnc_tensor_view_t));
1288 memcpy(tv->info.dim, tensor_symbol_info[d].info.dim, sizeof(tv->info.dim));
1289 tensor_arena->vt_tensors[d] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1290 } else {
1291 int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
1292 ccv_nnc_tensor_t* const tv = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1293 *tv = ccv_nnc_tensor(tensor_binds[i].tensor->data.ptr, tensor_symbol_info[d].info, 0);
1294 tensor_arena->vt_tensors[d] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1295 }
1296 }
1297 }
1298 // Assign out refs first; refs are the simple ones to handle (because they point to exactly the same metadata and the same region).
1299 for (i = 0; i < tensor_symbol_info_size; i++)
1300 // It could be a bound tensor (or unused); in that case, it doesn't have a ref.
1301 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i])((tensor_blocks[i].flags & 0x3) == UNASSIGNED) && tensor_blocks[i].ref && !tensor_arena->vt_tensors[i])
1302 {
1303 int ref = tensor_blocks[i].ref - 1;
1304 while (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[ref])((tensor_blocks[ref].flags & 0x3) == UNASSIGNED) && tensor_blocks[ref].ref)
1305 ref = tensor_blocks[ref].ref - 1;
1306 assert(tensor_arena->vt_tensors[ref])((void) sizeof ((tensor_arena->vt_tensors[ref]) ? 1 : 0), __extension__
({ if (tensor_arena->vt_tensors[ref]) ; else __assert_fail
("tensor_arena->vt_tensors[ref]", "ccv_nnc_symbolic_graph_compile.c"
, 1306, __extension__ __PRETTY_FUNCTION__); }))
;
1307 tensor_arena->vt_tensors[i] = tensor_arena->vt_tensors[ref];
1308 }
1309 // Now that refs are assigned out, handle the case where I need to preserve a tensor because I am a sub-graph of a while loop.
1310 if (graph_prep->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
1311 {
1312 assert(graph_prep->p)((void) sizeof ((graph_prep->p) ? 1 : 0), __extension__ ({
if (graph_prep->p) ; else __assert_fail ("graph_prep->p"
, "ccv_nnc_symbolic_graph_compile.c", 1312, __extension__ __PRETTY_FUNCTION__
); }))
;
1313 const ccv_nnc_graph_exec_symbol_info_t* node = graph_prep->p->exec_symbol_info + (graph_prep->exec_idx - 1);
1314 const int p_idx = graph_prep->p_idx - 1;
1315 for (i = 0; i < node->input_size; i++)
1316 {
1317 const int idx = node->inputs[i];
1318 int block_ref = *(int*)ccv_array_get(graph_prep->p->tensor_symbol_info[idx].s_ref, p_idx)((void*)(((char*)((graph_prep->p->tensor_symbol_info[idx
].s_ref)->data)) + (size_t)(graph_prep->p->tensor_symbol_info
[idx].s_ref)->rsize * (size_t)(p_idx)))
- 1;
1319 assert(!tensor_blocks[block_ref].ref)((void) sizeof ((!tensor_blocks[block_ref].ref) ? 1 : 0), __extension__
({ if (!tensor_blocks[block_ref].ref) ; else __assert_fail (
"!tensor_blocks[block_ref].ref", "ccv_nnc_symbolic_graph_compile.c"
, 1319, __extension__ __PRETTY_FUNCTION__); }))
;
1320 const int vt_ref = alloc_prep->vt_blocks[block_ref];
1321 if (!_ccv_nnc_tensor_block_check_preserve(graph_prep, block_ref))
1322 continue;
1323 assert(vt_ref >= 0)((void) sizeof ((vt_ref >= 0) ? 1 : 0), __extension__ ({ if
(vt_ref >= 0) ; else __assert_fail ("vt_ref >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 1323, __extension__ __PRETTY_FUNCTION__); }))
;
1324 const int buffer_ref = alloc_prep->blocks[vt_ref].buffer_ref;
1325 assert(!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[block_ref]))((void) sizeof ((!((tensor_blocks[block_ref].flags & 0x3)
== UNASSIGNED)) ? 1 : 0), __extension__ ({ if (!((tensor_blocks
[block_ref].flags & 0x3) == UNASSIGNED)) ; else __assert_fail
("!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[block_ref])", "ccv_nnc_symbolic_graph_compile.c"
, 1325, __extension__ __PRETTY_FUNCTION__); }))
;
1326 assert(!TENSOR_EXPECT_ALIAS(tensor_blocks[block_ref]))((void) sizeof ((!((tensor_blocks[block_ref].flags & 0x3)
== ALIAS)) ? 1 : 0), __extension__ ({ if (!((tensor_blocks[block_ref
].flags & 0x3) == ALIAS)) ; else __assert_fail ("!TENSOR_EXPECT_ALIAS(tensor_blocks[block_ref])"
, "ccv_nnc_symbolic_graph_compile.c", 1326, __extension__ __PRETTY_FUNCTION__
); }))
;
1327 // Either we have dup_tensor_block_ref in the current layer, or we have it in the
1328 // previous layer; therefore, we cannot really find the buffer ptr.
1329 if ((!sub_arena_out_tensors || !sub_arena_out_tensors[block_ref]) && // If it is already generated by the sub arena, it can be an ordinary out tensor. (What if the out tensor is not even generated by the sub graph when running? In that case, the behavior is undefined anyway.)
1330 ((graph_prep->dup_tensor_block_ref &&
1331 graph_prep->dup_tensor_block_ref[block_ref * unroll_count] >= 0 &&
1332 graph_prep->dup_tensor_block_ref[block_ref * unroll_count] != block_ref) ||
1333 !tensor_arena->buffers[buffer_ref].ptr))
1334 {
1335 // We haven't allocated anything for this yet.
1336 assert(tensor_arena->vt_tensors[block_ref] == 0)((void) sizeof ((tensor_arena->vt_tensors[block_ref] == 0)
? 1 : 0), __extension__ ({ if (tensor_arena->vt_tensors[block_ref
] == 0) ; else __assert_fail ("tensor_arena->vt_tensors[block_ref] == 0"
, "ccv_nnc_symbolic_graph_compile.c", 1336, __extension__ __PRETTY_FUNCTION__
); }))
;
1337 const int pos = _ccv_nnc_tensor_multiview_gen(tensor_arena->tensor_metadata, 1, tensor_symbol_info[i].assign_ref, tensor_symbol_info[block_ref].info, graph_prep, tensor_arena, block_ref);
1338 tensor_arena->vt_tensors[block_ref] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1339 ccv_array_push(tensor_arena->m_tensor_idx, &pos);
1340 } else {
1341 const int mv_pos = _ccv_nnc_tensor_multiview_preserve_gen(tensor_arena->tensor_metadata, tensor_symbol_info[block_ref].info, graph_prep, tensor_arena->vt_tensors[block_ref]);
1342 tensor_arena->vt_tensors[block_ref] = (ccv_nnc_tensor_t*)(intptr_t)mv_pos; // Cast into vt_tensors for now; will rewire later.
1343 ccv_array_push(tensor_arena->m_tensor_idx, &mv_pos);
1344 }
1345 }
1346 }
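 // Recap of the loop above: for a while-loop sub-graph, an input that needs to be preserved across
 // iterations gets wrapped in a multi-view -- either generated fresh when this arena has nothing allocated
 // for it yet, or via _ccv_nnc_tensor_multiview_preserve_gen on the existing tensor -- and the multi-view's
 // metadata position is recorded in m_tensor_idx alongside the other multi-view tensors created here.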
1347 // For a case..of statement, the output is a phi variable; thus, if we take the skip branch, we will select the original input.
1348 // This creates the multi-view tensor to achieve that.
1349 for (i = 0; i < tensor_symbol_info_size; i++)
1350 if (tensor_blocks[i].bypass_ref && tensor_arena->vt_tensors[i])
1351 {
1352 const int bypass_ref = tensor_blocks[i].bypass_ref - 1;
1353 // Create phi multi-view.
1354 const int mv_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_multiview_t));
1355 const int intv_pos = _ccv_nnc_tensor_flat_if_multiview(tensor_arena->tensor_metadata, (int)(intptr_t)tensor_arena->vt_tensors[bypass_ref]);
1356 const int outv_pos = _ccv_nnc_tensor_flat_if_multiview(tensor_arena->tensor_metadata, (int)(intptr_t)tensor_arena->vt_tensors[i]);
1357 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, mv_pos);
1358 ccv_nnc_tensor_t* const intv = (ccv_nnc_tensor_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, intv_pos);
1359 ccv_nnc_tensor_t* const outv = (ccv_nnc_tensor_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, outv_pos);
1360 ccv_nnc_tensor_multiview((ccv_nnc_tensor_t*[]){
1361 intv,
1362 outv,
1363 }, CCV_NNC_MULTIVIEW_K0N, 2, (ccv_nnc_graph_t*)CCV_NNC_MULTIVIEW_PHI(intptr_t)0x1, mv);
1364 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0] = (ccv_nnc_tensor_t*)(intptr_t)intv_pos;
1365 CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[1] = (ccv_nnc_tensor_t*)(intptr_t)outv_pos;
1366 tensor_arena->vt_tensors[i] = (ccv_nnc_tensor_t*)(intptr_t)mv_pos;
1367 ccv_array_push(tensor_arena->m_tensor_idx, &mv_pos);
1368 }
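 // Layout of the phi multi-view built above: CCV_NNC_MULTIVIEW_DATA(mv)[0] holds the (flattened) original
 // input and CCV_NNC_MULTIVIEW_DATA(mv)[1] holds the (flattened) output, with the anchor set to
 // CCV_NNC_MULTIVIEW_PHI. Selecting entry 0 thus corresponds to taking the skip branch, and entry 1 to
 // actually executing a branch of the case..of.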
1369 // Now it is time to handle aliases.
1370 for (i = 0; i < alloc_prep->block_size; i++)
1371 if (alloc_prep->blocks[i].block_ref < tensor_symbol_info_size)
1372 {
1373 const int block_ref = alloc_prep->blocks[i].block_ref;
1374 if (TENSOR_EXPECT_ALIAS(tensor_blocks[block_ref])((tensor_blocks[block_ref].flags & 0x3) == ALIAS))
1375 {
1376 // Assigning out the tensor aliases.
1377 assert(tensor_symbol_info[block_ref].alias_ref)((void) sizeof ((tensor_symbol_info[block_ref].alias_ref) ? 1
: 0), __extension__ ({ if (tensor_symbol_info[block_ref].alias_ref
) ; else __assert_fail ("tensor_symbol_info[block_ref].alias_ref"
, "ccv_nnc_symbolic_graph_compile.c", 1377, __extension__ __PRETTY_FUNCTION__
); }))
;
1378 const int alias_ref = tensor_symbol_info[block_ref].alias_ref - 1;
1379 // What it references is not an alias.
1380 assert(tensor_arena->vt_tensors[alias_ref])((void) sizeof ((tensor_arena->vt_tensors[alias_ref]) ? 1 :
0), __extension__ ({ if (tensor_arena->vt_tensors[alias_ref
]) ; else __assert_fail ("tensor_arena->vt_tensors[alias_ref]"
, "ccv_nnc_symbolic_graph_compile.c", 1380, __extension__ __PRETTY_FUNCTION__
); }))
;
1381 const int alias_pos = (int)(intptr_t)tensor_arena->vt_tensors[alias_ref];
1382 const ccv_nnc_tensor_t* alias_tensor_ptr = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, alias_pos);
1383 assert(!CCV_IS_TENSOR_VIEW(alias_tensor_ptr))((void) sizeof ((!((*(int*)(alias_tensor_ptr)) & CCV_TENSOR_VIEW
)) ? 1 : 0), __extension__ ({ if (!((*(int*)(alias_tensor_ptr
)) & CCV_TENSOR_VIEW)) ; else __assert_fail ("!CCV_IS_TENSOR_VIEW(alias_tensor_ptr)"
, "ccv_nnc_symbolic_graph_compile.c", 1383, __extension__ __PRETTY_FUNCTION__
); }))
;
1384 // Will use that to determine whether to insert a reference or not.
1385 const int is_multiview = CCV_IS_TENSOR_MULTIVIEW(alias_tensor_ptr)((*(int*)(alias_tensor_ptr)) & CCV_TENSOR_MULTIVIEW);
1386 while (CCV_IS_TENSOR_MULTIVIEW(alias_tensor_ptr)((*(int*)(alias_tensor_ptr)) & CCV_TENSOR_MULTIVIEW))
1387 {
1388 const ccv_nnc_tensor_multiview_t* const mv = (const ccv_nnc_tensor_multiview_t*)alias_tensor_ptr;
1389 alias_tensor_ptr = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0]);
1390 }
1391 const ccv_nnc_tensor_t alias_tensor = *alias_tensor_ptr;
1392 // If there is no ofs, and inc is the same as dim, we take a shortcut and just init as normal tensor.
1393 int pos;
1394 if (memcmp(ccv_nnc_no_ofs, tensor_symbol_info[block_ref].ofs, sizeof(ccv_nnc_no_ofs)) == 0 &&
1395 memcmp(tensor_symbol_info[block_ref].inc, tensor_symbol_info[block_ref].info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) == 0)
1396 {
1397 pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
1398 ccv_nnc_tensor_t* const tensor = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1399 *tensor = ccv_nnc_tensor(alias_tensor.data.u8, tensor_symbol_info[block_ref].info, 0);
1400 } else {
1401 pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_view_t));
1402 ccv_nnc_tensor_view_t* const tensor_view = (ccv_nnc_tensor_view_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
1403 // Otherwise initialize a tensor view
1404 *tensor_view = ccv_nnc_tensor_view(&alias_tensor, tensor_symbol_info[block_ref].info, tensor_symbol_info[block_ref].ofs, tensor_symbol_info[block_ref].inc);
1405 tensor_view->alias_ref = (uintptr_t)alias_pos;
1406 }
1407 tensor_arena->vt_tensors[block_ref] = (ccv_nnc_tensor_t*)(intptr_t)pos;
1408 if (is_multiview)
1409 {
1410 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, alias_pos);
1411 ccv_nnc_tensor_synchronize_to_multiview(mv, (ccv_nnc_tensor_t*)(intptr_t)pos);
1412 }
1413 }
1414 }
1415 // Replace the tensor placeholder within the sub arena's multi-view with the input tensor.
1416 for (i = 0; i < tensor_arena->sub_arena_size; i++)
1417 if (tensor_arena->sub_arenas[i])
1418 {
1419 const int exec_idx = graph_prep->sub_preps[i]->exec_idx - 1;
1420 const ccv_nnc_graph_exec_symbol_info_t* const node = graph_prep->exec_symbol_info + exec_idx;
1421 for (j = 0; j < node->input_size; j++)
1422 {
1423 const int idx = node->inputs[j];
1424 const int s_idx = (tensor_symbol_info[idx].s_ref && tensor_symbol_info[idx].s_ref->rnum > i) ? *(int*)ccv_array_get(tensor_symbol_info[idx].s_ref, i)((void*)(((char*)((tensor_symbol_info[idx].s_ref)->data)) +
(size_t)(tensor_symbol_info[idx].s_ref)->rsize * (size_t)
(i)))
- 1 : -1;
1425 if (s_idx < 0)
1426 continue;
1427 ccv_nnc_tensor_t* sub_tensor = tensor_arena->sub_arenas[i]->vt_tensors[s_idx];
1428 // Only do the replacement if it is a multi-view tensor.
1429 // sub_tensor can be unassigned if it is a tape variable. It will get fixed up later from its pair.
1430 if (sub_tensor && CCV_IS_TENSOR_MULTIVIEW(sub_tensor)((*(int*)(sub_tensor)) & CCV_TENSOR_MULTIVIEW) && !TENSOR_EXPECT_UNASSIGNED(tensor_blocks[idx])((tensor_blocks[idx].flags & 0x3) == UNASSIGNED))
1431 {
1432 // It cannot be a bound tensor.
1433 assert(CCV_NNC_IS_METADATA_POS(tensor_arena->vt_tensors[idx]))((void) sizeof ((((uintptr_t)(tensor_arena->vt_tensors[idx
]) & 1)) ? 1 : 0), __extension__ ({ if (((uintptr_t)(tensor_arena
->vt_tensors[idx]) & 1)) ; else __assert_fail ("CCV_NNC_IS_METADATA_POS(tensor_arena->vt_tensors[idx])"
, "ccv_nnc_symbolic_graph_compile.c", 1433, __extension__ __PRETTY_FUNCTION__
); }))
;
1434 const int vt_pos = (int)(intptr_t)tensor_arena->vt_tensors[idx];
1435 const int is_sub_arena_out_tensor = (sub_arena_out_tensors && sub_arena_out_tensors[idx]);
1436 ccv_nnc_tensor_t* const vt_tensor = is_sub_arena_out_tensor ? sub_arena_out_tensors[idx] : _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, vt_pos);
1437 // If this tensor is also a multi-view, we need to first generate a new tensor, and then generate a reference
1438 // to this tensor.
1439 if (CCV_IS_TENSOR_MULTIVIEW(vt_tensor)((*(int*)(vt_tensor)) & CCV_TENSOR_MULTIVIEW))
1440 {
1441 const int ref_pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
1442 ccv_nnc_tensor_t* const ref_tensor = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, ref_pos);
1443 ccv_nnc_tensor_multiview_t* const multiview = (ccv_nnc_tensor_multiview_t*)(is_sub_arena_out_tensor ? vt_tensor : _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, vt_pos));
1444 ref_tensor->alias_ref = is_sub_arena_out_tensor ? (uintptr_t)vt_tensor : (uintptr_t)vt_pos;
1445 ccv_nnc_tensor_synchronize_to_multiview(multiview, (ccv_nnc_tensor_t*)(intptr_t)ref_pos);
1446 ccv_nnc_tensor_t* tv = (ccv_nnc_tensor_t*)(CCV_NNC_IS_METADATA_POS(CCV_NNC_MULTIVIEW_DATA(multiview)[0])((uintptr_t)(((multiview)->_heap_data ? (multiview)->_heap_data
: (multiview)->_inline_data)[0]) & 1)
? _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA(multiview)((multiview)->_heap_data ? (multiview)->_heap_data : (multiview
)->_inline_data)
[0]) : CCV_NNC_MULTIVIEW_DATA(multiview)((multiview)->_heap_data ? (multiview)->_heap_data : (multiview
)->_inline_data)
[0]);
1447 while (CCV_IS_TENSOR_MULTIVIEW(tv)((*(int*)(tv)) & CCV_TENSOR_MULTIVIEW))
1448 tv = (ccv_nnc_tensor_t*)(CCV_NNC_IS_METADATA_POS(CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)tv)[0])((uintptr_t)((((ccv_nnc_tensor_multiview_t*)tv)->_heap_data
? ((ccv_nnc_tensor_multiview_t*)tv)->_heap_data : ((ccv_nnc_tensor_multiview_t
*)tv)->_inline_data)[0]) & 1)
? _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, (int)(intptr_t)CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)tv)(((ccv_nnc_tensor_multiview_t*)tv)->_heap_data ? ((ccv_nnc_tensor_multiview_t
*)tv)->_heap_data : ((ccv_nnc_tensor_multiview_t*)tv)->
_inline_data)
[0]) : CCV_NNC_MULTIVIEW_DATA((ccv_nnc_tensor_multiview_t*)tv)(((ccv_nnc_tensor_multiview_t*)tv)->_heap_data ? ((ccv_nnc_tensor_multiview_t
*)tv)->_heap_data : ((ccv_nnc_tensor_multiview_t*)tv)->
_inline_data)
[0]);
1449 *ref_tensor = ccv_nnc_tensor(tv->data.ptr, tv->info, 0);
1450 _ccv_nnc_tensor_multiview_full_pos((ccv_nnc_tensor_multiview_t*)sub_tensor, (ccv_nnc_tensor_t*)(intptr_t)ref_pos);
1451 } else
1452 _ccv_nnc_tensor_multiview_full_pos((ccv_nnc_tensor_multiview_t*)sub_tensor, is_sub_arena_out_tensor ? vt_tensor : (ccv_nnc_tensor_t*)(intptr_t)vt_pos);
1453 }
1454 }
1455 }
1456 // After aliases are created, for the case..of statement, we now revert back to a flat tensor rather than a multi-view.
1457 // No worries though: this new tensor is subscribed to the phi multi-view. Moreover, we have logic
1458 // when initializing the case..of node that will take the phi multi-view again.
1459 for (i = 0; i < tensor_symbol_info_size; i++)
1460 if (tensor_blocks[i].bypass_ref && tensor_arena->vt_tensors[i])
1461 {
1462 assert(CCV_NNC_IS_METADATA_POS(tensor_arena->vt_tensors[i]))((void) sizeof ((((uintptr_t)(tensor_arena->vt_tensors[i])
& 1)) ? 1 : 0), __extension__ ({ if (((uintptr_t)(tensor_arena
->vt_tensors[i]) & 1)) ; else __assert_fail ("CCV_NNC_IS_METADATA_POS(tensor_arena->vt_tensors[i])"
, "ccv_nnc_symbolic_graph_compile.c", 1462, __extension__ __PRETTY_FUNCTION__
); }))
;
1463 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, (int)(intptr_t)tensor_arena->vt_tensors[i]);
1464 assert(mv->anchor == CCV_NNC_MULTIVIEW_PHI)((void) sizeof ((mv->anchor == (intptr_t)0x1) ? 1 : 0), __extension__
({ if (mv->anchor == (intptr_t)0x1) ; else __assert_fail (
"mv->anchor == CCV_NNC_MULTIVIEW_PHI", "ccv_nnc_symbolic_graph_compile.c"
, 1464, __extension__ __PRETTY_FUNCTION__); }))
;
1465 tensor_arena->vt_tensors[i] = (ccv_nnc_tensor_t*)(intptr_t)_ccv_nnc_tensor_flat_if_multiview(tensor_arena->tensor_metadata, (int)(intptr_t)tensor_arena->vt_tensors[i]);
1466 }
1467 // Rewire the rest. I can rewire multiple times because I can identify whether a tensor is already wired or not.
1468 for (i = 0; i < tensor_symbol_info_size; i++)
1469 if (tensor_arena->vt_tensors[i])
1470 tensor_arena->vt_tensors[i] = _ccv_nnc_tensor_metadata_rewire(tensor_arena->tensor_metadata, tensor_arena->vt_tensors[i]);
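 // A note on the encoding used throughout: until this rewiring pass, vt_tensors[i] may hold either a real
 // ccv_nnc_tensor_t* (e.g. a bound tensor) or an integer "metadata position" tagged in its low bit
 // (CCV_NNC_IS_METADATA_POS tests that bit). _ccv_nnc_tensor_metadata_rewire turns such positions into real
 // pointers into tensor_metadata, and, per the comment above, it is safe to run more than once.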
1471 // Associate multi-view tensors from the sub arena with the parent.
1472 if (sub_arena_out_tensors)
1473 {
1474 for (i = 0; i < alloc_prep->block_size; i++)
1475 if (alloc_prep->blocks[i].block_ref < tensor_symbol_info_size)
1476 {
1477 const int block_ref = alloc_prep->blocks[i].block_ref;
1478 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[block_ref])((tensor_blocks[block_ref].flags & 0x3) == UNASSIGNED))
1479 continue;
1480 int sub_arena_ref = block_ref;
1481 if (TENSOR_EXPECT_ALIAS(tensor_blocks[block_ref])((tensor_blocks[block_ref].flags & 0x3) == ALIAS))
1482 {
1483 // Assigning out the tensor aliases.
1484 assert(tensor_symbol_info[block_ref].alias_ref)((void) sizeof ((tensor_symbol_info[block_ref].alias_ref) ? 1
: 0), __extension__ ({ if (tensor_symbol_info[block_ref].alias_ref
) ; else __assert_fail ("tensor_symbol_info[block_ref].alias_ref"
, "ccv_nnc_symbolic_graph_compile.c", 1484, __extension__ __PRETTY_FUNCTION__
); }))
;
1485 const int alias_ref = tensor_symbol_info[block_ref].alias_ref - 1;
1486 // What it references is not an alias.
1487 assert(tensor_arena->vt_tensors[alias_ref])((void) sizeof ((tensor_arena->vt_tensors[alias_ref]) ? 1 :
0), __extension__ ({ if (tensor_arena->vt_tensors[alias_ref
]) ; else __assert_fail ("tensor_arena->vt_tensors[alias_ref]"
, "ccv_nnc_symbolic_graph_compile.c", 1487, __extension__ __PRETTY_FUNCTION__
); }))
;
1488 sub_arena_ref = alias_ref;
1489 if (!sub_arena_out_tensors[sub_arena_ref])
1490 continue;
1491 }
1492 if (!sub_arena_out_tensors[sub_arena_ref])
1493 continue;
1494 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)(CCV_IS_TENSOR_MULTIVIEW(sub_arena_out_tensors[sub_arena_ref])((*(int*)(sub_arena_out_tensors[sub_arena_ref])) & CCV_TENSOR_MULTIVIEW
)
? sub_arena_out_tensors[sub_arena_ref] : (ccv_nnc_tensor_t*)sub_arena_out_tensors[sub_arena_ref]->alias_ref);
1495 assert(CCV_IS_TENSOR_MULTIVIEW(mv))((void) sizeof ((((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW)) ?
1 : 0), __extension__ ({ if (((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("CCV_IS_TENSOR_MULTIVIEW(mv)", "ccv_nnc_symbolic_graph_compile.c"
, 1495, __extension__ __PRETTY_FUNCTION__); }))
;
1496 // This is only possible if the vt_tensor is a phi node.
1497 if (tensor_arena->vt_tensors[block_ref]->alias_ref)
1498 {
1499 // For a phi node, the sub_arena_out_tensors are only relevant to its selected output. Therefore, set that to be the receiver of the broadcast.
1500 ccv_nnc_tensor_multiview_t* const phi = (ccv_nnc_tensor_multiview_t*)(tensor_arena->vt_tensors[block_ref]->alias_ref);
1501 assert(phi->anchor == CCV_NNC_MULTIVIEW_PHI)((void) sizeof ((phi->anchor == (intptr_t)0x1) ? 1 : 0), __extension__
({ if (phi->anchor == (intptr_t)0x1) ; else __assert_fail
("phi->anchor == CCV_NNC_MULTIVIEW_PHI", "ccv_nnc_symbolic_graph_compile.c"
, 1501, __extension__ __PRETTY_FUNCTION__); }))
;
1502 assert(!CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(phi)[1]))((void) sizeof ((!((*(int*)(((phi)->_heap_data ? (phi)->
_heap_data : (phi)->_inline_data)[1])) & CCV_TENSOR_MULTIVIEW
)) ? 1 : 0), __extension__ ({ if (!((*(int*)(((phi)->_heap_data
? (phi)->_heap_data : (phi)->_inline_data)[1])) & CCV_TENSOR_MULTIVIEW
)) ; else __assert_fail ("!CCV_IS_TENSOR_MULTIVIEW(CCV_NNC_MULTIVIEW_DATA(phi)[1])"
, "ccv_nnc_symbolic_graph_compile.c", 1502, __extension__ __PRETTY_FUNCTION__
); }))
;
1503 CCV_NNC_MULTIVIEW_DATA(phi)((phi)->_heap_data ? (phi)->_heap_data : (phi)->_inline_data
)
[1]->alias_ref = (uintptr_t)mv;
1504 ccv_nnc_tensor_synchronize_to_multiview(mv, CCV_NNC_MULTIVIEW_DATA(phi)((phi)->_heap_data ? (phi)->_heap_data : (phi)->_inline_data
)
[1]);
1505 } else {
1506 tensor_arena->vt_tensors[block_ref]->alias_ref = (uintptr_t)mv;
1507 ccv_nnc_tensor_synchronize_to_multiview(mv, tensor_arena->vt_tensors[block_ref]);
1508 }
1509 }
1510 }
1511 // Go over all the tensors that have assign_ref. If the tensor it is assigned from is:
1512 // 1). From sub_arena_out_tensors: it is possible that it is now pointing to an area this arena doesn't know about.
1513 // 2). From a phi multi-view: in this case, this arena won't know ahead of time which memory I am going to use.
1514 // Therefore, for the above two scenarios, a tensor with assign_ref, even if it is a multi-view tensor, needs to subscribe
1515 // to the output of the assign_ref tensor.
1516 for (i = 0; i < tensor_symbol_info_size; i++)
1517 if (tensor_arena->vt_tensors[i] && tensor_symbol_info[i].assign_ref)
1518 {
1519 const int assign_ref = tensor_symbol_info[i].assign_ref - 1;
1520 ccv_nnc_tensor_t* assign_tensor;
1521 if (sub_arena_out_tensors && sub_arena_out_tensors[assign_ref])
1522 assign_tensor = CCV_IS_TENSOR_MULTIVIEW(sub_arena_out_tensors[assign_ref])((*(int*)(sub_arena_out_tensors[assign_ref])) & CCV_TENSOR_MULTIVIEW
)
? sub_arena_out_tensors[assign_ref] : (ccv_nnc_tensor_t*)sub_arena_out_tensors[assign_ref]->alias_ref;
1523 else
1524 assign_tensor = tensor_arena->vt_tensors[assign_ref];
1525 ccv_nnc_graph_add_carry_over(graph_prep->graph, assign_tensor, tensor_arena->vt_tensors[i]);
1526 }
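 // i.e. the pair (assign_tensor -> vt_tensors[i]) is registered with the concrete graph so that the value
 // produced for assign_ref can be carried over into this tensor between loop iterations; assign_tensor is
 // resolved through sub_arena_out_tensors / the multi-view alias first, for the two scenarios listed above.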
1527 // After everything is handled, assert again to make sure the tensors and tensor binds point to the right location. This is really just a sanity check.
1528 for (i = 0; i < tensor_bind_size; i++)
1529 {
1530 assert(tensor_binds[i].tensor)((void) sizeof ((tensor_binds[i].tensor) ? 1 : 0), __extension__
({ if (tensor_binds[i].tensor) ; else __assert_fail ("tensor_binds[i].tensor"
, "ccv_nnc_symbolic_graph_compile.c", 1530, __extension__ __PRETTY_FUNCTION__
); }))
;
1531 const ccv_nnc_tensor_symbol_t resolved_symbol = ccv_nnc_tensor_symbol_resolve(graph_prep->symbolic_graph, tensor_binds[i].symbol);
1532 if (resolved_symbol.d >= 0)
1533 {
1534 int d = resolved_symbol.d;
1535 // This check is for in-place ops. Only an in-place op could be unassigned but carry a ref.
1536 // It has nothing to do with aliases.
1537 while (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[d])((tensor_blocks[d].flags & 0x3) == UNASSIGNED) && tensor_blocks[d].ref)
1538 d = tensor_blocks[d].ref - 1;
1539 // Note we don't trace back on alias. This is intentional.
1540 assert(tensor_arena->vt_tensors[d]->data.u8 == tensor_binds[i].tensor->data.u8)((void) sizeof ((tensor_arena->vt_tensors[d]->data.u8 ==
tensor_binds[i].tensor->data.u8) ? 1 : 0), __extension__ (
{ if (tensor_arena->vt_tensors[d]->data.u8 == tensor_binds
[i].tensor->data.u8) ; else __assert_fail ("tensor_arena->vt_tensors[d]->data.u8 == tensor_binds[i].tensor->data.u8"
, "ccv_nnc_symbolic_graph_compile.c", 1540, __extension__ __PRETTY_FUNCTION__
); }))
;
1541 }
1542 }
1543 if (sub_arena_out_tensors)
1544 ccfree(sub_arena_out_tensors);
1545 // Rewire sub arena's tensor references.
1546 for (i = 0; i < tensor_arena->sub_arena_size; i++)
1547 if (tensor_arena->sub_arenas[i])
1548 {
1549 const int exec_idx = graph_prep->sub_preps[i]->exec_idx - 1;
1550 const ccv_nnc_graph_exec_symbol_info_t* const node = graph_prep->exec_symbol_info + exec_idx;
1551 for (j = 0; j < node->input_size; j++)
1552 {
1553 const int idx = node->inputs[j];
1554 const int s_idx = (tensor_symbol_info[idx].s_ref && tensor_symbol_info[idx].s_ref->rnum > i) ? *(int*)ccv_array_get(tensor_symbol_info[idx].s_ref, i)((void*)(((char*)((tensor_symbol_info[idx].s_ref)->data)) +
(size_t)(tensor_symbol_info[idx].s_ref)->rsize * (size_t)
(i)))
- 1 : -1;
1555 if (s_idx < 0)
1556 continue;
1557 ccv_nnc_tensor_t* sub_tensor = tensor_arena->sub_arenas[i]->vt_tensors[s_idx];
1558 // Only do the replacement if it is a multi-view tensor.
1559 // sub_tensor can be unassigned if it is a tape variable. It will get fixed up later from its pair.
1560 if (sub_tensor && CCV_IS_TENSOR_MULTIVIEW(sub_tensor)((*(int*)(sub_tensor)) & CCV_TENSOR_MULTIVIEW))
1561 {
1562 // This is a bound tensor, bind it now.
1563 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[idx])((tensor_blocks[idx].flags & 0x3) == UNASSIGNED))
1564 _ccv_nnc_tensor_multiview_full_pos((ccv_nnc_tensor_multiview_t*)sub_tensor, tensor_arena->vt_tensors[idx]);
1565 else
1566 _ccv_nnc_tensor_multiview_full_pos_rewire(tensor_arena->tensor_metadata, (ccv_nnc_tensor_multiview_t*)sub_tensor);
1567 }
1568 }
1569 }
1570 return tensor_arena;
1571}
1572
1573static ccv_nnc_tensor_t* _ccv_nnc_tensor_arena_find_pair_ref(const ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph, const int pair_ref)
1574{
1575 assert(graph)((void) sizeof ((graph) ? 1 : 0), __extension__ ({ if (graph)
; else __assert_fail ("graph", "ccv_nnc_symbolic_graph_compile.c"
, 1575, __extension__ __PRETTY_FUNCTION__); }))
;
1576 if ((intptr_t)graph == tensor_arena->graph_ref)
1577 {
1578 assert(pair_ref >= 0 && pair_ref < tensor_arena->vt_tensor_size)((void) sizeof ((pair_ref >= 0 && pair_ref < tensor_arena
->vt_tensor_size) ? 1 : 0), __extension__ ({ if (pair_ref >=
0 && pair_ref < tensor_arena->vt_tensor_size) ;
else __assert_fail ("pair_ref >= 0 && pair_ref < tensor_arena->vt_tensor_size"
, "ccv_nnc_symbolic_graph_compile.c", 1578, __extension__ __PRETTY_FUNCTION__
); }))
;
1579 return tensor_arena->vt_tensors[pair_ref];
1580 }
1581 int i;
1582 for (i = 0; i < tensor_arena->sub_arena_size; i++)
1583 if (tensor_arena->sub_arenas[i])
1584 {
1585 ccv_nnc_tensor_t* const tensor = _ccv_nnc_tensor_arena_find_pair_ref(tensor_arena->sub_arenas[i], graph, pair_ref);
1586 if (tensor)
1587 return tensor;
1588 }
1589 return 0;
1590}
1591
1592static void _ccv_nnc_tensor_mark_as_tape_var(ccv_nnc_tensor_t* const tensor)
1593{
1594 if (!CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1595 tensor->type |= CCV_TAPE_ALLOC;
1596 else {
1597 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)tensor;
1598 mv->type |= CCV_TAPE_ALLOC;
1599 int i;
1600 for (i = 0; i < mv->repeat + mv->kind; i++)
1601 _ccv_nnc_tensor_mark_as_tape_var(CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[i]);
1602 }
1603}
1604
1605static void _ccv_nnc_tensor_arena_fixup_pair_ref_and_tape_var(const ccv_nnc_tensor_arena_t* const root_arena, const ccv_nnc_symbolic_graph_prep_t* const graph_prep, ccv_nnc_tensor_arena_t* const tensor_arena)
1606{
1607 assert(tensor_arena->graph_ref == (intptr_t)graph_prep->symbolic_graph)((void) sizeof ((tensor_arena->graph_ref == (intptr_t)graph_prep
->symbolic_graph) ? 1 : 0), __extension__ ({ if (tensor_arena
->graph_ref == (intptr_t)graph_prep->symbolic_graph) ; else
__assert_fail ("tensor_arena->graph_ref == (intptr_t)graph_prep->symbolic_graph"
, "ccv_nnc_symbolic_graph_compile.c", 1607, __extension__ __PRETTY_FUNCTION__
); }))
;
1608 int i;
1609 for (i = 0; i < graph_prep->tensor_symbol_info_size; i++)
1610 {
1611 if (graph_prep->tensor_symbol_info[i].pair_ref)
1612 {
1613 tensor_arena->vt_tensors[i] = _ccv_nnc_tensor_arena_find_pair_ref(root_arena, graph_prep->symbolic_graph->pair, graph_prep->tensor_symbol_info[i].pair_ref - 1);
1614 // No need to continue checking this if it is from its pair.
1615 continue;
1616 }
1617 if ((graph_prep->tensor_symbol_info[i].flags & CCV_NNC_TENSOR_SYMBOL_TAPE_VAR) && tensor_arena->vt_tensors[i])
1618 {
1619 // If it is a normal tensor, and the buffer it relies on is read only, no need to mark as tape var.
1620 if (!CCV_IS_TENSOR_MULTIVIEW(tensor_arena->vt_tensors[i])((*(int*)(tensor_arena->vt_tensors[i])) & CCV_TENSOR_MULTIVIEW
)
)
1621 {
1622 const int vt_ref = graph_prep->alloc_prep->vt_blocks[i];
1623 if (vt_ref >= 0 &&
1624 TENSOR_READ_WRITE(graph_prep->alloc_prep->buffers[graph_prep->alloc_prep->blocks[vt_ref].buffer_ref])(graph_prep->alloc_prep->buffers[graph_prep->alloc_prep
->blocks[vt_ref].buffer_ref].flags & 0xc)
== READ_ONLY)
1625 continue;
1626 }
1627 _ccv_nnc_tensor_mark_as_tape_var(tensor_arena->vt_tensors[i]);
1628 }
1629 }
1630 for (i = 0; i < graph_prep->sub_prep_size; i++)
1631 if (graph_prep->sub_preps[i])
1632 _ccv_nnc_tensor_arena_fixup_pair_ref_and_tape_var(root_arena, graph_prep->sub_preps[i], tensor_arena->sub_arenas[i]);
1633}
1634
1635static void _ccv_nnc_tensor_block_add_exec(const ccv_sparse_matrix_t* const exec_dep, const int idx, ccv_nnc_tensor_block_t tensor_blocks)
1636{
1637 int i, found = 0;
1638 // Try to insert head.
1639 ccv_array_t* head = tensor_blocks.head;
1640 assert(head)((void) sizeof ((head) ? 1 : 0), __extension__ ({ if (head) ;
else __assert_fail ("head", "ccv_nnc_symbolic_graph_compile.c"
, 1640, __extension__ __PRETTY_FUNCTION__); }))
;
1641 for (i = 0; i < head->rnum;)
1642 {
1643 const int head_idx = *(int*)ccv_array_get(head, i)((void*)(((char*)((head)->data)) + (size_t)(head)->rsize
* (size_t)(i)))
;
1644 if (head_idx == idx)
1645 {
1646 found = 1;
1647 break;
1648 }
1649 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, head_idx, idx);
1650 if (cell.i32 && cell.i32[0] > 0)
1651 {
1652 /* If the current node is the parent of the head node, check if we found it or not. */
1653 /* If not found, replace the current one. */
1654 if (!found)
1655 {
1656 found = 1;
1657 *(int*)ccv_array_get(head, i)((void*)(((char*)((head)->data)) + (size_t)(head)->rsize
* (size_t)(i)))
= idx;
1658 } else {
1659 /* Remove the current one, change the rnum. */
1660 if (i < head->rnum - 1)
1661 *(int*)ccv_array_get(head, i)((void*)(((char*)((head)->data)) + (size_t)(head)->rsize
* (size_t)(i)))
= *(int*)ccv_array_get(head, head->rnum - 1)((void*)(((char*)((head)->data)) + (size_t)(head)->rsize
* (size_t)(head->rnum - 1)))
;
1662 --head->rnum;
1663 continue;
1664 }
1665 } else {
1666 // If the head is the parent of the idx, we cannot add it to the array (it is deterministically later than head).
1667 cell = ccv_get_sparse_matrix_cell(exec_dep, idx, head_idx);
1668 if (cell.i32 && cell.i32[0] > 0)
1669 {
1670 found = 1;
1671 break;
1672 }
1673 }
1674 /* Advancing i. */
1675 ++i;
1676 }
1677 /* If not found, push this idx to the end of the array. */
1678 if (!found)
1679 ccv_array_push(head, &idx);
1680 // Try to insert tail.
1681 found = 0;
1682 ccv_array_t* tail = tensor_blocks.tail;
1683 assert(tail)((void) sizeof ((tail) ? 1 : 0), __extension__ ({ if (tail) ;
else __assert_fail ("tail", "ccv_nnc_symbolic_graph_compile.c"
, 1683, __extension__ __PRETTY_FUNCTION__); }))
;
1684 for (i = 0; i < tail->rnum;)
1685 {
1686 const int tail_idx = *(int*)ccv_array_get(tail, i)((void*)(((char*)((tail)->data)) + (size_t)(tail)->rsize
* (size_t)(i)))
;
1687 if (tail_idx == idx)
1688 {
1689 found = 1;
1690 break;
1691 }
1692 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, tail_idx);
1693 if (cell.i32 && cell.i32[0] > 0)
1694 {
1695 /* If the current node is the child of the tail node, check if we found it or not. */
1696 /* If not found, replace the current one. */
1697 if (!found)
1698 {
1699 found = 1;
1700 *(int*)ccv_array_get(tail, i)((void*)(((char*)((tail)->data)) + (size_t)(tail)->rsize
* (size_t)(i)))
= idx;
1701 } else {
1702 /* Remove the current one, change the rnum. */
1703 *(int*)ccv_array_get(tail, i)((void*)(((char*)((tail)->data)) + (size_t)(tail)->rsize
* (size_t)(i)))
= *(int*)ccv_array_get(tail, tail->rnum - 1)((void*)(((char*)((tail)->data)) + (size_t)(tail)->rsize
* (size_t)(tail->rnum - 1)))
;
1704 --tail->rnum;
1705 continue;
1706 }
1707 } else {
1708 // If the tail is the child of the idx, we cannot add it to the array (it is deterministically earlier than tail).
1709 cell = ccv_get_sparse_matrix_cell(exec_dep, tail_idx, idx);
1710 if (cell.i32 && cell.i32[0] > 0)
1711 {
1712 found = 1;
1713 break;
1714 }
1715 }
1716 /* Advancing i. */
1717 ++i;
1718 }
1719 /* If not found, push this idx to the end of the array. */
1720 if (!found)
1721 ccv_array_push(tail, &idx);
1722}
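/* A minimal standalone sketch of the frontier maintenance performed by _ccv_nnc_tensor_block_add_exec
 * above, assuming a hypothetical precedes(a, b) predicate ("a must execute before b") in place of the
 * exec_dep sparse-matrix lookups: the head set keeps only the earliest execs touching a block. A new idx
 * replaces the first entry it precedes, any further entries it precedes are dropped, and the insert is
 * skipped when an existing entry already precedes idx (or equals it). The tail set in the code above is
 * maintained the same way with the predicate reversed. */
static void frontier_insert_head(int* const head, int* const head_size, const int idx, int (*precedes)(int, int))
{
	int i, found = 0;
	for (i = 0; i < *head_size;)
	{
		if (head[i] == idx || precedes(head[i], idx)) // Already covered by an existing (earlier or equal) head.
		{
			found = 1;
			break;
		}
		if (precedes(idx, head[i])) // idx runs earlier than this entry.
		{
			if (!found)
			{
				head[i] = idx; // Replace the first such entry with idx.
				found = 1;
			} else {
				head[i] = head[--*head_size]; // Drop any further entries idx precedes.
				continue;
			}
		}
		++i;
	}
	if (!found) // Unordered with everything kept so far: idx becomes an additional head.
		head[(*head_size)++] = idx;
}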
1723
1724ccv_nnc_tensor_t* ccv_nnc_tensor_from_symbol(const ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol)
1725{
1726 if ((intptr_t)symbol.graph == tensor_arena->graph_ref)
1727 {
1728 assert(symbol.d >= 0 && symbol.d < tensor_arena->vt_tensor_size)((void) sizeof ((symbol.d >= 0 && symbol.d < tensor_arena
->vt_tensor_size) ? 1 : 0), __extension__ ({ if (symbol.d >=
0 && symbol.d < tensor_arena->vt_tensor_size) ;
else __assert_fail ("symbol.d >= 0 && symbol.d < tensor_arena->vt_tensor_size"
, "ccv_nnc_symbolic_graph_compile.c", 1728, __extension__ __PRETTY_FUNCTION__
); }))
;
1729 ccv_nnc_tensor_t* tensor = tensor_arena->vt_tensors[symbol.d];
1730 if (tensor && CCV_IS_TENSOR_MULTIVIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_MULTIVIEW))
1731 {
1732 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
1733 while (CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
1734 mv = (ccv_nnc_tensor_multiview_t*)(mv->it ? mv->it : CCV_NNC_MULTIVIEW_DATA(mv)((mv)->_heap_data ? (mv)->_heap_data : (mv)->_inline_data
)
[0]);
1735 return (ccv_nnc_tensor_t*)mv;
1736 }
1737 return tensor;
1738 }
1739 int i;
1740 for (i = 0; i < tensor_arena->sub_arena_size; i++)
1741 if (tensor_arena->sub_arenas[i])
1742 {
1743 ccv_nnc_tensor_t* tensor = ccv_nnc_tensor_from_symbol(tensor_arena->sub_arenas[i], symbol);
1744 if (tensor)
1745 return tensor;
1746 }
1747 return 0;
1748}
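/* A minimal usage sketch, assuming a tensor_arena produced by compiling a symbolic graph elsewhere and a
 * tensor symbol x_symbol belonging to that graph (or one of its sub-graphs):
 *
 *   ccv_nnc_tensor_t* const x = ccv_nnc_tensor_from_symbol(tensor_arena, x_symbol);
 *   if (x) // 0 when the symbol doesn't resolve to anything in this arena or any of its sub-arenas.
 *     x->data.f32[0] = 1; // Read or write the concrete storage (assuming a 32F tensor here).
 *
 * For a multi-view symbol, the loop above returns the view currently pointed to (mv->it when set, otherwise
 * the first view), so the caller always receives a plain ccv_nnc_tensor_t. */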
1749
1750ccv_nnc_graph_exec_t ccv_nnc_graph_exec_from_symbol(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const ccv_nnc_graph_exec_symbol_t symbol)
1751{
1752 if ((intptr_t)symbol.graph == graph_exec_arena->graph_ref)
1753 {
1754 assert(symbol.d >= 0 && symbol.d < graph_exec_arena->graph_exec_size)((void) sizeof ((symbol.d >= 0 && symbol.d < graph_exec_arena
->graph_exec_size) ? 1 : 0), __extension__ ({ if (symbol.d
>= 0 && symbol.d < graph_exec_arena->graph_exec_size
) ; else __assert_fail ("symbol.d >= 0 && symbol.d < graph_exec_arena->graph_exec_size"
, "ccv_nnc_symbolic_graph_compile.c", 1754, __extension__ __PRETTY_FUNCTION__
); }))
;
1755 return graph_exec_arena->graph_execs[symbol.d];
1756 }
1757 int i;
1758 for (i = 0; i < graph_exec_arena->sub_arena_size; i++)
1759 if (graph_exec_arena->sub_arenas[i])
1760 {
1761 ccv_nnc_graph_exec_t exec = ccv_nnc_graph_exec_from_symbol(graph_exec_arena->sub_arenas[i], symbol);
1762 if (!CCV_NO_GRAPH_EXEC(exec)((exec).graph == 0))
1763 return exec;
1764 }
1765 return (ccv_nnc_graph_exec_t){}; // 0.
1766}
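/* Similarly, assuming a graph_exec_arena from the same compilation and a graph exec symbol e_symbol:
 *
 *   const ccv_nnc_graph_exec_t e = ccv_nnc_graph_exec_from_symbol(graph_exec_arena, e_symbol);
 *   if (!CCV_NO_GRAPH_EXEC(e))
 *     ; // e now identifies the corresponding node in the compiled concrete graph.
 */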
1767
1768ccv_nnc_graph_exec_t ccv_nnc_graph_exec_source(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena)
1769{
1770 return graph_exec_arena->source;
1771}
1772
1773ccv_nnc_graph_exec_t ccv_nnc_graph_exec_destination(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena)
1774{
1775 return graph_exec_arena->destination;
1776}
1777
1778// Check whether the head is the beginning of this block.
1779static int _ccv_nnc_tensor_block_check_head(const ccv_nnc_tensor_block_t* const tensor_block, const int head_node)
1780{
1781 assert(tensor_block->head)((void) sizeof ((tensor_block->head) ? 1 : 0), __extension__
({ if (tensor_block->head) ; else __assert_fail ("tensor_block->head"
, "ccv_nnc_symbolic_graph_compile.c", 1781, __extension__ __PRETTY_FUNCTION__
); }))
;
1782 return (tensor_block->head->rnum == 1 && *(int*)ccv_array_get(tensor_block->head, 0)((void*)(((char*)((tensor_block->head)->data)) + (size_t
)(tensor_block->head)->rsize * (size_t)(0)))
== head_node);
1783}
1784
1785// Check whether the tail is the end of this block.
1786static int _ccv_nnc_tensor_block_check_tail(const ccv_nnc_tensor_block_t* const tensor_block, const int tail_node)
1787{
1788 assert(tensor_block->tail)((void) sizeof ((tensor_block->tail) ? 1 : 0), __extension__
({ if (tensor_block->tail) ; else __assert_fail ("tensor_block->tail"
, "ccv_nnc_symbolic_graph_compile.c", 1788, __extension__ __PRETTY_FUNCTION__
); }))
;
1789 return (tensor_block->tail->rnum == 1 && *(int*)ccv_array_get(tensor_block->tail, 0)((void*)(((char*)((tensor_block->tail)->data)) + (size_t
)(tensor_block->tail)->rsize * (size_t)(0)))
== tail_node);
1790}
1791
1792// Make two tensor blocks one. Return 1 if that happened.
1793static int _ccv_nnc_tensor_blocks_try_fold(ccv_nnc_tensor_block_t* const tensor_blocks, const int p_ref_0, const int p_ref_1)
1794{
1795 // Now we are sure p_ref_0 points to the input, p_ref_1 points to the output.
1796 if (!TENSOR_IS_UNFOLDABLE_AS_INPUT(tensor_blocks[p_ref_0])(tensor_blocks[p_ref_0].flags & UNFOLDABLE_AS_INPUT) &&
1797 (!TENSOR_IS_UNFOLDABLE_AS_OUTPUT(tensor_blocks[p_ref_1])(tensor_blocks[p_ref_1].flags & UNFOLDABLE_AS_OUTPUT) || tensor_blocks[p_ref_1].unfoldable_except_ref == p_ref_0 + 1) &&
1798 tensor_blocks[p_ref_0].tail->rnum == 1 &&
1799 tensor_blocks[p_ref_1].head->rnum == 1 &&
1800 tensor_blocks[p_ref_0].type == tensor_blocks[p_ref_1].type && // Must be the same type.
1801 *(int*)ccv_array_get(tensor_blocks[p_ref_0].tail, 0)((void*)(((char*)((tensor_blocks[p_ref_0].tail)->data)) + (
size_t)(tensor_blocks[p_ref_0].tail)->rsize * (size_t)(0))
)
== *(int*)ccv_array_get(tensor_blocks[p_ref_1].head, 0)((void*)(((char*)((tensor_blocks[p_ref_1].head)->data)) + (
size_t)(tensor_blocks[p_ref_1].head)->rsize * (size_t)(0))
)
)
1802 {
1803 // If the two parent refs match (thus, they meet at the same node), we can concatenate them and mark one as a ref. This is very similar to in-place operation combining.
1804 assert(TENSOR_EXPECT_COMPUTABLE(tensor_blocks[p_ref_0]))((void) sizeof (((!((tensor_blocks[p_ref_0].flags & 0x3) ==
ALIAS) && !((tensor_blocks[p_ref_0].flags & 0x3)
== UNASSIGNED))) ? 1 : 0), __extension__ ({ if ((!((tensor_blocks
[p_ref_0].flags & 0x3) == ALIAS) && !((tensor_blocks
[p_ref_0].flags & 0x3) == UNASSIGNED))) ; else __assert_fail
("TENSOR_EXPECT_COMPUTABLE(tensor_blocks[p_ref_0])", "ccv_nnc_symbolic_graph_compile.c"
, 1804, __extension__ __PRETTY_FUNCTION__); }))
;
1805 assert(TENSOR_EXPECT_COMPUTABLE(tensor_blocks[p_ref_1]))((void) sizeof (((!((tensor_blocks[p_ref_1].flags & 0x3) ==
ALIAS) && !((tensor_blocks[p_ref_1].flags & 0x3)
== UNASSIGNED))) ? 1 : 0), __extension__ ({ if ((!((tensor_blocks
[p_ref_1].flags & 0x3) == ALIAS) && !((tensor_blocks
[p_ref_1].flags & 0x3) == UNASSIGNED))) ; else __assert_fail
("TENSOR_EXPECT_COMPUTABLE(tensor_blocks[p_ref_1])", "ccv_nnc_symbolic_graph_compile.c"
, 1805, __extension__ __PRETTY_FUNCTION__); }))
;
1806 ccv_array_free(tensor_blocks[p_ref_0].tail);
1807 tensor_blocks[p_ref_0].tail = tensor_blocks[p_ref_1].tail;
1808 if (tensor_blocks[p_ref_1].p_refs[0])
1809 {
1810 assert(tensor_blocks[p_ref_1].p_refs[1] == 0)((void) sizeof ((tensor_blocks[p_ref_1].p_refs[1] == 0) ? 1 :
0), __extension__ ({ if (tensor_blocks[p_ref_1].p_refs[1] ==
0) ; else __assert_fail ("tensor_blocks[p_ref_1].p_refs[1] == 0"
, "ccv_nnc_symbolic_graph_compile.c", 1810, __extension__ __PRETTY_FUNCTION__
); }))
; // It simply cannot have more than one p_ref; otherwise we cannot merge.
1811 if (!tensor_blocks[p_ref_0].p_refs[0])
1812 tensor_blocks[p_ref_0].p_refs[0] = tensor_blocks[p_ref_1].p_refs[0];
1813 else
1814 tensor_blocks[p_ref_0].p_refs[1] = tensor_blocks[p_ref_1].p_refs[0];
1815 }
1816 tensor_blocks[p_ref_0].pin_mem = tensor_blocks[p_ref_0].pin_mem || tensor_blocks[p_ref_1].pin_mem;
1817 TENSOR_SET_READ_WRITE(tensor_blocks[p_ref_0], TENSOR_READ_WRITE(tensor_blocks[p_ref_0]) | TENSOR_READ_WRITE(tensor_blocks[p_ref_1]))(tensor_blocks[p_ref_0].flags = ((tensor_blocks[p_ref_0].flags
& ~0xc) | (tensor_blocks[p_ref_0].flags & 0xc) | (tensor_blocks
[p_ref_1].flags & 0xc)))
;
1818 ccv_array_free(tensor_blocks[p_ref_1].head);
1819 if (TENSOR_IS_UNFOLDABLE_AS_INPUT(tensor_blocks[p_ref_1])(tensor_blocks[p_ref_1].flags & UNFOLDABLE_AS_INPUT))
1820 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[p_ref_0])(tensor_blocks[p_ref_0].flags = (tensor_blocks[p_ref_0].flags
| UNFOLDABLE_AS_INPUT))
;
1821 // Don't need to check UNFOLDABLE_AS_OUTPUT for p_ref_1 because if it were set, we could not fold right now.
1822 TENSOR_EXPECT_SET_UNASSIGNED(tensor_blocks[p_ref_1])(tensor_blocks[p_ref_1].flags = ((tensor_blocks[p_ref_1].flags
& ~0x3) | UNASSIGNED))
;
1823 tensor_blocks[p_ref_1].ref = p_ref_0 + 1;
1824 if (!tensor_blocks[p_ref_0].r_refs)
1825 tensor_blocks[p_ref_0].r_refs = ccv_array_new(sizeof(int), 0, 0);
1826 ccv_array_add_unique_int(tensor_blocks[p_ref_0].r_refs, p_ref_1 + 1);
1827 tensor_blocks[p_ref_1].size = 0;
1828 tensor_blocks[p_ref_1].head = 0;
1829 tensor_blocks[p_ref_1].tail = 0;
1830 return 1;
1831 }
1832 return 0;
1833}
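// A concrete reading of the fold above: suppose an in-place op consumes x (p_ref_0) and produces y (p_ref_1)
// at the same node, x's lifetime ends exactly at that node, y's lifetime starts there, and both blocks have
// the same type. Then y's block becomes UNASSIGNED with ref = p_ref_0 + 1, x's block inherits y's tail (plus
// its p_refs, pin_mem and read/write flags), and the allocator only ever sees the single combined block.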
1834
1835static void _ccv_nnc_exec_dep_and_tensor_blocks_prep(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_graph_exec_symbol_info_t* const p_node_info, const ccv_nnc_graph_visit_t* const visit, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_tensor_symbol_info_t* const p_tensor_symbol_info, const int p_tensor_symbol_info_size, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const int unroll_count, const int* const dup_tensor_block_ref, const int* const dup_tensor_from_ref, const int* const dup_exec_from_ref, ccv_nnc_graph_exec_flag_t* const exec_flags, ccv_sparse_matrix_t** r_exec_dep, ccv_nnc_tensor_block_t** r_tensor_blocks)
1836{
1837 int i, j, k;
1838 // Generate exec dependencies (or, in other words, partial ordering of executions).
1839 ccv_sparse_matrix_t* exec_dep = ccv_sparse_matrix_new(symbolic_graph->exec_symbol_info->rnum, symbolic_graph->exec_symbol_info->rnum, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
1840 int* buf = (int*)ccmalloc(sizeof(int) * symbolic_graph->exec_symbol_info->rnum * 2);
1841 int buf_size;
1842 if (p_node_info)
1843 { assert(output_size == 0)((void) sizeof ((output_size == 0) ? 1 : 0), __extension__ ({
if (output_size == 0) ; else __assert_fail ("output_size == 0"
, "ccv_nnc_symbolic_graph_compile.c", 1843, __extension__ __PRETTY_FUNCTION__
); }))
; }
1844#define for_block(x, val) \
1845 do { \
1846 if (((int32_t*)val)[0] > 0) \
1847 { \
1848 buf[buf_size * 2] = x; \
1849 buf[buf_size * 2 + 1] = ((int32_t*)val)[0] + 1; \
1850 ++buf_size; \
1851 } \
1852 } while (0)
1853 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_symbol_info)) const node __attribute__(
(unused)) = (exec_symbol_info) + idx;
{
1854 buf_size = 0; /* save all its parent deps to this buffer */
1855 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
1856 if (vector)
1857 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
1858 if (!node->outgoings)
1859 continue;
1860 for (i = 0; i < node->outgoings->rnum; i++)
1861 {
1862 int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
1863 const int32_t one = 1;
1864 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, idx);
1865 /* If not found, set it. If the current node is the destination node, there is no need to
1866 * set itself as a parent of subsequent nodes because of its terminal nature. */
1867 if (!term && (!cell.i32 || cell.i32[0] == 0))
1868 ccv_set_sparse_matrix_cell(exec_dep, outgoing, idx, &one);
1869 for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */
1870 {
1871 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2]);
1872 /* If not found, set */
1873 if (!cell.i32 || cell.i32[0] == 0)
1874 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &buf[j * 2 + 1]);
1875 else {
1876 /* Otherwise, set to the longest one */
1877 int32_t dep = ccv_max(cell.i32[0], buf[j * 2 + 1])({ typeof (cell.i32[0]) _a = (cell.i32[0]); typeof (buf[j * 2
+ 1]) _b = (buf[j * 2 + 1]); (_a > _b) ? _a : _b; })
;
1878 ccv_set_sparse_matrix_cell(exec_dep, outgoing, buf[j * 2], &dep);
1879 }
1880 }
1881 }
1882 } ccv_nnc_graph_visit_endfor} }
1883#undef for_block
1884 ccfree(buf);
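 // At this point exec_dep is fully populated: a positive cell at (row = descendant, column = ancestor)
 // means the column node must run before the row node, and the value is the longest known path length
 // between them (1 for a direct edge). Later queries such as _ccv_nnc_tensor_block_add_exec rely on this
 // "positive cell means ordered" convention.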
1885 // This struct is allocated earlier to collect information about the tensor's expected start / end execs.
1886 const int tensor_block_size = symbolic_graph->tensor_symbol_info->rnum;
1887 ccv_nnc_tensor_block_t* tensor_blocks = (ccv_nnc_tensor_block_t*)cccalloc(tensor_block_size, sizeof(ccv_nnc_tensor_block_t));
1888 // The reason is that I need to make every one of them unassigned unless it is used somewhere. It
1889 // happens that I have to loop through all relevant nodes to find out whether one is used or not.
1890 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
1891 tensor_blocks[i].flags = UNASSIGNED, tensor_blocks[i].type = tensor_symbol_info[i].info.type, tensor_blocks[i].bypass_ref = tensor_symbol_info[i].bypass_ref;
1892 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_symbol_info)) const node __attribute__
((unused)) = (exec_symbol_info) + idx;
{
1893 for (i = 0; i < node->input_size; i++)
1894 if (node->inputs[i] >= 0)
1895 {
1896 tensor_blocks[node->inputs[i]].flags = 0;
1897 // If this is a data transfer node, and this is CPU memory, mark the memory type as pinned mem.
1898 // This will get propagated back to the buffer, and used there to determine the allocation function to use.
1899 if (CCV_TENSOR_GET_MEMORY(tensor_blocks[node->inputs[i]].type)((tensor_blocks[node->inputs[i]].type) & 0x3) == CCV_TENSOR_CPU_MEMORY &&
1900 (node->cmd.cmd == CCV_NNC_DATA_TRANSFER_FORWARD || node->cmd.cmd == CCV_NNC_DATA_TRANSFER_BACKWARD))
1901 tensor_blocks[node->inputs[i]].pin_mem = 1;
1902 }
1903 for (i = 0; i < node->output_size; i++)
1904 if (node->outputs[i] >= 0)
1905 {
1906 tensor_blocks[node->outputs[i]].flags = 0;
1907 // If this is a data transfer node, and this is CPU memory, mark the memory type as pinned mem.
1908 // This will get propagated back to the buffer, and used there to determine the allocation function to use.
1909 if (CCV_TENSOR_GET_MEMORY(tensor_blocks[node->outputs[i]].type)((tensor_blocks[node->outputs[i]].type) & 0x3) == CCV_TENSOR_CPU_MEMORY &&
1910 (node->cmd.cmd == CCV_NNC_DATA_TRANSFER_FORWARD || node->cmd.cmd == CCV_NNC_DATA_TRANSFER_BACKWARD))
1911 tensor_blocks[node->outputs[i]].pin_mem = 1;
1912 }
1913 } ccv_nnc_graph_visit_endfor} }
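 // After this pass (plus the parent-node pass below, when this is a sub-graph), any tensor block still
 // flagged UNASSIGNED is not read or written by any node reachable in this visit, so it will never be
 // given an allocation.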
1914 if (p_node_info)
1915 {
1916 assert(p_tensor_symbol_info)((void) sizeof ((p_tensor_symbol_info) ? 1 : 0), __extension__
({ if (p_tensor_symbol_info) ; else __assert_fail ("p_tensor_symbol_info"
, "ccv_nnc_symbolic_graph_compile.c", 1916, __extension__ __PRETTY_FUNCTION__
); }))
;
1917 // Mark it as used if it is used in either input or output.
1918 for (i = 0; i < p_node_info->input_size; i++)
1919 if (p_node_info->inputs[i] >= 0)
1920 {
1921 const int d = p_node_info->inputs[i];
1922 if (p_tensor_symbol_info[d].s_ref && p_tensor_symbol_info[d].s_ref->rnum >= symbolic_graph->p_idx)
1923 {
1924 const int dd = *(int*)ccv_array_get(p_tensor_symbol_info[d].s_ref, symbolic_graph->p_idx - 1)((void*)(((char*)((p_tensor_symbol_info[d].s_ref)->data)) +
(size_t)(p_tensor_symbol_info[d].s_ref)->rsize * (size_t)
(symbolic_graph->p_idx - 1)))
- 1;
1925 if (dd >= 0) // If this exists in this sub-graph, great.
1926 tensor_blocks[dd].flags = 0;
1927 }
1928 }
1929 for (i = 0; i < p_node_info->output_size; i++)
1930 if (p_node_info->outputs[i] >= 0)
1931 {
1932 const int d = p_node_info->outputs[i];
1933 if (p_tensor_symbol_info[d].s_ref && p_tensor_symbol_info[d].s_ref->rnum >= symbolic_graph->p_idx)
1934 {
1935 const int dd = *(int*)ccv_array_get(p_tensor_symbol_info[d].s_ref, symbolic_graph->p_idx - 1) - 1;
1936 if (dd >= 0) // If this exists in this sub-graph, great.
1937 tensor_blocks[dd].flags = 0;
1938 }
1939 }
1940 }
1941 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
1942 if (!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i]))
1943 {
1944 // Check no tensor info is auto now.
1945 assert(!ccv_nnc_is_tensor_auto(tensor_symbol_info[i].info));
1946 // If this tensor is used in assign_ref, set it to be un-foldable. (It will be used as a parameter,
1947 // therefore, its life-cycle almost certainly won't concatenate properly with the tensor to
1948 // fold to.)
1949 if (tensor_symbol_info[i].assign_ref)
1950 {
1951 // TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[i]);
1952 // It can be folded as input (it is fine to be overwritten), but it cannot be folded as output (when folded as input,
1953 // it keeps its own representation, which is not the case for output).
1954 TENSOR_SET_UNFOLDABLE_AS_OUTPUT(tensor_blocks[i]);
1955 const int assign_ref = tensor_symbol_info[i].assign_ref - 1;
1956 // But for where it comes from, it cannot be folded as input, because it cannot be overwritten any time.
1957 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[assign_ref]);
1958 // It also cannot be folded as output (except i), because we need to keep its own representation.
1959 TENSOR_SET_UNFOLDABLE_AS_OUTPUT(tensor_blocks[assign_ref]);
1960 assert(tensor_blocks[assign_ref].unfoldable_except_ref == 0);
1961 tensor_blocks[assign_ref].unfoldable_except_ref = i + 1;
1962 for (j = 0; j < unroll_count; j++)
1963 {
1964 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[dup_tensor_block_ref[i * unroll_count + j]]);
1965 TENSOR_SET_UNFOLDABLE_AS_OUTPUT(tensor_blocks[dup_tensor_block_ref[i * unroll_count + j]]);
1966 }
1967 if (tensor_blocks[assign_ref].bypass_ref)
1968 {
1969 // If it contains a bypass_ref, that means we can fold into both the bypass and except_ref, making it untenable.
1970 tensor_blocks[assign_ref].unfoldable_except_ref = 0;
1971 const int bypass_ref = tensor_blocks[assign_ref].bypass_ref - 1;
1972 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[bypass_ref]);
1973 TENSOR_SET_UNFOLDABLE_AS_OUTPUT(tensor_blocks[bypass_ref]);
1974 // On the other hand, it can be folded into the except_ref for the bypass_ref.
1975 tensor_blocks[bypass_ref].unfoldable_except_ref = i + 1;
1976 if (dup_tensor_from_ref)
1977 {
1978 const int bypass_from_ref = dup_tensor_from_ref[bypass_ref];
1979 if (bypass_from_ref >= 0)
1980 {
1981 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[bypass_from_ref]);
1982 TENSOR_SET_UNFOLDABLE_AS_OUTPUT(tensor_blocks[bypass_from_ref]);
1983 assert(dup_tensor_block_ref[bypass_from_ref * unroll_count + unroll_count - 1] == bypass_ref);
1984 for (j = 0; j < unroll_count - 1; j++)
1985 {
1986 // Mark every incarnation as unfold-able.
1987 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[dup_tensor_block_ref[bypass_from_ref * unroll_count + j]]);
1988 TENSOR_SET_UNFOLDABLE_AS_OUTPUT(tensor_blocks[dup_tensor_block_ref[bypass_from_ref * unroll_count + j]]);
1989 }
1990 }
1991 }
1992 }
1993 }
1994 }
1995 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
1996 {
1997 // If it has a pair reference, we don't need to allocate this tensor at all,
1998 // set it to be unassigned.
1999 if (tensor_symbol_info[i].pair_ref)
2000 TENSOR_EXPECT_SET_UNASSIGNED(tensor_blocks[i]);
2001 // If it is a tape variable, set it to be un-foldable too (otherwise we cannot use the tape properly).
2002 else if (tensor_symbol_info[i].flags & CCV_NNC_TENSOR_SYMBOL_TAPE_VAR) {
2003 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[i]);
2004 TENSOR_SET_UNFOLDABLE_AS_OUTPUT(tensor_blocks[i]);
2005 // For this case, there is no exception.
2006 tensor_blocks[i].unfoldable_except_ref = 0;
2007 } else if (tensor_symbol_info[i].p_ref) {
2008 assert(p_node_info);
2009 const int p_ref_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(tensor_symbol_info[i].p_ref - 1, p_node_info);
2010 // If I am a case of graph, and this tensor is the input from the parent graph, you cannot fold it as input.
2011 if (p_node_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
2012 // TODO: This check can be lifted if we can fold in the parent graph.
2013 if (-1 == p_ref_is_in_or_out)
2014 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[i]);
2015 if (1 == p_ref_is_in_or_out) // If p_ref is an output, it cannot be folded as input.
2016 TENSOR_SET_UNFOLDABLE_AS_INPUT(tensor_blocks[i]);
2017 }
2018 }
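(Aside, not part of the analyzed file: the passes above and below constantly compute ref - 1, assign_ref - 1, p_ref - 1 and so on because those fields use a "start with 1" encoding in which 0 means "no reference". A minimal hedged sketch of that convention, with hypothetical toy names:)

#include <assert.h>

// Hypothetical toy type, for illustration only: a cross-reference is stored as
// (referenced index + 1) so that 0 can mean "no reference"; readers of the
// chain subtract 1 before indexing.
typedef struct {
	int ref; // 0 = no reference, otherwise (referenced index + 1).
} toy_block_t;

static int toy_resolve_ref(const toy_block_t* const blocks, int i)
{
	while (blocks[i].ref) // Follow the chain until a block with no ref.
		i = blocks[i].ref - 1;
	return i;
}

int main(void)
{
	const toy_block_t blocks[3] = { { .ref = 0 }, { .ref = 1 }, { .ref = 2 } };
	assert(toy_resolve_ref(blocks, 2) == 0); // 2 -> 1 -> 0
	return 0;
}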
2019 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2020 {
2021 if (tensor_symbol_info[i].alias_ref)
2022 {
2023 const int ref = tensor_symbol_info[i].alias_ref - 1;
2024 // If the referenced one is unassigned, mark this as assigned only if current one is assigned.
2025 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[ref]) && !TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i]))
2026 tensor_blocks[ref].flags = 0;
2027 // An alias cannot ref to another alias.
2028 assert(!tensor_symbol_info[ref].alias_ref);
2029 tensor_blocks[i].flags = ALIAS;
2030 tensor_blocks[i].ref = ref + 1; // Assign the ref.
2031 if (!tensor_blocks[ref].r_refs)
2032 tensor_blocks[ref].r_refs = ccv_array_new(sizeof(int), 0, 0);
2033 ccv_array_add_unique_int(tensor_blocks[ref].r_refs, i + 1);
2034 }
2035 }
2036 // Scan again and if the ref is not assigned, mark the alias not assigned.
2037 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2038 if (TENSOR_EXPECT_ALIAS(tensor_blocks[i]))
2039 {
2040 const int ref = tensor_blocks[i].ref - 1;
2041 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[ref]))
2042 {
2043 // Mark this as unassigned.
2044 tensor_blocks[i].flags = UNASSIGNED;
2045 tensor_blocks[i].ref = 0;
2046 }
2047 }
2048 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2049 {
2050 // If this tensor is not expected to be unassigned, allocate the arrays for s and t.
2051 if (TENSOR_EXPECT_COMPUTABLE(tensor_blocks[i]))
2052 {
2053 tensor_blocks[i].head = ccv_array_new(sizeof(int), 0, 0);
2054 tensor_blocks[i].tail = ccv_array_new(sizeof(int), 0, 0);
2055 // Cache tensor size (align to 16 bytes).
2056 tensor_blocks[i].size = (uint64_t)ccv_nnc_tensor_data_size(tensor_symbol_info[i].info);
2057 }
2058 // If there is a p_ref, add the one to the p_refs list.
2059 if (tensor_symbol_info[i].p_ref)
2060 tensor_blocks[i].p_refs[0] = tensor_symbol_info[i].p_ref;
2061 }
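(Aside, not from this file: the head / tail arrays allocated above hold the exec indices where a block's life-time begins and ends, and later passes keep growing those end points before the allocator decides which blocks may share memory. A hedged sketch of the underlying idea with a hypothetical linear-index span; the real pass reasons over exec_dep hop queries rather than plain indices:)

// Hypothetical sketch: a block's life-time as a [head, tail] span of exec indices.
typedef struct {
	int head; // Earliest exec touching the block, -1 while empty.
	int tail; // Latest exec touching the block, -1 while empty.
} toy_span_t;

static void toy_span_add_exec(toy_span_t* const span, const int exec_idx)
{
	if (span->head < 0 || exec_idx < span->head)
		span->head = exec_idx;
	if (span->tail < 0 || exec_idx > span->tail)
		span->tail = exec_idx;
}

// Two spans may share one memory region only when they don't overlap.
static int toy_span_interfere(const toy_span_t a, const toy_span_t b)
{
	return !(a.tail < b.head || b.tail < a.head);
}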
2062 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
2063 for (i = 0; i < node->input_size; i++)
2064 {
2065 int d = node->inputs[i];
2066 if (d < 0)
2067 continue;
2068 if (TENSOR_EXPECT_ALIAS(tensor_blocks[d]))
2069 d = tensor_symbol_info[d].alias_ref - 1;
2070 tensor_blocks[d].flags |= READ_ONLY;
2071 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[d]))
2072 continue;
2073 assert(TENSOR_EXPECT_COMPUTABLE(tensor_blocks[d]));
2074 /* If this is the first encounter, its head starts here (this tensor is init'ed outside of the graph
2075 * from the very beginning of the graph life-cycle and ends here). */
2076 if (tensor_blocks[d].head->rnum == 0 && !TENSOR_REQUIRE_INIT(tensor_symbol_info[d].flags))
2077 {
2078 for (j = 0; j < source_size; j++)
2079 {
2080 // If the source is connected to the current node, add it (otherwise we would create tensor blocks that are used in other streams, which is unnecessary).
2081 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, idx, sources[j].d);
2082 if (cell.i32 && cell.i32[0] > 0)
2083 _ccv_nnc_tensor_block_add_exec(exec_dep, sources[j].d, tensor_blocks[d]);
2084 }
2085 /* If this is read-only (based on SSA, i.e. first encountered as a read), and this is a
2086 * sub-graph (TODO: this condition can be lifted for case..of that is never in a while
2087 * loop; however, in that case, you need to prevent the read-only tensor from getting reused for the
2088 * output tensor, which is not obvious how to implement correctly), and it is not
2089 * assign_ref'd from anywhere (not a parameterized loop), we cannot reuse this region
2090 * of memory anyway (because on the second loop, we want to read the same value out).
2091 * Mark it to the end of the graph. */
2092 if (p_node_info && !tensor_symbol_info[d].assign_ref)
2093 for (j = 0; j < destination_size; j++)
2094 {
2095 // If the destination is connected to the current node, add it (otherwise we would create tensor blocks that are used in other streams, which is unnecessary).
2096 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, destinations[j].d, idx);
2097 if (cell.i32 && cell.i32[0] > 0)
2098 _ccv_nnc_tensor_block_add_exec(exec_dep, destinations[j].d, tensor_blocks[d]);
2099 }
2100 }
2101 _ccv_nnc_tensor_block_add_exec(exec_dep, idx, tensor_blocks[d]);
2102 }
2103 for (i = 0; i < node->output_size; i++)
2104 {
2105 int d = node->outputs[i];
2106 if (d < 0)
2107 continue;
2108 if (TENSOR_EXPECT_ALIAS(tensor_blocks[d]))
2109 d = tensor_symbol_info[d].alias_ref - 1;
2110 tensor_blocks[d].flags |= WRITE_ONLY;
2111 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[d]))
2112 continue;
2113 assert(TENSOR_EXPECT_COMPUTABLE(tensor_blocks[d]));
2114 _ccv_nnc_tensor_block_add_exec(exec_dep, idx, tensor_blocks[d]);
2115 }
2116 } ccv_nnc_graph_visit_endfor
2117 // For any assign_ref, its life-time kept until the end and wrap over.
2118 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2119 // If this tensor is not unassigned (or an alias) and it is assigned from somewhere else,
2120 // that "somewhere else" needs to keep its life-time till the end.
2121 if (TENSOR_EXPECT_COMPUTABLE(tensor_blocks[i]) &&
2122 p_node_info && tensor_symbol_info[i].assign_ref)
2123 {
2124 const int assign_ref = tensor_symbol_info[i].assign_ref - 1;
2125 for (j = 0; j < destination_size; j++)
2126 {
2127 // This logic is to be more conservative about which destinations we add to.
2128 // As of now, if we add everything, it is most likely fine. However, it may
2129 // cause issues in the future to do so naively. Thus, instead, we only add
2130 // the destination to it iff either the tensor is not used at all, or the
2131 // destination is on the same stream as the tensor block in some way.
2132 int flag = !tensor_blocks[assign_ref].tail;
2133 for (k = 0; !flag && k < tensor_blocks[assign_ref].tail->rnum; k++)
2134 {
2135 const int idx = *(int*)ccv_array_get(tensor_blocks[assign_ref].tail, k);
2136 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, destinations[j].d, idx);
2137 flag = (cell.i32 && cell.i32[0] > 0);
2138 }
2139 if (flag) // If there is no tail at all, add it. Otherwise, only add it if the destination is on the same stream with this tensor block somehow.
2140 _ccv_nnc_tensor_block_add_exec(exec_dep, destinations[j].d, tensor_blocks[assign_ref]);
2141 }
2142 }
2143 for (i = 0; i < output_size; i++)
2144 {
2145 assert(outputs[i].graph == symbolic_graph);
2146 int d = outputs[i].d;
2147 if (d < 0)
2148 continue;
2149 if (TENSOR_EXPECT_ALIAS(tensor_blocks[d]))
2150 d = tensor_symbol_info[d].alias_ref - 1;
2151 if (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[d]))
2152 continue;
2153 assert(TENSOR_EXPECT_COMPUTABLE(tensor_blocks[d]));
2154 for (j = 0; j < destination_size; j++)
2155 {
2156 int flag = !tensor_blocks[d].tail;
2157 for (k = 0; !flag && k < tensor_blocks[d].tail->rnum; k++)
2158 {
2159 const int idx = *(int*)ccv_array_get(tensor_blocks[d].tail, k);
2160 const ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, destinations[j].d, idx);
2161 flag = (cell.i32 && cell.i32[0] > 0);
2162 }
2163 if (flag) // If there is no tail at all, add it. Otherwise, only add it if the destination is on the same stream with this tensor block somehow.
2164 _ccv_nnc_tensor_block_add_exec(exec_dep, destinations[j].d, tensor_blocks[d]);
2165 }
2166 }
2167 // Enforce tensor reuse by collapsing tensors for in-place operations. We will fault if this cannot be done.
2168 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
2169 int x, y;
2170 for (x = 0; x < node->input_size; x++)
2171 for (y = 0; y < node->output_size; y++)
2172 /* Some operations enforces some tensors to be the same for inputs / outputs. */
2173 if (ccv_nnc_cmd_enforce_inplace(node->cmd, x, node->input_size, y, node->output_size))
2174 {
2175 // If both unassigned, it is fine.
2176 if (node->inputs[x] < 0 && node->outputs[y] < 0)
2177 continue;
2178 int ref = node->inputs[x];
2179 assert(ref >= 0);
2180 while (!TENSOR_EXPECT_COMPUTABLE(tensor_blocks[ref]) && tensor_blocks[ref].ref)
2181 ref = tensor_blocks[ref].ref - 1;
2182 const int node_output_y = node->outputs[y];
2183 assert(node_output_y >= 0);
2184 // If both are not computable, it is fine, we don't need to enforce.
2185 if (!TENSOR_EXPECT_COMPUTABLE(tensor_blocks[ref]) &&
2186 !TENSOR_EXPECT_COMPUTABLE(tensor_blocks[node_output_y]))
2187 continue;
2188 // Otherwise, enforce and error out if failed.
2189 if (!_ccv_nnc_tensor_blocks_try_fold(tensor_blocks, ref, node_output_y))
2190 { assert(0 && "cannot enforce inplace for the two tensors"); }
2191 }
2192 } ccv_nnc_graph_visit_endfor
2193 // Ignore tensors that are already bound, no matter whether they are used or not. Doing it here because
2194 // we need to make sure enforced tensors are properly assigned, so that we don't bind on a tensor
2195 // that is not enforced in-place (because the tensor enforced in-place will be different from the
2196 // binding one).
2197 for (i = 0; i < tensor_bind_size; i++)
2198 {
2199 const ccv_nnc_tensor_symbol_t resolved_symbol = ccv_nnc_tensor_symbol_resolve(symbolic_graph, tensor_binds[i].symbol);
2200 // If there is a tensor bound, then it is unassigned.
2201 if (resolved_symbol.d >= 0)
2202 {
2203 int d = resolved_symbol.d;
2204 // I cannot assert too much at this moment.
2205 if (TENSOR_EXPECT_ALIAS(tensor_blocks[d]))
2206 d = tensor_symbol_info[d].alias_ref - 1; // Bind back to the original.
2207 // This check is for in-place ops. Only an in-place op could be unassigned but still have a ref.
2208 // It has nothing to do with alias.
2209 while (TENSOR_EXPECT_UNASSIGNED(tensor_blocks[d]) && tensor_blocks[d].ref)
2210 d = tensor_blocks[d].ref - 1;
2211 // Doesn't work if this is a loop-carrying variable.
2212 assert(!tensor_symbol_info[d].assign_ref);
2213 tensor_blocks[d].flags = UNASSIGNED;
2214 tensor_blocks[d].ref = 0; // No need to have ref as well.
2215 }
2216 }
2217 // Maximize tensor reuse by collapsing tensors where in-place operations are allowed (and the start / end tensors match).
2218 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
2219 int x, y;
2220 for (x = 0; x < node->input_size; x++)
2221 {
2222 /* If the input is not assigned, it can be referenced, find the referenced one */
2223 int ref = node->inputs[x];
2224 if (ref < 0)
2225 continue;
2226 while (!TENSOR_EXPECT_COMPUTABLE(tensor_blocks[ref]) && tensor_blocks[ref].ref)
2227 ref = tensor_blocks[ref].ref - 1;
2228 assert(tensor_blocks[ref].ref == 0);
2229 const ccv_nnc_tensor_symbol_info_t x_symbol = tensor_symbol_info[ref];
2230 if (TENSOR_EXPECT_COMPUTABLE(tensor_blocks[ref]) &&
2231 tensor_blocks[ref].tail->rnum == 1)
2232 for (y = 0; y < node->output_size; y++)
2233 /* Only proceed if the input symbol is different from the output symbol, */
2234 /* and the input symbol meets the output symbol exactly at the same spot. */
2235 if (ccv_nnc_cmd_allow_inplace(node->cmd, x, node->input_size, y, node->output_size) &&
2236 node->outputs[y] >= 0 &&
2237 ref != node->outputs[y] &&
2238 TENSOR_EXPECT_COMPUTABLE(tensor_blocks[node->outputs[y]]))
2239 {
2240 const int node_output_y = node->outputs[y];
2241 const ccv_nnc_tensor_symbol_info_t y_symbol = tensor_symbol_info[node_output_y];
2242 /* If dimension matches perfectly, then we can assign y_symbol to x. */
2243 if (memcmp(x_symbol.info.dim, y_symbol.info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) == 0)
2244 _ccv_nnc_tensor_blocks_try_fold(tensor_blocks, ref, node_output_y);
2245 }
2246 }
2247 } ccv_nnc_graph_visit_endfor
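(Aside, not from this file: the memcmp on dim above is the entire gate for this opportunistic in-place folding — the output may only collapse onto the input when the two symbols describe exactly the same dimensions. A small hedged sketch of that check with a hypothetical descriptor; TOY_MAX_DIM stands in for CCV_NNC_MAX_DIM_ALLOC:)

#include <string.h>

#define TOY_MAX_DIM 12 // Hypothetical stand-in for CCV_NNC_MAX_DIM_ALLOC.

typedef struct {
	int dim[TOY_MAX_DIM];
} toy_tensor_desc_t;

// Returns 1 when the output may reuse the input's storage in-place:
// every dimension must match exactly.
static int toy_can_fold_inplace(const toy_tensor_desc_t* const x, const toy_tensor_desc_t* const y)
{
	return memcmp(x->dim, y->dim, sizeof(int) * TOY_MAX_DIM) == 0;
}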
2248 // Specifically handle the bypass. This needs to be done after the first pass.
2249 // I need to extend the bypass life-time to the same as the one I am going with.
2250 // It is important we visit these nodes and assign bypass_ref to its dependents in topological order.
2251 ccv_nnc_tensor_block_t empty_block = {};
2252 empty_block.head = ccv_array_new(sizeof(int), 0, 0);
2253 empty_block.tail = ccv_array_new(sizeof(int), 0, 0);
2254 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
2255 if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
2256 {
2257 int can_bypass = 1;
2258 for (i = 0; can_bypass && i < node->output_size; i++)
2259 {
2260 int d = node->outputs[i];
2261 if (d < 0)
2262 continue;
2263 if (!tensor_blocks[d].bypass_ref)
2264 continue;
2265 while (tensor_blocks[d].ref)
2266 d = tensor_blocks[d].ref - 1;
2267 int bypass_ref = tensor_blocks[node->outputs[i]].bypass_ref - 1;
2268 while (tensor_blocks[bypass_ref].ref)
2269 bypass_ref = tensor_blocks[bypass_ref].ref - 1;
2270 // If this doesn't participate in the while loop, we don't need to check the while loop constraint.
2271 if (!tensor_symbol_info[bypass_ref].assign_ref && !tensor_symbol_info[bypass_ref].r_assign_ref)
2272 continue;
2273 ccv_array_clear(empty_block.head);
2274 for (j = 0; tensor_blocks[bypass_ref].head && j < tensor_blocks[bypass_ref].head->rnum; j++)
2275 ccv_array_push(empty_block.head, ccv_array_get(tensor_blocks[bypass_ref].head, j));
2276 ccv_array_clear(empty_block.tail);
2277 for (j = 0; tensor_blocks[bypass_ref].tail && j < tensor_blocks[bypass_ref].tail->rnum; j++)
2278 ccv_array_push(empty_block.tail, ccv_array_get(tensor_blocks[bypass_ref].tail, j));
2279 for (j = 0; tensor_blocks[d].head && j < tensor_blocks[d].head->rnum; j++)
2280 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[d].head, j), empty_block);
2281 for (j = 0; tensor_blocks[d].tail && j < tensor_blocks[d].tail->rnum; j++)
2282 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[d].tail, j), empty_block);
2283 // It can only be unfoldable due to while constraint. Check whether this satisfies the while loop constraint.
2284 assert(!(tensor_symbol_info[bypass_ref].assign_ref && tensor_symbol_info[bypass_ref].r_assign_ref));
2285 int b_ref = (tensor_symbol_info[bypass_ref].assign_ref) ? tensor_symbol_info[bypass_ref].assign_ref - 1 : tensor_symbol_info[bypass_ref].r_assign_ref - 1;
2286 while (tensor_blocks[b_ref].ref)
2287 b_ref = tensor_blocks[b_ref].ref - 1;
2288 int a_hop_b = _ccv_nnc_tensor_block_head_after_tail(exec_dep, empty_block, tensor_blocks[b_ref]);
2289 int b_hop_a = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[b_ref], empty_block);
2290 // These two can be assigned to the same region of memory without issue (because their life-time doesn't interfere)
2291 // even after we extend the life-time of bypass_ref. Then we are in a good shape.
2292 can_bypass = can_bypass && (a_hop_b || b_hop_a);
2293 }
2294 if (can_bypass)
2295 {
2296 for (i = 0; i < node->output_size; i++)
2297 {
2298 int d = node->outputs[i];
2299 if (d < 0)
2300 continue;
2301 if (!tensor_blocks[d].bypass_ref)
2302 continue;
2303 while (tensor_blocks[d].ref)
2304 d = tensor_blocks[d].ref - 1;
2305 int bypass_ref = tensor_blocks[node->outputs[i]].bypass_ref - 1;
2306 while (tensor_blocks[bypass_ref].ref)
2307 bypass_ref = tensor_blocks[bypass_ref].ref - 1;
2308 // The bypass_ref can extend its life-time.
2309 for (j = 0; tensor_blocks[d].head && j < tensor_blocks[d].head->rnum; j++)
2310 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[d].head, j), tensor_blocks[bypass_ref]);
2311 for (j = 0; tensor_blocks[d].tail && j < tensor_blocks[d].tail->rnum; j++)
2312 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[d].tail, j), tensor_blocks[bypass_ref]);
2313 }
2314 } else {
2315 for (i = 0; i < node->output_size; i++)
2316 tensor_blocks[node->outputs[i]].bypass_ref = 0;
2317 const int exec_idx = (dup_exec_from_ref) ? dup_exec_from_ref[idx] : idx;
2318 // Mark this exec as no-bypass IO (thus, I need to insert an explicit data transfer).
2319 exec_flags[exec_idx].flags |= CCV_NNC_GRAPH_EXEC_ATTR_CASE_OF_NO_BYPASS_IO;
2320 }
2321 }
2322 } ccv_nnc_graph_visit_endfor
2323 ccv_array_free(empty_block.head);
2324 ccv_array_free(empty_block.tail);
2325 *r_exec_dep = exec_dep;
2326 *r_tensor_blocks = tensor_blocks;
2327}
2328
2329static ccv_nnc_cmd_t _ccv_nnc_subst_sub_graph_with_noop(const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd)
2330{
2331 if (cmd.cmd == CCV_NNC_GRAPH_FORWARD || cmd.cmd == CCV_NNC_GRAPH_BACKWARD)
2332 {
2333 ccv_nnc_cmd_t retval = cmd;
2334 retval.cmd = CCV_NNC_NOOP;
2335 return retval;
2336 }
2337 return cmd;
2338}
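(Aside: this hook is handed to ccv_nnc_symbolic_graph_dup in _ccv_nnc_redo_exec_dep_and_tensor_blocks_when_unroll further below, so the duplicated graph carries noops where the original had sub-graph execs. A hedged sketch of the same substitution-callback pattern, using hypothetical toy names and command ids:)

// Hypothetical illustration of a command-substitution callback: the dup routine
// maps every command through a user-supplied hook while copying the graph.
typedef struct { int cmd; } toy_cmd_t;
typedef toy_cmd_t (*toy_subst_f)(const toy_cmd_t cmd);

static toy_cmd_t toy_subst_graph_with_noop(const toy_cmd_t cmd)
{
	toy_cmd_t retval = cmd;
	if (cmd.cmd == 100 /* hypothetical "sub-graph" command id */)
		retval.cmd = 0; /* hypothetical "noop" command id */
	return retval;
}

static void toy_dup_cmds(toy_cmd_t* const dst, const toy_cmd_t* const src, const int size, const toy_subst_f subst)
{
	int i;
	for (i = 0; i < size; i++)
		dst[i] = subst(src[i]);
}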
2339
2340static ccv_nnc_tensor_symbol_t _ccv_nnc_dup_tensor_symbol(ccv_nnc_symbolic_graph_t* const dup_graph, const int unroll_count, int* const dup_tensor_block_ref, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const int input)
2341{
2342 if (dup_tensor_block_ref[input * unroll_count] < 0) // No tensor ref, create one.
2343 {
2344 if (tensor_symbol_info[input].alias_ref)
2345 {
2346 const int alias_ref = tensor_symbol_info[input].alias_ref - 1;
2347 assert(tensor_symbol_info[alias_ref].alias_ref == 0);
2348 ccv_nnc_tensor_symbol_t tensor_symbol = {};
2349 if (dup_tensor_block_ref[alias_ref * unroll_count] < 0)
2350 {
2351 tensor_symbol = ccv_nnc_tensor_symbol_new(dup_graph, tensor_symbol_info[alias_ref].info, 0);
2352 if (tensor_symbol_info[alias_ref].pair_ref)
2353 ccv_nnc_tensor_symbol_pair_with(dup_graph, tensor_symbol, (ccv_nnc_tensor_symbol_t){
2354 .d = tensor_symbol_info[alias_ref].pair_ref - 1,
2355 .graph = dup_graph->pair
2356 });
2357 ccv_nnc_tensor_symbol_set_flags(dup_graph, tensor_symbol, tensor_symbol_info[alias_ref].flags);
2358 dup_tensor_block_ref[alias_ref * unroll_count] = tensor_symbol.d;
2359 } else {
2360 tensor_symbol.d = dup_tensor_block_ref[alias_ref * unroll_count];
2361 tensor_symbol.graph = dup_graph;
2362 }
2363 ccv_nnc_tensor_symbol_t alias_symbol = ccv_nnc_tensor_symbol_alias_new(dup_graph, tensor_symbol, tensor_symbol_info[input].ofs, tensor_symbol_info[input].inc, tensor_symbol_info[input].info, 0);
2364 if (tensor_symbol_info[input].pair_ref)
2365 ccv_nnc_tensor_symbol_pair_with(dup_graph, alias_symbol, (ccv_nnc_tensor_symbol_t){
2366 .d = tensor_symbol_info[input].pair_ref - 1,
2367 .graph = dup_graph->pair
2368 });
2369 ccv_nnc_tensor_symbol_set_flags(dup_graph, alias_symbol, tensor_symbol_info[input].flags);
2370 dup_tensor_block_ref[input * unroll_count] = alias_symbol.d;
2371 } else {
2372 ccv_nnc_tensor_symbol_t tensor_symbol = ccv_nnc_tensor_symbol_new(dup_graph, tensor_symbol_info[input].info, 0);
2373 if (tensor_symbol_info[input].pair_ref)
2374 ccv_nnc_tensor_symbol_pair_with(dup_graph, tensor_symbol, (ccv_nnc_tensor_symbol_t){
2375 .d = tensor_symbol_info[input].pair_ref - 1,
2376 .graph = dup_graph->pair
2377 });
2378 ccv_nnc_tensor_symbol_set_flags(dup_graph, tensor_symbol, tensor_symbol_info[input].flags);
2379 dup_tensor_block_ref[input * unroll_count] = tensor_symbol.d;
2380 }
2381 if (tensor_symbol_info[input].bypass_ref)
2382 {
2383 const int dup_bypass_ref = dup_tensor_block_ref[(tensor_symbol_info[input].bypass_ref - 1) * unroll_count];
2384 assert(dup_bypass_ref >= 0);
2385 ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(dup_graph->tensor_symbol_info, dup_tensor_block_ref[input * unroll_count]);
2386 symbol_info->bypass_ref = dup_bypass_ref + 1;
2387 }
2388 }
2389 return (ccv_nnc_tensor_symbol_t) {
2390 .d = dup_tensor_block_ref[input * unroll_count],
2391 .graph = dup_graph,
2392 };
2393}
2394
2395static ccv_nnc_graph_exec_symbol_t _ccv_nnc_dup_graph_exec_symbol(ccv_nnc_symbolic_graph_t* const dup_graph, const int unroll_count, int* const dup_exec_ref, int* const dup_tensor_block_ref, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_nnc_graph_exec_symbol_info_t* const node, const int idx, ccv_nnc_tensor_symbol_t* const max_inputs, ccv_nnc_tensor_symbol_t* const max_outputs)
2396{
2397 int i;
2398 if (dup_exec_ref[idx * unroll_count] < 0)
2399 {
2400 // Input has to come before output, because an output could have a bypass reference to the input.
2401 for (i = 0; i < node->input_size; i++)
2402 max_inputs[i] = (node->inputs[i] >= 0) ? _ccv_nnc_dup_tensor_symbol(dup_graph, unroll_count, dup_tensor_block_ref, tensor_symbol_info, node->inputs[i]) : (ccv_nnc_tensor_symbol_t){ .d = node->inputs[i], .graph = dup_graph };
2403 for (i = 0; i < node->output_size; i++)
2404 max_outputs[i] = (node->outputs[i] >= 0) ? _ccv_nnc_dup_tensor_symbol(dup_graph, unroll_count, dup_tensor_block_ref, tensor_symbol_info, node->outputs[i]) : (ccv_nnc_tensor_symbol_t){ .d = node->outputs[i], .graph = dup_graph };
2405 ccv_nnc_graph_exec_symbol_t exec_symbol = ccv_nnc_graph_exec_symbol_new(dup_graph, node->cmd, max_inputs, node->input_size, max_outputs, node->output_size, 0);
2406 dup_exec_ref[idx * unroll_count] = exec_symbol.d;
2407 }
2408 return (ccv_nnc_graph_exec_symbol_t) {
2409 .d = dup_exec_ref[idx * unroll_count],
2410 .graph = dup_graph,
2411 };
2412}
2413
2414static void _ccv_nnc_tensor_blocks_free(ccv_nnc_tensor_block_t* const tensor_blocks, const int tensor_block_size)
2415{
2416 int i;
2417 for (i = 0; i < tensor_block_size; i++)
2418 {
2419 if (tensor_blocks[i].head)
2420 ccv_array_free(tensor_blocks[i].head);
2421 if (tensor_blocks[i].tail)
2422 ccv_array_free(tensor_blocks[i].tail);
2423 if (tensor_blocks[i].r_refs)
2424 ccv_array_free(tensor_blocks[i].r_refs);
2425 if (tensor_blocks[i].dup_p_refs)
2426 ccv_array_free(tensor_blocks[i].dup_p_refs);
2427 }
2428 ccfree(tensor_blocks);
2429}
2430
2431// Find tensors that cannot be solved by co-allocating to the same location.
2432static int _ccv_nnc_exec_dep_and_tensor_blocks_unroll_count(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_sparse_matrix_t* const exec_dep, ccv_nnc_tensor_block_t* const tensor_blocks)
2433{
2434 int i, j, unroll_count = 0;
2435 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2436 if (!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i]) && tensor_symbol_info[i].assign_ref)
2437 {
2438 // This is a parameter, thus, it has to be either an alias or used.
2439 assert(tensor_blocks[i].ref || TENSOR_EXPECT_ORDINARY(tensor_blocks[i]));
2440 const int assign_ref = tensor_symbol_info[i].assign_ref - 1; // Starts at 1.
2441 // The parameter it assign to has to be either an alias or used.
2442 assert(tensor_blocks[assign_ref].ref || TENSOR_EXPECT_ORDINARY(tensor_blocks[assign_ref]));
2443 // If either of these two (assigner and assignee) is an alias, check to see if they are the same.
2444 // If it is the same, we are good, no need to extend.
2445 int a_ref = i;
2446 while (tensor_blocks[a_ref].ref)
2447 a_ref = tensor_blocks[a_ref].ref - 1;
2448 int b_ref = assign_ref;
2449 while (tensor_blocks[b_ref].ref)
2450 b_ref = tensor_blocks[b_ref].ref - 1;
2451 if (a_ref != b_ref)
2452 {
2453 // If any of the b's head is deterministically later than a's tail
2454 // or any of the b's tail is deterministically earlier than a's head, they don't interfere.
2455 int a_hop_b = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[b_ref], tensor_blocks[a_ref]);
2456 int b_hop_a = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[a_ref], tensor_blocks[b_ref]);
2457 // It cannot be that both i can hop to j and j can hop to i.
2458 assert(!(a_hop_b > 0 && b_hop_a > 0));
2459 // Can it be folded
2460 // These two can be assigned to the same region of memory without issue (because their life-time doesn't interfere).
2461 if (a_hop_b || b_hop_a)
2462 {
2463 tensor_blocks[a_ref].companion_ref = b_ref + 1;
2464 tensor_blocks[b_ref].companion_ref = a_ref + 1;
2465 continue;
2466 }
2467 int c_ref = tensor_symbol_info[b_ref].assign_ref - 1;
2468 for (j = 0; c_ref >= 0; j++)
2469 {
2470 while (tensor_blocks[c_ref].ref)
2471 c_ref = tensor_blocks[c_ref].ref - 1;
2472 c_ref = tensor_symbol_info[c_ref].assign_ref - 1;
2473 }
2474 unroll_count = ccv_max(unroll_count, j + 1);
2475 }
2476 }
2477 // Reset companion_ref if need to unroll.
2478 if (unroll_count)
2479 for (j = 0; j < symbolic_graph->tensor_symbol_info->rnum; j++)
2480 tensor_blocks[j].companion_ref = 0;
2481 return unroll_count;
2482}
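(Aside: when the companion trick fails, the j loop above walks the assign_ref chain to decide how many extra copies of the loop body are needed. A hedged sketch of that chain-length count, assuming the same index + 1 / 0-means-none encoding and a chain that terminates:)

// Hypothetical sketch: count the hops in an assign_ref chain starting at i.
// assign_ref[i] == 0 means "no assign_ref"; otherwise it is (index + 1).
static int toy_assign_chain_hops(const int* const assign_ref, const int i)
{
	int hops = 0;
	int c = assign_ref[i] - 1; // -1 when there is no chain at all.
	while (c >= 0)
	{
		++hops;
		c = assign_ref[c] - 1; // Assumes the chain terminates (no cycle).
	}
	return hops;
}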
2483
2484static void _ccv_nnc_exec_dep_and_tensor_blocks_unroll_n(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_graph_visit_t* const visit, const int unroll_count, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_sparse_matrix_t* const exec_dep, const ccv_nnc_tensor_block_t* const tensor_blocks, ccv_nnc_symbolic_graph_t* const dup_graph, int* const r_dup_tensor_block_ref, int* const r_dup_exec_ref)
2485{
2486 int i, j, n;
2487 // The inout exec nodes, these are the nodes we are going to extend.
2488 uint8_t* inout = (uint8_t*)cccalloc(symbolic_graph->exec_symbol_info->rnum, sizeof(uint8_t));
2489 int max_input_size = 0;
2490 int max_output_size = 0;
2491 for (i = 0; i < symbolic_graph->exec_symbol_info->rnum; i++)
2492 {
2493 max_input_size = ccv_max(exec_symbol_info[i].input_size, max_input_size);
2494 max_output_size = ccv_max(exec_symbol_info[i].output_size, max_output_size);
2495 }
2496 ccv_nnc_tensor_symbol_t max_inputs[ccv_max(1, max_input_size)];
2497 ccv_nnc_tensor_symbol_t max_outputs[ccv_max(1, max_output_size)];
2498 // Doing graph expansion
2499 // It goes without saying that we must have more than one tensor / exec (otherwise I cannot use 0 as no exec ref).
2500 assert(dup_graph->exec_symbol_info->rnum > 0);
2501 assert(dup_graph->tensor_symbol_info->rnum > 0);
2502#define INCOMING_NODE (1)
2503#define OUTGOING_NODE (2)
2504 // Unroll the graph n times.
2505 for (n = 0; n < unroll_count; n++)
2506 {
2507 int* const dup_exec_ref = r_dup_exec_ref + n;
2508 const int* const prev_dup_tensor_block_ref = n > 0 ? r_dup_tensor_block_ref + (n - 1) : 0;
2509 int* const dup_tensor_block_ref = r_dup_tensor_block_ref + n;
2510 for (i = 0; i < symbolic_graph->exec_symbol_info->rnum; i++)
2511 dup_exec_ref[i * unroll_count] = -1;
2512 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2513 {
2514 // If there is an assign_ref, that means I don't need to dup the tensor.
2515 if (tensor_symbol_info[i].assign_ref)
2516 {
2517 const int assign_ref = tensor_symbol_info[i].assign_ref - 1;
2518 dup_tensor_block_ref[i * unroll_count] = prev_dup_tensor_block_ref ? prev_dup_tensor_block_ref[assign_ref * unroll_count] : assign_ref;
2519 } else if (TENSOR_EXPECT_COMPUTABLE(tensor_blocks[i]) && TENSOR_READ_WRITE(tensor_blocks[i]) == READ_ONLY)
2520 // If this is a read-only tensor block, no need to duplicate because the value never changes
2521 // (note we handled assign_ref first), therefore, no need to generate duplicate.
2522 dup_tensor_block_ref[i * unroll_count] = i;
2523 else
2524 dup_tensor_block_ref[i * unroll_count] = -1;
2525 }
2526 // Go through the original graph, make copies of the node if it is inout.
2527 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
2528 ccv_nnc_graph_exec_symbol_t exec_symbol = _ccv_nnc_dup_graph_exec_symbol(dup_graph, unroll_count, dup_exec_ref, dup_tensor_block_ref, tensor_symbol_info, node, idx, max_inputs, max_outputs);
2529 inout[idx] |= INCOMING_NODE; /* Mark this node as incoming. */
2530 if (!node->outgoings)
2531 continue;
2532 for (i = 0; i < node->outgoings->rnum; i++)
2533 {
2534 const int outgoing_idx = *(int*)ccv_array_get(node->outgoings, i);
2535 inout[outgoing_idx] |= OUTGOING_NODE; /* Mark this node as outgoing. */
2536 ccv_nnc_graph_exec_symbol_t outgoing_symbol = _ccv_nnc_dup_graph_exec_symbol(dup_graph, unroll_count, dup_exec_ref, dup_tensor_block_ref, tensor_symbol_info, exec_symbol_info + outgoing_idx, outgoing_idx, max_inputs, max_outputs);
2537 ccv_nnc_graph_exec_symbol_concat(dup_graph, exec_symbol, outgoing_symbol);
2538 }
2539 } ccv_nnc_graph_visit_endfor
2540 // Check that the visited nodes are all marked as either incoming or outgoing.
2541 const ccv_nnc_graph_exec_symbol_t* const dup_destinations = ccv_nnc_symbolic_graph_destinations(dup_graph);
2542 const int dup_destination_size = ccv_nnc_symbolic_graph_destination_size(dup_graph);
2543 for (i = 0; i < symbolic_graph->exec_symbol_info->rnum; i++)
2544 {
2545 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info[i].flags))
2546 continue;
2547 assert((inout[i] & INCOMING_NODE) || (inout[i] & OUTGOING_NODE));
2548 // If this is a pure incoming node, then I need to concat this one with all original destination nodes
2549 if (inout[i] == INCOMING_NODE)
2550 for (j = 0; j < dup_destination_size; j++)
2551 {
2552 ccv_nnc_graph_exec_symbol_concat(dup_graph, (ccv_nnc_graph_exec_symbol_t) {
2553 .d = dup_destinations[j].d,
2554 .graph = dup_graph,
2555 }, (ccv_nnc_graph_exec_symbol_t) {
2556 .d = dup_exec_ref[i * unroll_count],
2557 .graph = dup_graph,
2558 });
2559 }
2560 }
2561 if (dup_graph->destinations)
2562 ccv_array_clear(dup_graph->destinations);
2563 for (i = 0; i < symbolic_graph->exec_symbol_info->rnum; i++)
2564 {
2565 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info[i].flags))
2566 continue;
2567 const int d = dup_exec_ref[i * unroll_count];
2568 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(dup_graph->exec_symbol_info, d);
2569 // If this has no outgoing node, add to the destination.
2570 if (!exec_symbol_info->outgoings || exec_symbol_info->outgoings->rnum == 0)
2571 ccv_nnc_symbolic_graph_add_destination(dup_graph, (ccv_nnc_graph_exec_symbol_t) {
2572 .graph = dup_graph,
2573 .d = d,
2574 });
2575 }
2576 }
2577#undef INCOMING_NODE
2578#undef OUTGOING_NODE
2579 ccfree(inout);
2580}
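(Aside: at a high level the routine above appends unroll_count copies of the body behind the original graph; per copy, loop-carried tensors (assign_ref) point at the previous copy's block, read-only blocks are shared, and everything else is duplicated. A much-simplified, hedged sketch of that per-step mapping rule with hypothetical arrays:)

// Hypothetical sketch of one unroll step's tensor mapping:
// map[i] == -1 means "create a fresh duplicate later".
static void toy_plan_unroll_step(const int* const assign_ref, const int* const read_only, const int* const prev_map, int* const map, const int tensor_count)
{
	int i;
	for (i = 0; i < tensor_count; i++)
	{
		if (assign_ref[i]) // "index + 1" encoding, 0 = none.
			map[i] = prev_map ? prev_map[assign_ref[i] - 1] : assign_ref[i] - 1;
		else if (read_only[i])
			map[i] = i; // Value never changes, so the original block is shared.
		else
			map[i] = -1; // Needs its own duplicated symbol in the expanded graph.
	}
}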
2581
2582static void _ccv_nnc_fixup_assign_ref_after_unroll(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const int unroll_count, const ccv_nnc_tensor_block_t* const tensor_blocks, const int* const dup_tensor_block_ref, ccv_nnc_tensor_symbol_info_t* const dup_tensor_symbol_info)
2583{
2584 int i;
2585 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++) // symbolic graph is the old graph and tensor blocks is the old tensor blocks.
2586 // Now can assign them (The dup) as companion.
2587 // Get to the last one, which we will wrap over.
2588 if (dup_tensor_symbol_info[i].assign_ref)
2589 {
2590 dup_tensor_symbol_info[dup_tensor_symbol_info[i].assign_ref - 1].r_assign_ref = 0;
2591 dup_tensor_symbol_info[i].assign_ref = dup_tensor_block_ref[(dup_tensor_symbol_info[i].assign_ref - 1) * unroll_count + unroll_count - 1] + 1;
2592 assert(dup_tensor_symbol_info[i].assign_ref);
2593 dup_tensor_symbol_info[dup_tensor_symbol_info[i].assign_ref - 1].r_assign_ref = i + 1;
2594 }
2595}
2596
2597// If the tensor blocks are the outputs of this graph, its life-time should be extended to the end of this graph.
2598// However, it is not that simple if the graph is unrolled. For unrolled graph, it needs to reach the end of
2599// the "original" graph and all its duplicated ends (for their duplicated tensor blocks).
2600static void _ccv_nnc_fixup_tensor_blocks_for_outputs(ccv_sparse_matrix_t* const exec_dep, ccv_nnc_tensor_block_t* const tensor_blocks, const ccv_nnc_graph_exec_symbol_info_t* const p_node_info, const int unroll_count, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const int p_idx, const ccv_nnc_tensor_symbol_info_t* const p_tensor_symbol_info, const int p_tensor_symbol_info_size, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const int* const dup_exec_ref, const int* const dup_tensor_block_ref)
2601{
2602 int i, j, k;
2603 for (i = 0; i < p_node_info->output_size; i++)
2604 {
2605 const int d = p_node_info->outputs[i];
2606 const int s_ref = *(int*)ccv_array_get(p_tensor_symbol_info[d].s_ref, p_idx) - 1;
2607 if (!TENSOR_EXPECT_COMPUTABLE(tensor_blocks[s_ref]))
2608 continue;
2609 for (k = 0; k < destination_size; k++)
2610 _ccv_nnc_tensor_block_add_exec(exec_dep, destinations[k].d, tensor_blocks[s_ref]);
2611 // Add the duplicated destinations to the tensor_block_ref.
2612 for (j = 0; j < unroll_count; j++)
2613 for (k = 0; k < destination_size; k++)
2614 {
2615 const int dup_exec_idx = dup_exec_ref[destinations[k].d * unroll_count + j];
2616 const int dup_tensor_block_idx = dup_tensor_block_ref[s_ref * unroll_count + j];
2617 if (dup_exec_idx >= 0 && dup_tensor_block_idx >= 0)
2618 _ccv_nnc_tensor_block_add_exec(exec_dep, dup_exec_idx, tensor_blocks[dup_tensor_block_idx]);
2619 }
2620 }
2621}
2622
2623static void _ccv_nnc_redo_exec_dep_and_tensor_blocks_when_unroll(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_graph_exec_symbol_info_t* const p_node_info, const ccv_nnc_graph_visit_t* const visit, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_tensor_symbol_info_t* const p_tensor_symbol_info, const int p_tensor_symbol_info_size, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, ccv_nnc_graph_exec_flag_t* const exec_flags, ccv_sparse_matrix_t** r_exec_dep, ccv_nnc_tensor_block_t** r_tensor_blocks, int* r_tensor_block_size, ccv_nnc_symbolic_graph_t** r_dup_graph, int* r_unroll_count, int** r_dup_exec_ref, int** r_dup_tensor_block_ref)
2624{
2625 int i, j;
2626 ccv_sparse_matrix_t* exec_dep = *r_exec_dep;
2627 ccv_nnc_tensor_block_t* tensor_blocks = *r_tensor_blocks;
2628 // Find the blocks that cannot be simply solved with either in-place-operation tensor block folding or using the same memory region.
2629 // Unfortunately, I cannot apply this analysis to the block folding done for sub-graphs, because we do sub-graph placement later.
2630 // No need to change anything, we are good.
2631 const int unroll_count = _ccv_nnc_exec_dep_and_tensor_blocks_unroll_count(symbolic_graph, tensor_symbol_info, exec_dep, tensor_blocks);
2632 if (!unroll_count)
2633 return;
2634 // Have conditions that cannot be satisfied with simple solution (allocate to the same memory region).
2635 // Doing graph expansion, first duplicate the old graph, but replace all sub graphs with noop.
2636 ccv_nnc_symbolic_graph_t* dup_graph = ccv_nnc_symbolic_graph_dup(symbolic_graph, _ccv_nnc_subst_sub_graph_with_noop);
2637 int* dup_exec_ref = (int*)ccmalloc(sizeof(int) * symbolic_graph->exec_symbol_info->rnum * unroll_count);
2638 int* dup_tensor_block_ref = (int*)ccmalloc(sizeof(int) * symbolic_graph->tensor_symbol_info->rnum * unroll_count);
2639 _ccv_nnc_exec_dep_and_tensor_blocks_unroll_n(symbolic_graph, visit, unroll_count, exec_symbol_info, tensor_symbol_info, exec_dep, tensor_blocks, dup_graph, dup_tensor_block_ref, dup_exec_ref);
2640 ccv_nnc_tensor_symbol_info_t* const dup_tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * dup_graph->tensor_symbol_info->rnum);
2641 ccv_nnc_graph_exec_symbol_info_t* const dup_exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * dup_graph->exec_symbol_info->rnum);
2642 ccv_nnc_graph_visit_t* dup_visit = ccv_nnc_graph_visit_new(dup_graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(dup_graph->exec_symbol_info, 0), dup_graph->exec_symbol_info->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_graph->sources, 0), dup_graph->sources->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_graph->destinations, 0), dup_graph->destinations->rnum, 0);
2643 ccv_nnc_symbolic_graph_symbol_infer(dup_graph, dup_visit, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_graph->sources, 0), dup_graph->sources->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_graph->destinations, 0), dup_graph->destinations->rnum, p_tensor_symbol_info, p_tensor_symbol_info_size, dup_tensor_symbol_info, dup_exec_symbol_info);
2644 _ccv_nnc_fixup_assign_ref_after_unroll(symbolic_graph, unroll_count, tensor_blocks, dup_tensor_block_ref, dup_tensor_symbol_info);
2645 // Free out the old exec_dep
2646 ccv_matrix_free(exec_dep);
2647 // and the tensor blocks, prepare for the new.
2648 _ccv_nnc_tensor_blocks_free(tensor_blocks, symbolic_graph->tensor_symbol_info->rnum);
2649 // A reverse map to find where the original tensor comes from.
2650 int* dup_tensor_from_ref = (int*)ccmalloc(sizeof(int) * dup_graph->tensor_symbol_info->rnum);
2651 for (i = 0; i < dup_graph->tensor_symbol_info->rnum; i++)
2652 dup_tensor_from_ref[i] = -1;
2653 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2654 for (j = 0; j < unroll_count; j++)
2655 if (dup_tensor_block_ref[i * unroll_count + j] >= 0)
2656 dup_tensor_from_ref[dup_tensor_block_ref[i * unroll_count + j]] = i;
2657 int* dup_exec_from_ref = (int*)ccmalloc(sizeof(int) * dup_graph->exec_symbol_info->rnum);
2658 for (i = 0; i < dup_graph->exec_symbol_info->rnum; i++)
2659 dup_exec_from_ref[i] = -1;
2660 for (i = 0; i < symbolic_graph->exec_symbol_info->rnum; i++)
2661 {
2662 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info[i].flags))
2663 continue;
2664 dup_exec_from_ref[i] = i; // Reference back.
2665 for (j = 0; j < unroll_count; j++)
2666 if (dup_exec_ref[i * unroll_count + j] >= 0)
2667 dup_exec_from_ref[dup_exec_ref[i * unroll_count + j]] = i;
2668 }
2669 // Reset all attr.
2670 memset(exec_flags, 0, sizeof(ccv_nnc_graph_exec_flag_t) * symbolic_graph->exec_symbol_info->rnum);
2671 _ccv_nnc_exec_dep_and_tensor_blocks_prep(dup_graph, p_node_info, dup_visit, tensor_binds, tensor_bind_size, 0, 0, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_graph->sources, 0), dup_graph->sources->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_graph->destinations, 0), dup_graph->destinations->rnum, p_tensor_symbol_info, p_tensor_symbol_info_size, dup_exec_symbol_info, dup_tensor_symbol_info, unroll_count, dup_tensor_block_ref, dup_tensor_from_ref, dup_exec_from_ref, exec_flags, &exec_dep, &tensor_blocks);
2672 ccv_nnc_graph_visit_free(dup_visit);
2673 ccfree(dup_exec_symbol_info);
2674 ccfree(dup_exec_from_ref);
2675 ccfree(dup_tensor_from_ref);
2676 // Assign out dup_p_ref, which will be used to extend the anonymous block life-time.
2677 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2678 // Loop over all possible duplications to assign dup_p_ref properly.
2679 for (j = 0; j < unroll_count; j++)
2680 {
2681 const int dup_idx = dup_tensor_block_ref[j + i * unroll_count];
2682 if (dup_idx >= 0 && (tensor_blocks[i].p_refs[0] || tensor_blocks[i].p_refs[1]))
2683 {
2684 const int p_ref_0 = tensor_blocks[i].p_refs[0] - 1;
2685 const int p_ref_0_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref_0, p_node_info);
2686 if (p_ref_0_is_in_or_out == 1) // If it is out tensor, mark dup_p_ref for this.
2687 {
2688 if (!tensor_blocks[dup_idx].dup_p_refs)
2689 tensor_blocks[dup_idx].dup_p_refs = ccv_array_new(sizeof(int), 1, 0);
2690 ccv_array_add_unique_int(tensor_blocks[dup_idx].dup_p_refs, p_ref_0);
2691 }
2692 if (p_ref_0_is_in_or_out == 1 || tensor_blocks[i].p_refs[1] == 0)
2693 continue;
2694 const int p_ref_1 = tensor_blocks[i].p_refs[1] - 1;
2695 const int p_ref_1_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref_1, p_node_info);
2696 if (p_ref_1_is_in_or_out == 1)
2697 {
2698 if (!tensor_blocks[dup_idx].dup_p_refs)
2699 tensor_blocks[dup_idx].dup_p_refs = ccv_array_new(sizeof(int), 1, 0);
2700 ccv_array_add_unique_int(tensor_blocks[dup_idx].dup_p_refs, p_ref_1);
2701 }
2702 }
2703 }
2704 // companion_ref
2705 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
2706 // Now we can assign them (the dups) as companions.
2707 if (!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[i]) && dup_tensor_symbol_info[i].assign_ref)
2708 {
2709 // Get to the last one, which we will wrap over.
2710 const int assign_ref = dup_tensor_symbol_info[i].assign_ref - 1;
2711 if (assign_ref >= 0)
2712 {
2713 int b_ref = assign_ref;
2714 while (tensor_blocks[b_ref].ref)
2715 b_ref = tensor_blocks[b_ref].ref - 1;
2716 int a_hop_b = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[i], tensor_blocks[b_ref]);
2717 int b_hop_a = _ccv_nnc_tensor_block_head_after_tail(exec_dep, tensor_blocks[b_ref], tensor_blocks[i]);
2718 // It cannot be that both i can hop to j and j can hop to i.
2719 // And it can hop from one to the other now after duplication.
2720 assert(a_hop_b > 0 || b_hop_a > 0);
2721 tensor_blocks[i].companion_ref = b_ref + 1;
2722 tensor_blocks[b_ref].companion_ref = i + 1;
2723 }
2724 }
2725 ccfree(dup_tensor_symbol_info);
2726 // Extend the dup tensor block ref, prepare for future extensions.
2727 dup_tensor_block_ref = (int*)ccrealloc(dup_tensor_block_ref, sizeof(int) * dup_graph->tensor_symbol_info->rnum * unroll_count);
2728 for (i = symbolic_graph->tensor_symbol_info->rnum * unroll_count; i < dup_graph->tensor_symbol_info->rnum * unroll_count; i++)
2729 dup_tensor_block_ref[i] = -1;
2730 // Assign out changed properties.
2731 *r_exec_dep = exec_dep;
2732 *r_tensor_blocks = tensor_blocks;
2733 *r_tensor_block_size = dup_graph->tensor_symbol_info->rnum;
2734 *r_dup_graph = dup_graph;
2735 *r_unroll_count = unroll_count;
2736 *r_dup_exec_ref = dup_exec_ref;
2737 *r_dup_tensor_block_ref = dup_tensor_block_ref;
2738}
2739
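/* Editorial sketch (not part of ccv_nnc_symbolic_graph_compile.c): the unroll bookkeeping above
 * stores duplicates in flat arrays indexed as [symbol][unroll copy], i.e. ref[i * unroll_count + j]
 * is the duplicate of symbol i in copy j, with -1 meaning "no duplicate". A hypothetical helper that
 * inverts such a map, the same way dup_tensor_from_ref and dup_exec_from_ref are built above, could
 * look like this: */
static void _sketch_build_reverse_map(const int* const dup_ref, const int symbol_count, const int unroll_count, int* const from_ref, const int dup_count)
{
	int i, j;
	for (i = 0; i < dup_count; i++)
		from_ref[i] = -1; /* Default: this duplicate maps back to nothing. */
	for (i = 0; i < symbol_count; i++)
		for (j = 0; j < unroll_count; j++)
			if (dup_ref[i * unroll_count + j] >= 0)
				from_ref[dup_ref[i * unroll_count + j]] = i; /* Point the duplicate back at its origin. */
}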
2740static int _ccv_nnc_anonymous_tensor_block_from_free_list(const ccv_nnc_tensor_block_t* const tensor_blocks, const int tensor_block_size, const ccv_array_t* const anonymous_block_free_list, const int anonymous_block_free_list_cap, const int type, const uint64_t size, const ccv_sparse_matrix_t* const exec_dep, const ccv_array_t* const dup_p_refs)
2741{
2742 if (!anonymous_block_free_list || !anonymous_block_free_list_cap)
2743 return tensor_block_size;
2744 int i;
2745 const int no_dup_p_refs = (!dup_p_refs || !dup_p_refs->rnum);
2746 int found_idx = tensor_block_size;
2747 for (i = 0; i < anonymous_block_free_list_cap; i++)
2748 {
2749 const int idx = *(int*)ccv_array_get(anonymous_block_free_list, i);
2750 assert(idx < tensor_block_size);
2751 // If the type doesn't match, ignore.
2752 if (tensor_blocks[idx].type != type)
2753 continue;
2754 // Heuristic about how to select the best tensor block to move forward.
2755 // If the size is large enough and there are no dup_p_refs to satisfy, I cannot do better than this, just return directly.
2756 if (tensor_blocks[idx].size >= size)
2757 {
2758 if (no_dup_p_refs)
2759 return idx;
2760 // Otherwise, only if the current tensor block's dup_p_refs is after (or at) the requested dup_p_refs
2761 // can we not do better than this; if that is the case, just return.
2762 if (tensor_blocks[idx].dup_p_refs && tensor_blocks[idx].dup_p_refs->rnum &&
2763 _ccv_nnc_tensor_block_a_after_b_inclusively(exec_dep, tensor_blocks[idx].dup_p_refs, dup_p_refs))
2764 return idx;
2765 }
2766 int64_t found_idx_size_diff;
2767 int64_t idx_size_diff;
2768 if (found_idx == tensor_block_size || // If no found_idx yet, set the current one to be the found one, and continue.
2769 // Now, compare whether this one or the found_idx one is better.
2770 // At this point, there is no point of comparing the dup_p_refs, we only care about which one
2771 // is closer to the size we request. Only on a tie, dup_p_refs or not is important again.
2772 (found_idx_size_diff = llabs((int64_t)tensor_blocks[found_idx].size - (int64_t)size)) < (idx_size_diff = llabs((int64_t)tensor_blocks[idx].size - (int64_t)size)))
2773 {
2774 found_idx = idx;
2775 continue;
2776 }
2777 // No need to update if found_idx is better than idx.
2778 if (found_idx_size_diff > idx_size_diff)
2779 continue;
2780 // We bias towards the bigger one in case of similar.
2781 if (found_idx_size_diff == idx_size_diff && tensor_blocks[idx].size > tensor_blocks[found_idx].size)
2782 {
2783 found_idx = idx;
2784 continue;
2785 }
2786 assert(tensor_blocks[idx].size == tensor_blocks[found_idx].size);
2787 // On a tie, check which one has tighter life-cycle.
2788 if (tensor_blocks[idx].size >= size) // If this block size covers the size we request, we prefer longer life-cycle ones.
2789 {
2790 // Check whether the current tensor blocks life-cycle is longer than the previous one.
2791 if (tensor_blocks[idx].dup_p_refs && tensor_blocks[idx].dup_p_refs->rnum > 0 &&
2792 (!tensor_blocks[found_idx].dup_p_refs || !tensor_blocks[found_idx].dup_p_refs->rnum ||
2793 _ccv_nnc_tensor_block_a_after_b_inclusively(exec_dep, tensor_blocks[idx].dup_p_refs, tensor_blocks[found_idx].dup_p_refs)))
2794 found_idx = idx;
2795 continue;
2796 }
2797 // Now both sizes are smaller than the requested size; in this case, we need to increase the tensor block size.
2798 // We prefer to choose the one whose life-cycle is closer to the expected one.
2799 if (no_dup_p_refs)
2800 {
2801 // Whoever is shorter wins.
2802 if (tensor_blocks[found_idx].dup_p_refs && tensor_blocks[found_idx].dup_p_refs->rnum > 0 &&
2803 (!tensor_blocks[idx].dup_p_refs || !tensor_blocks[idx].dup_p_refs->rnum ||
2804 _ccv_nnc_tensor_block_a_after_b_inclusively(exec_dep, tensor_blocks[found_idx].dup_p_refs, tensor_blocks[idx].dup_p_refs)))
2805 found_idx = idx;
2806 continue;
2807 }
2808 if (!tensor_blocks[idx].dup_p_refs || !tensor_blocks[idx].dup_p_refs->rnum)
2809 continue;
2810 if (!tensor_blocks[found_idx].dup_p_refs || !tensor_blocks[found_idx].dup_p_refs->rnum)
2811 {
2812 found_idx = idx;
2813 continue;
2814 }
2815 // If both cover the requested dup_p_refs, we prefer the shorter one; otherwise we prefer the longer one.
2816 const int idx_after_request = _ccv_nnc_tensor_block_a_after_b_inclusively(exec_dep, tensor_blocks[idx].dup_p_refs, dup_p_refs);
2817 const int found_idx_after_request = _ccv_nnc_tensor_block_a_after_b_inclusively(exec_dep, tensor_blocks[found_idx].dup_p_refs, dup_p_refs);
2818 if (idx_after_request && found_idx_after_request)
2819 {
2820 if (_ccv_nnc_tensor_block_a_after_b_inclusively(exec_dep, tensor_blocks[found_idx].dup_p_refs, tensor_blocks[idx].dup_p_refs))
2821 found_idx = idx;
2822 continue;
2823 } else {
2824 // We entered this branch, so either idx_after_request is false, or found_idx_after_request is false, or both.
2825 // If found_idx_after_request is not false, we are currently doing fine, no need to proceed.
2826 // Otherwise, if idx_after_request is true, it is preferred. If both are false, then prefer the longer one.
2827 if (!found_idx_after_request && (idx_after_request ||
2828 _ccv_nnc_tensor_block_a_after_b_inclusively(exec_dep, tensor_blocks[idx].dup_p_refs, tensor_blocks[found_idx].dup_p_refs)))
2829 found_idx = idx;
2830 continue;
2831 }
2832 }
2833 return found_idx;
2834}
2835
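/* Editorial sketch (an assumption about intent, not code from this file): stripped of the dup_p_refs
 * life-cycle tie-breaks, the free-list selection above reduces to "pick the block whose size is
 * closest to the request, preferring the bigger block on a tie". Using only llabs() as the
 * surrounding code does, that core comparison could be written as: */
static int _sketch_closest_size_block(const uint64_t* const sizes, const int count, const uint64_t request)
{
	int i, best = -1;
	for (i = 0; i < count; i++)
	{
		if (best < 0) { best = i; continue; } /* First candidate wins by default. */
		const int64_t best_diff = llabs((int64_t)sizes[best] - (int64_t)request);
		const int64_t diff = llabs((int64_t)sizes[i] - (int64_t)request);
		if (diff < best_diff || (diff == best_diff && sizes[i] > sizes[best]))
			best = i; /* Closer to the requested size, or equally close but bigger. */
	}
	return best; /* -1 only when count == 0, mirroring "no candidate found". */
}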
2836static ccv_array_t* _ccv_nnc_dup_breakpoints_with_p_node_inputs(ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_graph_exec_symbol_info_t* const p_node_info)
2837{
2838 if (!(p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)))
2839 return 0;
2840 int i, j, k;
2841 int input_size = 0;
2842 for (i = 0; i < p_node_info->p_while.input_size; i++)
2843 if (p_node_info->p_while.inputs[i] >= 0)
2844 ++input_size;
2845 // If it doesn't have tensor inputs (thus, only special inputs), just return.
2846 if (!input_size)
2847 return 0;
2848 ccv_nnc_tensor_symbol_t inputs[input_size];
2849 input_size = 0;
2850 for (i = 0; i < p_node_info->p_while.input_size; i++)
2851 if (p_node_info->p_while.inputs[i] >= 0)
2852 inputs[input_size++] = (ccv_nnc_tensor_symbol_t){
2853 .d = p_node_info->p_while.inputs[i],
2854 .graph = symbolic_graph,
2855 };
2856 assert(symbolic_graph->breakpoint_size > 0);
2857 ccv_array_t* dup_breakpoints = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), symbolic_graph->breakpoint_size, 0);
2858 const int exec_symbol_info_size = symbolic_graph->exec_symbol_info->rnum;
2859 for (i = 0; i < symbolic_graph->breakpoint_size; i++)
2860 {
2861 // Make a noop copy of the breakpoint, but with some tensor inputs.
2862 ccv_nnc_graph_exec_symbol_t noop = ccv_nnc_graph_exec_symbol_new(symbolic_graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC(), 0), inputs, input_size, 0, 0, 0);
2863 ccv_array_push(dup_breakpoints, &noop);
2864 // Connect this noop to the outgoing nodes of breakpoints.
2865 const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(symbolic_graph->exec_symbol_info, symbolic_graph->breakpoints[i].d);
2866 if (symbol_info->outgoings)
2867 for (j = 0; j < symbol_info->outgoings->rnum; j++)
2868 {
2869 const int d = *(int*)ccv_array_get(symbol_info->outgoings, j);
2870 ccv_nnc_graph_exec_symbol_concat(symbolic_graph, noop, (ccv_nnc_graph_exec_symbol_t){
2871 .d = d,
2872 .graph = symbolic_graph,
2873 });
2874 }
2875 }
2876 for (i = 0; i < exec_symbol_info_size; i++)
2877 {
2878 const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(symbolic_graph->exec_symbol_info, i);
2879 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(symbol_info->flags))
2880 continue;
2881 if (symbol_info->outgoings)
2882 {
2883 const int outgoing_size = symbol_info->outgoings->rnum;
2884 for (j = 0; j < outgoing_size; j++)
2885 {
2886 const int d = *(int*)ccv_array_get(symbol_info->outgoings, j);
2887 for (k = 0; k < symbolic_graph->breakpoint_size; k++)
2888 if (d == symbolic_graph->breakpoints[k].d)
2889 {
2890 ccv_nnc_graph_exec_symbol_t noop = *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_breakpoints, k);
2891 ccv_nnc_graph_exec_symbol_concat(symbolic_graph, (ccv_nnc_graph_exec_symbol_t){
2892 .d = i,
2893 .graph = symbolic_graph,
2894 }, noop);
2895 // Found, connected, exit.
2896 break;
2897 }
2898 }
2899 }
2900 }
2901 // Add the dup_breakpoints to the sources if necessary.
2902 assert(symbolic_graph->sources);
2903 const int source_size = symbolic_graph->sources->rnum;
2904 for (i = 0; i < source_size; i++)
2905 {
2906 const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(symbolic_graph->sources, i))->d;
2907 for (j = 0; j < symbolic_graph->breakpoint_size; j++)
2908 if (d == symbolic_graph->breakpoints[j].d)
2909 {
2910 ccv_nnc_graph_exec_symbol_t noop = *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_breakpoints, j);
2911 ccv_nnc_symbolic_graph_add_source(symbolic_graph, noop);
2912 // Found, made, exit.
2913 break;
2914 }
2915 }
2916 // Add the dup_breakpoints to the destinations if necessary.
2917 assert(symbolic_graph->destinations);
2918 const int destination_size = symbolic_graph->destinations->rnum;
2919 for (i = 0; i < destination_size; i++)
2920 {
2921 const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(symbolic_graph->destinations, i))->d;
2922 for (j = 0; j < symbolic_graph->breakpoint_size; j++)
2923 if (d == symbolic_graph->breakpoints[j].d)
2924 {
2925 ccv_nnc_graph_exec_symbol_t noop = *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(dup_breakpoints, j);
2926 ccv_nnc_symbolic_graph_add_destination(symbolic_graph, noop);
2927 // Found, made, exit.
2928 break;
2929 }
2930 }
2931 return dup_breakpoints;
2932}
2933
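/* Editorial sketch (hypothetical helper, mirroring the API calls used above): duplicating a
 * breakpoint amounts to creating a NOOP exec symbol that carries the while loop's tensor inputs
 * and wiring it in front of a node that used to follow the breakpoint: */
static ccv_nnc_graph_exec_symbol_t _sketch_dup_one_breakpoint(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t outgoing, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size)
{
	/* The noop computes nothing, but keeps the loop's tensor inputs attached at this point. */
	ccv_nnc_graph_exec_symbol_t noop = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC(), 0), inputs, input_size, 0, 0, 0);
	ccv_nnc_graph_exec_symbol_concat(graph, noop, outgoing); /* Add the noop -> outgoing edge. */
	return noop;
}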
2934// Plan out how we allocate tensor (should I do optimizations on graph here or not at all?).
2935static ccv_nnc_symbolic_graph_prep_t* _ccv_nnc_symbolic_graph_prep_new(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_tensor_symbol_info_t* const p_tensor_symbol_info, const int p_tensor_symbol_info_size, const ccv_nnc_graph_exec_symbol_info_t* const p_exec_symbol_info, const int p_exec_symbol_info_size)
2936{
2937 assert(source_size > 0);
2938 assert(destination_size > 0);
2939 // First, fill all the "auto" holes.
2940 // This is the symbol table that with "auto" info filled up.
2941 ccv_nnc_tensor_symbol_info_t* tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * symbolic_graph->tensor_symbol_info->rnum);
2942 ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * symbolic_graph->exec_symbol_info->rnum);
2943 ccv_nnc_graph_exec_flag_t* exec_flags = (ccv_nnc_graph_exec_flag_t*)cccalloc(symbolic_graph->exec_symbol_info->rnum, sizeof(ccv_nnc_graph_exec_flag_t));
2944 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(symbolic_graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(symbolic_graph->exec_symbol_info, 0), symbolic_graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0);
2945 ccv_nnc_symbolic_graph_symbol_infer(symbolic_graph, visit, sources, source_size, destinations, destination_size, p_tensor_symbol_info, p_tensor_symbol_info_size, tensor_symbol_info, exec_symbol_info);
2946 int i, j, k, p, q;
2947 const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = p_exec_symbol_info ? p_exec_symbol_info + (symbolic_graph->exec_idx - 1) : 0;
2948 ccv_sparse_matrix_t* exec_dep;
2949 ccv_nnc_tensor_block_t* tensor_blocks;
2950 _ccv_nnc_exec_dep_and_tensor_blocks_prep(symbolic_graph, p_node_info, visit, tensor_binds, tensor_bind_size, outputs, output_size, sources, source_size, destinations, destination_size, p_tensor_symbol_info, p_tensor_symbol_info_size, exec_symbol_info, tensor_symbol_info, 0, 0, 0, 0, exec_flags, &exec_dep, &tensor_blocks);
2951 int tensor_block_size = symbolic_graph->tensor_symbol_info->rnum;
2952 // Now, everything is prepared, tensor life is analyzed, inplace operations are collapsed, all tensor symbols and hints
2953 // are automatically filled in, and all the sub-graphs are processed.
2954 // There is a last step though, for a while loop, it is parameterized:
2955 // while (x > 5) {
2956 // y = x + 1;
2957 // } (y => x) // This means after this loop is done, y's value will be copied over to x.
2958 // we will do our best to avoid the actual data copy; what we do here is check whether y can be x's alias.
2959 // If y can be x's alias, this is good, no other changes required. In the above case, y can be x's alias because
2960 // it is an inplace operation.
2961 // But if y cannot be x's alias, for example, this while loop looks like this:
2962 // while (x > 5) {
2963 // y = x + a
2964 // b = x + y
2965 // } (y => x, b => a) // This means after this loop is done, y's value copied to x and b's value copied to a.
2966 // For this example, y cannot be x's alias because x is used later to compute b (and that computation
2967 // has a dependency on y as well).
2968 // For this case, we need to modify the computation graph. Previously, the graph looks like this:
2969 // y = x + a -> b = x + y
2970 // This graph will be extended to look like this:
2971 // y0 = x0 + a0 -> b0 = x0 + y0 -> y1 = y0 + b0 -> b1 = y0 + y1, or:
2972 // while (x0 > 5) {
2973 // y0 = x0 + a0
2974 // b0 = x0 + y0
2975 // if (y0 > 5) break
2976 // y1 = y0 + b0
2977 // b1 = y0 + y1
2978 // } (y1 => x0, b1 => a0)
2979 // After this expansion, y1 can now be the alias of x0, and b1 can be the alias of a0 (they don't interfere
2980 // with each other now).
2981 // With this algorithm, we don't need to insert any data copy logic; the only thing needed is to switch pointers,
2982 // which is covered by the tensor_multiview_t construct (thus, y (y0, y1), x (y1, y0), b (b0, b1), a (b1, b0))
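/* Editorial sketch (illustration only; compute(), buf0 and buf1 are hypothetical): the unrolled
 * loop above behaves like double-buffering. Each parameterized tensor gets two physical slots and
 * the loop body alternates between them, so "copy y back to x" becomes a pointer swap:
 *
 *   float* slot[2] = { buf0, buf1 };
 *   int cur = 0;
 *   while (slot[cur][0] > 5) {
 *     compute(slot[1 - cur], slot[cur]); // the next iteration's values land in the other slot
 *     cur = 1 - cur;                     // swap which slot is "current" instead of copying data
 *   }
 *
 * The tensor_multiview_t construct mentioned above is what performs this pointer switching in the
 * compiled graph. */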
2983 ccv_nnc_symbolic_graph_t* dup_graph = 0;
2984 int* dup_exec_ref = 0;
2985 int* dup_tensor_block_ref = 0;
2986 int unroll_count = 0;
2987 // In true recursive fashion, I need to call all the sub-graphs and do the pre-compilation for them one by one.
2988 ccv_nnc_symbolic_graph_prep_t* prep = (ccv_nnc_symbolic_graph_prep_t*)ccmalloc(sizeof(ccv_nnc_symbolic_graph_prep_t));
2989 prep->graph = ccv_nnc_graph_new(); // Just allocate the graph right now.
2990 prep->flags = 0;
2991 // Cannot handle duplicating a node that is a graph as well.
2992 if (p_exec_symbol_info)
2993 {
2994 prep->flags = p_node_info->flags;
2995 if (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
2996 {
2997 _ccv_nnc_redo_exec_dep_and_tensor_blocks_when_unroll(symbolic_graph, p_node_info, visit, tensor_binds, tensor_bind_size, sources, source_size, destinations, destination_size, p_tensor_symbol_info, p_tensor_symbol_info_size, exec_symbol_info, tensor_symbol_info, exec_flags, &exec_dep, &tensor_blocks, &tensor_block_size, &dup_graph, &unroll_count, &dup_exec_ref, &dup_tensor_block_ref);
2998 _ccv_nnc_fixup_tensor_blocks_for_outputs(exec_dep, tensor_blocks, p_node_info, unroll_count, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(symbolic_graph->destinations, 0), symbolic_graph->destinations->rnum, symbolic_graph->p_idx - 1, p_tensor_symbol_info, p_tensor_symbol_info_size, tensor_symbol_info, dup_exec_ref, dup_tensor_block_ref);
2999 } else if (p_node_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
3000 // TODO: We want to try our best to fit as many of its corresponding inputs / outputs into the companion_ref group as possible.
3001 }
3002 }
3003 ccv_nnc_symbolic_graph_prep_t** sub_preps = symbolic_graph->sub_graphs && symbolic_graph->sub_graphs->rnum ? (ccv_nnc_symbolic_graph_prep_t**)cccalloc(symbolic_graph->sub_graphs->rnum, sizeof(ccv_nnc_symbolic_graph_prep_t*)) : 0;
3004 ccv_array_t* anonymous_block_free_list = 0;
3005 const int tensor_fold_size = (tensor_block_size + 31) >> 5;
3006 // Record whether this tensor is folded in this round.
3007 uint32_t* const tensor_fold = (uint32_t*)ccmalloc(sizeof(uint32_t) * tensor_fold_size);
3008 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
3009 for (p = 0; p < node->graph_ref_size; p++)
3010 {
3011 assert(symbolic_graph->sub_graphs);
3012 ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(symbolic_graph->sub_graphs, CCV_NNC_GRAPH_REF(node)[p] - 1);
3013 ccv_array_t* const dup_breakpoints = _ccv_nnc_dup_breakpoints_with_p_node_inputs(sub_graph, node);
3014 ccv_nnc_symbolic_graph_prep_t* const sub_prep = _ccv_nnc_symbolic_graph_prep_new(sub_graph, tensor_binds, tensor_bind_size, 0, 0, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sub_graph->sources, 0), sub_graph->sources->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sub_graph->destinations, 0), sub_graph->destinations->rnum, tensor_symbol_info, symbolic_graph->tensor_symbol_info->rnum, exec_symbol_info, symbolic_graph->exec_symbol_info->rnum);
3015 sub_prep->dup_breakpoints = dup_breakpoints;
3016 sub_prep->p = prep;
3017 sub_preps[CCV_NNC_GRAPH_REF(node)[p] - 1] = sub_prep;
3018 const ccv_nnc_tensor_alloc_prep_t* const s_alloc_prep = sub_prep->alloc_prep;
3019 const ccv_nnc_tensor_block_t* const s_tensor_blocks = sub_prep->tensor_blocks;
3020 for (i = 0; i < s_alloc_prep->block_size; i++)
3021 {
3022 const int block_ref = s_alloc_prep->blocks[i].block_ref;
3023 const int buffer_ref = s_alloc_prep->blocks[i].buffer_ref;
3024 if (block_ref < sub_prep->tensor_symbol_info_size)
3025 {
3026 // If this block has a bypass, and its bypass has a different p_refs, then it doesn't matter.
3027 // I cannot assign p_refs to its parent buffer, and that buffer has to be anonymous.
3028 if (s_tensor_blocks[block_ref].bypass_ref)
3029 {
3030 int bypass_ref = s_tensor_blocks[block_ref].bypass_ref - 1;
3031 while (s_tensor_blocks[bypass_ref].ref)
3032 bypass_ref = s_tensor_blocks[bypass_ref].ref - 1;
3033 if (s_tensor_blocks[block_ref].p_refs[0] != s_tensor_blocks[bypass_ref].p_refs[0] ||
3034 s_tensor_blocks[block_ref].p_refs[1] != s_tensor_blocks[bypass_ref].p_refs[1])
3035 continue;
3036 }
3037 if (s_tensor_blocks[block_ref].p_refs[0])
3038 {
3039 /* If it is already properly assigned, next. */
3040 if (s_alloc_prep->buffers[buffer_ref].p_refs[0] != s_tensor_blocks[block_ref].p_refs[0] &&
3041 s_alloc_prep->buffers[buffer_ref].p_refs[1] != s_tensor_blocks[block_ref].p_refs[0])
3042 {
3043 if (!s_alloc_prep->buffers[buffer_ref].p_refs[0])
3044 s_alloc_prep->buffers[buffer_ref].p_refs[0] = s_tensor_blocks[block_ref].p_refs[0];
3045 else {
3046 assert(!s_alloc_prep->buffers[buffer_ref].p_refs[1]);
3047 s_alloc_prep->buffers[buffer_ref].p_refs[1] = s_tensor_blocks[block_ref].p_refs[0];
3048 }
3049 }
3050 /* When entering this branch, s_alloc_prep->buffers[buffer_ref].p_refs[0] cannot be 0. */
3051 if (s_tensor_blocks[block_ref].p_refs[1] &&
3052 s_alloc_prep->buffers[buffer_ref].p_refs[0] != s_tensor_blocks[block_ref].p_refs[1] &&
3053 s_alloc_prep->buffers[buffer_ref].p_refs[1] != s_tensor_blocks[block_ref].p_refs[1])
3054 {
3055 assert(s_alloc_prep->buffers[buffer_ref].p_refs[0]);
3056 assert(!s_alloc_prep->buffers[buffer_ref].p_refs[1]);
3057 s_alloc_prep->buffers[buffer_ref].p_refs[1] = s_tensor_blocks[block_ref].p_refs[1];
3058 }
3059 }
3060 } else if (s_tensor_blocks[block_ref].dup_p_refs) {
3061 /* In this case, the only relevant bit is dup_p_ref. dup_p_ref extends the life-time of an anonymous block,
3062 * which by default only has a life-cycle shared with this sub-graph node. The reason to extend is that
3063 * these anonymous blocks that have dup_p_refs may contain data that will be used as output (thus, dup_p_ref
3064 * always points to an output tensor of this sub-graph node); therefore, the memory region must extend
3065 * its life-time to the end of the output tensor. */
3066 if (!s_alloc_prep->buffers[buffer_ref].dup_p_refs)
3067 s_alloc_prep->buffers[buffer_ref].dup_p_refs = ccv_array_new(sizeof(int), s_tensor_blocks[block_ref].dup_p_refs->rnum, 0);
3068 for (j = 0; j < s_tensor_blocks[block_ref].dup_p_refs->rnum; j++)
3069 ccv_array_add_unique_int(s_alloc_prep->buffers[buffer_ref].dup_p_refs, *(int*)ccv_array_get(s_tensor_blocks[block_ref].dup_p_refs, j));
3070 }
3071 }
3072 }
3073 const int init_tensor_block_size = tensor_block_size;
3074 int rw_anonymous_buffer_size_cap = 0;
3075 int ro_anonymous_buffer_size_cap = 0;
3076 if (anonymous_block_free_list)
3077 ccv_array_clear(anonymous_block_free_list);
3078 memset(tensor_fold, 0, sizeof(uint32_t) * tensor_fold_size);
3079 for (p = 0; p < node->graph_ref_size; p++)
3080 {
3081 ccv_nnc_symbolic_graph_prep_t* const sub_prep = sub_preps[CCV_NNC_GRAPH_REF(node)[p] - 1];
3082 const ccv_nnc_tensor_alloc_prep_t* const s_alloc_prep = sub_prep->alloc_prep;
3083 int rw_anonymous_buffer_size = 0;
3084 int ro_anonymous_buffer_size = 0;
3085 for (i = 0; i < s_alloc_prep->buffer_size; i++)
3086 if (s_alloc_prep->buffers[i].p_refs[0])
3087 {
3088 /* Reduce 2 p_refs, if there are 2, to 1 p_ref (by doing block folding). */
3089 int p_ref_0 = s_alloc_prep->buffers[i].p_refs[0] - 1;
3090 /* Need to go through refs. Since we reuse the tensor block for this input, it now has to allocate at least this much space. */
3091 int p_ref_0_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref_0, node);
3092 assert(p_ref_0_is_in_or_out != 0);
3093 int unref_p_ref_0 = p_ref_0;
3094 while (tensor_blocks[unref_p_ref_0].ref)
3095 unref_p_ref_0 = tensor_blocks[unref_p_ref_0].ref - 1;
3096 /* This parent tensor block cannot be unassigned because it is either an input or an output of this sub-graph node. */
3097 assert(!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[unref_p_ref_0]));
3098 if (s_alloc_prep->buffers[i].p_refs[1])
3099 {
3100 int p_ref_1 = s_alloc_prep->buffers[i].p_refs[1] - 1;
3101 const int p_ref_1_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref_1, node);
3102 assert(p_ref_1_is_in_or_out != 0);
3103 int unref_p_ref_1 = p_ref_1;
3104 while (tensor_blocks[unref_p_ref_1].ref)
3105 unref_p_ref_1 = tensor_blocks[unref_p_ref_1].ref - 1;
3106 /* See above comment for the similar p_ref_0 check. */
3107 assert(!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[unref_p_ref_1]))((void) sizeof ((!((tensor_blocks[unref_p_ref_1].flags & 0x3
) == UNASSIGNED)) ? 1 : 0), __extension__ ({ if (!((tensor_blocks
[unref_p_ref_1].flags & 0x3) == UNASSIGNED)) ; else __assert_fail
("!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[unref_p_ref_1])", "ccv_nnc_symbolic_graph_compile.c"
, 3107, __extension__ __PRETTY_FUNCTION__); }))
;
3108 assert(p_ref_0_is_in_or_out != p_ref_1_is_in_or_out)((void) sizeof ((p_ref_0_is_in_or_out != p_ref_1_is_in_or_out
) ? 1 : 0), __extension__ ({ if (p_ref_0_is_in_or_out != p_ref_1_is_in_or_out
) ; else __assert_fail ("p_ref_0_is_in_or_out != p_ref_1_is_in_or_out"
, "ccv_nnc_symbolic_graph_compile.c", 3108, __extension__ __PRETTY_FUNCTION__
); }))
;
3109 int p_ref_t;
3110 if (p_ref_0_is_in_or_out < p_ref_1_is_in_or_out) /* if p_ref_0 is input and p_ref_1 is output, switch. */
3111 {
3112 CCV_SWAP(p_ref_0, p_ref_1, p_ref_t)((p_ref_t) = (p_ref_0), (p_ref_0) = (p_ref_1), (p_ref_1) = (p_ref_t
))
;
3113 CCV_SWAP(unref_p_ref_0, unref_p_ref_1, p_ref_t)((p_ref_t) = (unref_p_ref_0), (unref_p_ref_0) = (unref_p_ref_1
), (unref_p_ref_1) = (p_ref_t))
;
3114 }
3115 p_ref_0_is_in_or_out = 1; /* Now p_ref_0 surely is the output tensor. */
3116 /* If the dimension matches, can fold. */
3117 if (memcmp(tensor_symbol_info[unref_p_ref_1].info.dim, tensor_symbol_info[unref_p_ref_0].info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) == 0)
3118 {
3119 const int folded = _ccv_nnc_tensor_blocks_try_fold(tensor_blocks, unref_p_ref_1, unref_p_ref_0);
3120 if (folded)
3121 {
3122 p_ref_0 = p_ref_1;
3123 unref_p_ref_0 = unref_p_ref_1; // p_ref_0 now folded into p_ref_1, therefore, pointing to p_ref_1 now.
3124 tensor_fold[unref_p_ref_0 >> 5] |= (1u << (unref_p_ref_0 & 0x1f));
3125 for (j = 0; j < unroll_count; j++) /* Fold its duplicates as well. */
3126 {
3127 const int folded = _ccv_nnc_tensor_blocks_try_fold(tensor_blocks, dup_tensor_block_ref[unref_p_ref_1 * unroll_count + j], dup_tensor_block_ref[unref_p_ref_0 * unroll_count + j]);
3128 assert(folded && "the subsequent duplicates can be folded too.")((void) sizeof ((folded && "the subsequent duplicates can be folded too."
) ? 1 : 0), __extension__ ({ if (folded && "the subsequent duplicates can be folded too."
) ; else __assert_fail ("folded && \"the subsequent duplicates can be folded too.\""
, "ccv_nnc_symbolic_graph_compile.c", 3128, __extension__ __PRETTY_FUNCTION__
); }))
;
3129 }
3130 }
3131 }
3132 }
3133 /* Only proceed if it is folded here (thus, the input / output tensor can be connected and reuse is not a problem).
3134 * Or, if p_ref_0 is the output, it first starts from this node (thus, I have full control over
3135 * its life-cycle). Or, if p_ref_0 is the input, it ends in this node (thus, I can take over its
3136 * life-cycle freely within this sub-graph (otherwise, if it is used anywhere else, I cannot change the content
3137 * within its memory region)). Finally, if this buffer is used as read-only and we don't have any output
3138 * associated with it, then we are also good. */
3139 if ((tensor_fold[unref_p_ref_0 >> 5] & (1u << (unref_p_ref_0 & 0x1f))) ||
3140 (p_ref_0_is_in_or_out == 1 && _ccv_nnc_tensor_block_check_head(tensor_blocks + unref_p_ref_0, idx)) ||
3141 (p_ref_0_is_in_or_out == -1 && _ccv_nnc_tensor_block_check_tail(tensor_blocks + unref_p_ref_0, idx)) ||
3142 TENSOR_READ_WRITE(s_alloc_prep->buffers[i])(s_alloc_prep->buffers[i].flags & 0xc) == READ_ONLY)
3143 {
3144 if (TENSOR_READ_WRITE(s_alloc_prep->buffers[i])(s_alloc_prep->buffers[i].flags & 0xc) == READ_ONLY)
3145 { assert(s_alloc_prep->buffers[i].p_refs[1] == 0)((void) sizeof ((s_alloc_prep->buffers[i].p_refs[1] == 0) ?
1 : 0), __extension__ ({ if (s_alloc_prep->buffers[i].p_refs
[1] == 0) ; else __assert_fail ("s_alloc_prep->buffers[i].p_refs[1] == 0"
, "ccv_nnc_symbolic_graph_compile.c", 3145, __extension__ __PRETTY_FUNCTION__
); }))
; }
3146 /* p_ref_0 is either the only one or the output tensor; we always prefer the output tensor (there
3147 * is a long argument for why that is the case; the digest is that it is much easier to control your output
3148 * than your input). */
3149 s_alloc_prep->buffers[i].p_refs[0] = p_ref_0 + 1;
3150 s_alloc_prep->buffers[i].p_refs[1] = 0;
3151 /* This parent tensor block cannot be unassigned because it is either input / output of this sub-graph node. */
3152 assert(!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[unref_p_ref_0]))((void) sizeof ((!((tensor_blocks[unref_p_ref_0].flags & 0x3
) == UNASSIGNED)) ? 1 : 0), __extension__ ({ if (!((tensor_blocks
[unref_p_ref_0].flags & 0x3) == UNASSIGNED)) ; else __assert_fail
("!TENSOR_EXPECT_UNASSIGNED(tensor_blocks[unref_p_ref_0])", "ccv_nnc_symbolic_graph_compile.c"
, 3152, __extension__ __PRETTY_FUNCTION__); }))
;
3153 tensor_blocks[unref_p_ref_0].size = ccv_max(s_alloc_prep->buffers[i].size, tensor_blocks[unref_p_ref_0].size)({ typeof (s_alloc_prep->buffers[i].size) _a = (s_alloc_prep
->buffers[i].size); typeof (tensor_blocks[unref_p_ref_0].size
) _b = (tensor_blocks[unref_p_ref_0].size); (_a > _b) ? _a
: _b; })
;
3154 for (j = 0; j < unroll_count; j++) /* Change the size of its duplicates as well. */
3155 tensor_blocks[dup_tensor_block_ref[p_ref_0 * unroll_count + j]].size =
3156 tensor_blocks[dup_tensor_block_ref[unref_p_ref_0 * unroll_count + j]].size =
3157 tensor_blocks[unref_p_ref_0].size;
3158 } else {
3159 s_alloc_prep->buffers[i].p_refs[0] = s_alloc_prep->buffers[i].p_refs[1] = 0;
3160 if (TENSOR_READ_WRITE(s_alloc_prep->buffers[i])(s_alloc_prep->buffers[i].flags & 0xc) == READ_ONLY)
3161 ++ro_anonymous_buffer_size;
3162 else
3163 rw_anonymous_buffer_size += unroll_count + 1;
3164 }
3165 } else {
3166 if (TENSOR_READ_WRITE(s_alloc_prep->buffers[i])(s_alloc_prep->buffers[i].flags & 0xc) == READ_ONLY)
3167 ++ro_anonymous_buffer_size;
3168 else
3169 rw_anonymous_buffer_size += unroll_count + 1;
3170 }
3171 if (ro_anonymous_buffer_size || rw_anonymous_buffer_size)
3172 {
3173 const int anonymous_block_free_list_cap = anonymous_block_free_list ? anonymous_block_free_list->rnum : 0;
3174 // All read-write buffers can (potentially) be reused between each case..of branch.
3175 rw_anonymous_buffer_size_cap += rw_anonymous_buffer_size;
3176 // Read-only buffers cannot be reused between each case..of branch.
3177 ro_anonymous_buffer_size_cap += ro_anonymous_buffer_size;
3178 /* Anonymous block, allocate additional tensor blocks for this. */
3179 /* This is either because this is an internal tensor (it doesn't have a p_ref) */
3180 /* or it is an anonymous block itself within the sub-graphs of this while graph. */
3181 tensor_blocks = (ccv_nnc_tensor_block_t*)ccreallocrealloc(tensor_blocks, sizeof(ccv_nnc_tensor_block_t) * (init_tensor_block_size + (unroll_count + 1) * rw_anonymous_buffer_size_cap + ro_anonymous_buffer_size_cap));
3182 memset(tensor_blocks + tensor_block_size, 0, sizeof(ccv_nnc_tensor_block_t) * (init_tensor_block_size + (unroll_count + 1) * rw_anonymous_buffer_size_cap + ro_anonymous_buffer_size_cap - tensor_block_size));
3183 if (dup_tensor_block_ref)
3184 dup_tensor_block_ref = (int*)ccreallocrealloc(dup_tensor_block_ref, sizeof(int) * unroll_count * (init_tensor_block_size + rw_anonymous_buffer_size_cap + ro_anonymous_buffer_size_cap));
3185 for (i = 0; i < s_alloc_prep->buffer_size; i++)
3186 if (!s_alloc_prep->buffers[i].p_refs[0])
3187 {
3188 if (TENSOR_READ_WRITE(s_alloc_prep->buffers[i])(s_alloc_prep->buffers[i].flags & 0xc) == READ_ONLY) /* If it is read-only, add all sources (destinations) to it. */
3189 {
3190 assert(tensor_block_size < init_tensor_block_size + rw_anonymous_buffer_size_cap + ro_anonymous_buffer_size_cap)((void) sizeof ((tensor_block_size < init_tensor_block_size
+ rw_anonymous_buffer_size_cap + ro_anonymous_buffer_size_cap
) ? 1 : 0), __extension__ ({ if (tensor_block_size < init_tensor_block_size
+ rw_anonymous_buffer_size_cap + ro_anonymous_buffer_size_cap
) ; else __assert_fail ("tensor_block_size < init_tensor_block_size + rw_anonymous_buffer_size_cap + ro_anonymous_buffer_size_cap"
, "ccv_nnc_symbolic_graph_compile.c", 3190, __extension__ __PRETTY_FUNCTION__
); }))
;
3191 TENSOR_SET_ANONYMOUS(tensor_blocks[tensor_block_size])(tensor_blocks[tensor_block_size].flags = ((tensor_blocks[tensor_block_size
].flags & ~0x10) | ANONYMOUS))
;
3192 TENSOR_SET_READ_WRITE(tensor_blocks[tensor_block_size], TENSOR_READ_WRITE(s_alloc_prep->buffers[i]))(tensor_blocks[tensor_block_size].flags = ((tensor_blocks[tensor_block_size
].flags & ~0xc) | (s_alloc_prep->buffers[i].flags &
0xc)))
;
3193 tensor_blocks[tensor_block_size].type = s_alloc_prep->buffers[i].type;
3194 tensor_blocks[tensor_block_size].pin_mem = s_alloc_prep->buffers[i].pin_mem;
3195 tensor_blocks[tensor_block_size].size = s_alloc_prep->buffers[i].size;
3196 s_alloc_prep->buffers[i].p_refs[0] = tensor_block_size + 1;
3197 tensor_blocks[tensor_block_size].head = ccv_array_new(sizeof(int), 1, 0);
3198 ccv_array_push(tensor_blocks[tensor_block_size].head, &idx);
3199 ccv_array_t* const dup_p_refs = s_alloc_prep->buffers[i].dup_p_refs;
3200 if (dup_p_refs && dup_p_refs->rnum > 0)
3201 {
3202 for (j = 0; j < dup_p_refs->rnum; j++)
3203 {
3204 const int dup_p_ref = *(int*)ccv_array_get(dup_p_refs, j)((void*)(((char*)((dup_p_refs)->data)) + (size_t)(dup_p_refs
)->rsize * (size_t)(j)))
;
3205 assert(dup_p_ref >= 0)((void) sizeof ((dup_p_ref >= 0) ? 1 : 0), __extension__ (
{ if (dup_p_ref >= 0) ; else __assert_fail ("dup_p_ref >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 3205, __extension__ __PRETTY_FUNCTION__
); }))
;
3206 assert(dup_p_ref < symbolic_graph->tensor_symbol_info->rnum)((void) sizeof ((dup_p_ref < symbolic_graph->tensor_symbol_info
->rnum) ? 1 : 0), __extension__ ({ if (dup_p_ref < symbolic_graph
->tensor_symbol_info->rnum) ; else __assert_fail ("dup_p_ref < symbolic_graph->tensor_symbol_info->rnum"
, "ccv_nnc_symbolic_graph_compile.c", 3206, __extension__ __PRETTY_FUNCTION__
); }))
;
3207 assert(tensor_blocks[dup_p_ref].tail)((void) sizeof ((tensor_blocks[dup_p_ref].tail) ? 1 : 0), __extension__
({ if (tensor_blocks[dup_p_ref].tail) ; else __assert_fail (
"tensor_blocks[dup_p_ref].tail", "ccv_nnc_symbolic_graph_compile.c"
, 3207, __extension__ __PRETTY_FUNCTION__); }))
;
3208 // If it points to a p_ref upwards, check whether this is an output; if it is an output, add it to
3209 // this block's dup_p_refs. It propagates back all the way to the upper layer's buffer object.
3210 if (tensor_symbol_info[dup_p_ref].p_ref)
3211 {
3212 const int p_ref_0 = tensor_symbol_info[dup_p_ref].p_ref - 1;
3213 assert(p_node_info)((void) sizeof ((p_node_info) ? 1 : 0), __extension__ ({ if (
p_node_info) ; else __assert_fail ("p_node_info", "ccv_nnc_symbolic_graph_compile.c"
, 3213, __extension__ __PRETTY_FUNCTION__); }))
;
3214 const int p_ref_0_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref_0, p_node_info);
3215 if (p_ref_0_is_in_or_out == 1) // If it is out tensor, mark dup_p_ref for this.
3216 {
3217 if (!tensor_blocks[tensor_block_size].dup_p_refs)
3218 tensor_blocks[tensor_block_size].dup_p_refs = ccv_array_new(sizeof(int), 1, 0);
3219 ccv_array_add_unique_int(tensor_blocks[tensor_block_size].dup_p_refs, p_ref_0);
3220 }
3221 }
3222 if (!tensor_blocks[tensor_block_size].tail)
3223 tensor_blocks[tensor_block_size].tail = ccv_array_new(sizeof(int), tensor_blocks[dup_p_ref].tail->rnum, 0);
3224 for (k = 0; k < tensor_blocks[dup_p_ref].tail->rnum; k++)
3225 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[dup_p_ref].tail, k)((void*)(((char*)((tensor_blocks[dup_p_ref].tail)->data)) +
(size_t)(tensor_blocks[dup_p_ref].tail)->rsize * (size_t)
(k)))
, tensor_blocks[tensor_block_size]);
3226 }
3227 } else {
3228 tensor_blocks[tensor_block_size].tail = ccv_array_new(sizeof(int), 1, 0);
3229 ccv_array_push(tensor_blocks[tensor_block_size].tail, &idx);
3230 }
3231 for (j = 0; j < source_size; j++)
3232 _ccv_nnc_tensor_block_add_exec(exec_dep, sources[j].d, tensor_blocks[tensor_block_size]);
3233 /* If this is read-only (based on SSA: first encountered as a read) and this is a
3234 * sub-graph, mark it to the end of the graph. */
3235 if (p_exec_symbol_info)
3236 for (j = 0; j < destination_size; j++)
3237 _ccv_nnc_tensor_block_add_exec(exec_dep, destinations[j].d, tensor_blocks[tensor_block_size]);
3238 /* If it is read-only, it is self-reflecting. */
3239 for (k = 0; k < unroll_count; k++)
3240 {
3241 for (j = 0; j < destination_size; j++)
3242 if (dup_exec_ref[destinations[j].d * unroll_count + k] >= 0)
3243 _ccv_nnc_tensor_block_add_exec(exec_dep, dup_exec_ref[destinations[j].d * unroll_count + k], tensor_blocks[tensor_block_size]);
3244 /* No need to extend life-time, because this is a sub-graph and we already extended read-only to the end of destination. */
3245 assert(symbolic_graph->p)((void) sizeof ((symbolic_graph->p) ? 1 : 0), __extension__
({ if (symbolic_graph->p) ; else __assert_fail ("symbolic_graph->p"
, "ccv_nnc_symbolic_graph_compile.c", 3245, __extension__ __PRETTY_FUNCTION__
); }))
;
3246 dup_tensor_block_ref[tensor_block_size * unroll_count + k] = tensor_block_size;
3247 }
3248 ++tensor_block_size;
3249 } else {
3250 ccv_array_t* const dup_p_refs = s_alloc_prep->buffers[i].dup_p_refs;
3251 const int tensor_block_idx = _ccv_nnc_anonymous_tensor_block_from_free_list(tensor_blocks, tensor_block_size, anonymous_block_free_list, anonymous_block_free_list_cap, s_alloc_prep->buffers[i].type, s_alloc_prep->buffers[i].size, exec_dep, dup_p_refs);
3252 const int new_anonymous_tensor_block = (tensor_block_idx == tensor_block_size);
3253 // Find a suitable tensor block from the free list.
3254 TENSOR_SET_ANONYMOUS(tensor_blocks[tensor_block_idx])(tensor_blocks[tensor_block_idx].flags = ((tensor_blocks[tensor_block_idx
].flags & ~0x10) | ANONYMOUS))
;
3255 TENSOR_SET_READ_WRITE(tensor_blocks[tensor_block_idx], TENSOR_READ_WRITE(s_alloc_prep->buffers[i]))(tensor_blocks[tensor_block_idx].flags = ((tensor_blocks[tensor_block_idx
].flags & ~0xc) | (s_alloc_prep->buffers[i].flags &
0xc)))
;
3256 s_alloc_prep->buffers[i].p_refs[0] = tensor_block_idx + 1;
3257 if (new_anonymous_tensor_block)
3258 {
3259 tensor_blocks[tensor_block_idx].type = s_alloc_prep->buffers[i].type;
3260 tensor_blocks[tensor_block_idx].pin_mem = s_alloc_prep->buffers[i].pin_mem;
3261 tensor_blocks[tensor_block_idx].size = s_alloc_prep->buffers[i].size;
3262 tensor_blocks[tensor_block_idx].head = ccv_array_new(sizeof(int), 1, 0);
3263 ccv_array_push(tensor_blocks[tensor_block_idx].head, &idx);
3264 } else {
3265 tensor_blocks[tensor_block_idx].pin_mem = tensor_blocks[tensor_block_idx].pin_mem || s_alloc_prep->buffers[i].pin_mem;
3266 tensor_blocks[tensor_block_idx].size = ccv_max(tensor_blocks[tensor_block_idx].size, s_alloc_prep->buffers[i].size)({ typeof (tensor_blocks[tensor_block_idx].size) _a = (tensor_blocks
[tensor_block_idx].size); typeof (s_alloc_prep->buffers[i]
.size) _b = (s_alloc_prep->buffers[i].size); (_a > _b) ?
_a : _b; })
;
3267 }
3268 if (dup_p_refs && dup_p_refs->rnum > 0)
3269 {
3270 for (j = 0; j < dup_p_refs->rnum; j++)
3271 {
3272 const int dup_p_ref = *(int*)ccv_array_get(dup_p_refs, j)((void*)(((char*)((dup_p_refs)->data)) + (size_t)(dup_p_refs
)->rsize * (size_t)(j)))
;
3273 assert(dup_p_ref >= 0)((void) sizeof ((dup_p_ref >= 0) ? 1 : 0), __extension__ (
{ if (dup_p_ref >= 0) ; else __assert_fail ("dup_p_ref >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 3273, __extension__ __PRETTY_FUNCTION__
); }))
;
3274 assert(dup_p_ref < symbolic_graph->tensor_symbol_info->rnum)((void) sizeof ((dup_p_ref < symbolic_graph->tensor_symbol_info
->rnum) ? 1 : 0), __extension__ ({ if (dup_p_ref < symbolic_graph
->tensor_symbol_info->rnum) ; else __assert_fail ("dup_p_ref < symbolic_graph->tensor_symbol_info->rnum"
, "ccv_nnc_symbolic_graph_compile.c", 3274, __extension__ __PRETTY_FUNCTION__
); }))
;
3275 // If it points to a p_ref upwards, check whether this is an output; if it is an output, add it to
3276 // this block's dup_p_refs. It propagates back all the way to the upper layer's buffer object.
3277 if (tensor_symbol_info[dup_p_ref].p_ref)
3278 {
3279 const int p_ref_0 = tensor_symbol_info[dup_p_ref].p_ref - 1;
3280 assert(p_node_info)((void) sizeof ((p_node_info) ? 1 : 0), __extension__ ({ if (
p_node_info) ; else __assert_fail ("p_node_info", "ccv_nnc_symbolic_graph_compile.c"
, 3280, __extension__ __PRETTY_FUNCTION__); }))
;
3281 const int p_ref_0_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref_0, p_node_info);
3282 if (p_ref_0_is_in_or_out == 1) // If it is out tensor, mark dup_p_ref for this.
3283 {
3284 if (!tensor_blocks[tensor_block_idx].dup_p_refs)
3285 tensor_blocks[tensor_block_idx].dup_p_refs = ccv_array_new(sizeof(int), 1, 0);
3286 ccv_array_add_unique_int(tensor_blocks[tensor_block_idx].dup_p_refs, p_ref_0);
3287 }
3288 }
3289 assert(tensor_blocks[dup_p_ref].tail)((void) sizeof ((tensor_blocks[dup_p_ref].tail) ? 1 : 0), __extension__
({ if (tensor_blocks[dup_p_ref].tail) ; else __assert_fail (
"tensor_blocks[dup_p_ref].tail", "ccv_nnc_symbolic_graph_compile.c"
, 3289, __extension__ __PRETTY_FUNCTION__); }))
;
3290 if (!tensor_blocks[tensor_block_idx].tail)
3291 tensor_blocks[tensor_block_idx].tail = ccv_array_new(sizeof(int), tensor_blocks[dup_p_ref].tail->rnum, 0);
3292 for (k = 0; k < tensor_blocks[dup_p_ref].tail->rnum; k++)
3293 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[dup_p_ref].tail, k)((void*)(((char*)((tensor_blocks[dup_p_ref].tail)->data)) +
(size_t)(tensor_blocks[dup_p_ref].tail)->rsize * (size_t)
(k)))
, tensor_blocks[tensor_block_idx]);
3294 // We have to add it to the wrap around companion_ref as well.
3295 // TODO: Although we know this wastes space (any space in between the current one and its companion_ref will still
3296 // be occupied and is unlikely to be reused), we cannot really do much about it because the companion_ref's
3297 // definition is too free-form, and if we enforce a stronger guarantee on it (such as that it must wrap around), that
3298 // guarantee may break down the line.
3299 if (tensor_blocks[dup_p_ref].companion_ref)
3300 {
3301 const int companion_ref = tensor_blocks[dup_p_ref].companion_ref - 1;
3302 for (q = 0; tensor_blocks[companion_ref].head && q < tensor_blocks[companion_ref].head->rnum; q++)
3303 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[companion_ref].head, q)((void*)(((char*)((tensor_blocks[companion_ref].head)->data
)) + (size_t)(tensor_blocks[companion_ref].head)->rsize * (
size_t)(q)))
, tensor_blocks[tensor_block_idx]);
3304 for (q = 0; tensor_blocks[companion_ref].tail && q < tensor_blocks[companion_ref].tail->rnum; q++)
3305 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[companion_ref].tail, q)((void*)(((char*)((tensor_blocks[companion_ref].tail)->data
)) + (size_t)(tensor_blocks[companion_ref].tail)->rsize * (
size_t)(q)))
, tensor_blocks[tensor_block_idx]);
3306 }
3307 }
3308 } else if (new_anonymous_tensor_block) {
3309 tensor_blocks[tensor_block_idx].tail = ccv_array_new(sizeof(int), 1, 0);
3310 ccv_array_push(tensor_blocks[tensor_block_idx].tail, &idx);
3311 }
3312 const int prev_tensor_block_idx = tensor_block_idx;
3313 if (new_anonymous_tensor_block)
3314 {
3315 if (!anonymous_block_free_list)
3316 anonymous_block_free_list = ccv_array_new(sizeof(int), 0, 0);
3317 ccv_array_push(anonymous_block_free_list, &tensor_block_size);
3318 ++tensor_block_size;
3319 }
3320 for (k = 0; k < unroll_count; k++)
3321 {
3322 const int tensor_block_idx = new_anonymous_tensor_block ?
3323 (dup_tensor_block_ref[prev_tensor_block_idx * unroll_count + k] = tensor_block_size) :
3324 dup_tensor_block_ref[prev_tensor_block_idx * unroll_count + k];
3325 TENSOR_SET_ANONYMOUS(tensor_blocks[tensor_block_idx])(tensor_blocks[tensor_block_idx].flags = ((tensor_blocks[tensor_block_idx
].flags & ~0x10) | ANONYMOUS))
;
3326 TENSOR_SET_READ_WRITE(tensor_blocks[tensor_block_idx], TENSOR_READ_WRITE(s_alloc_prep->buffers[i]))(tensor_blocks[tensor_block_idx].flags = ((tensor_blocks[tensor_block_idx
].flags & ~0xc) | (s_alloc_prep->buffers[i].flags &
0xc)))
;
3327 if (new_anonymous_tensor_block)
3328 {
3329 tensor_blocks[tensor_block_idx].type = s_alloc_prep->buffers[i].type;
3330 tensor_blocks[tensor_block_idx].pin_mem = s_alloc_prep->buffers[i].pin_mem;
3331 tensor_blocks[tensor_block_idx].size = s_alloc_prep->buffers[i].size;
3332 tensor_blocks[tensor_block_idx].head = ccv_array_new(sizeof(int), 1, 0);
3333 /* Attach to duplicated exec for this tensor block. */
3334 ccv_array_push(tensor_blocks[tensor_block_idx].head, &dup_exec_ref[idx * unroll_count + k]);
3335 } else {
3336 tensor_blocks[tensor_block_idx].pin_mem = tensor_blocks[tensor_block_idx].pin_mem || s_alloc_prep->buffers[i].pin_mem;
3337 tensor_blocks[tensor_block_idx].size = ccv_max(tensor_blocks[tensor_block_idx].size, s_alloc_prep->buffers[i].size)({ typeof (tensor_blocks[tensor_block_idx].size) _a = (tensor_blocks
[tensor_block_idx].size); typeof (s_alloc_prep->buffers[i]
.size) _b = (s_alloc_prep->buffers[i].size); (_a > _b) ?
_a : _b; })
;
3338 _ccv_nnc_tensor_block_add_exec(exec_dep, dup_exec_ref[idx * unroll_count + k], tensor_blocks[tensor_block_idx]);
3339
3340 }
3341 if (dup_p_refs && dup_p_refs->rnum > 0)
3342 {
3343 /* Not nil, not self-reflecting. */
3344 for (j = 0; j < dup_p_refs->rnum; j++)
3345 {
3346 const int dup_p_ref = *(int*)ccv_array_get(dup_p_refs, j)((void*)(((char*)((dup_p_refs)->data)) + (size_t)(dup_p_refs
)->rsize * (size_t)(j)))
;
3347 assert(dup_p_ref >= 0)((void) sizeof ((dup_p_ref >= 0) ? 1 : 0), __extension__ (
{ if (dup_p_ref >= 0) ; else __assert_fail ("dup_p_ref >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 3347, __extension__ __PRETTY_FUNCTION__
); }))
;
3348 assert(dup_p_ref < symbolic_graph->tensor_symbol_info->rnum)((void) sizeof ((dup_p_ref < symbolic_graph->tensor_symbol_info
->rnum) ? 1 : 0), __extension__ ({ if (dup_p_ref < symbolic_graph
->tensor_symbol_info->rnum) ; else __assert_fail ("dup_p_ref < symbolic_graph->tensor_symbol_info->rnum"
, "ccv_nnc_symbolic_graph_compile.c", 3348, __extension__ __PRETTY_FUNCTION__
); }))
;
3349 // If it points to a p_ref upwards, check whether this is an output; if it is an output, add it to
3350 // this block's dup_p_refs. It propagates back all the way to the upper layer's buffer object.
3351 if (tensor_symbol_info[dup_p_ref].p_ref)
3352 {
3353 const int p_ref_0 = tensor_symbol_info[dup_p_ref].p_ref - 1;
3354 assert(p_node_info)((void) sizeof ((p_node_info) ? 1 : 0), __extension__ ({ if (
p_node_info) ; else __assert_fail ("p_node_info", "ccv_nnc_symbolic_graph_compile.c"
, 3354, __extension__ __PRETTY_FUNCTION__); }))
;
3355 const int p_ref_0_is_in_or_out = _ccv_nnc_is_symbolic_graph_exec_input_or_output(p_ref_0, p_node_info);
3356 if (p_ref_0_is_in_or_out == 1) // If it is out tensor, mark dup_p_ref for this.
3357 {
3358 if (!tensor_blocks[tensor_block_idx].dup_p_refs)
3359 tensor_blocks[tensor_block_idx].dup_p_refs = ccv_array_new(sizeof(int), 1, 0);
3360 ccv_array_add_unique_int(tensor_blocks[tensor_block_idx].dup_p_refs, p_ref_0);
3361 }
3362 }
3363 assert(dup_tensor_block_ref[dup_p_ref * unroll_count + k] >= 0 && dup_tensor_block_ref[dup_p_ref * unroll_count + k] != dup_p_ref)((void) sizeof ((dup_tensor_block_ref[dup_p_ref * unroll_count
+ k] >= 0 && dup_tensor_block_ref[dup_p_ref * unroll_count
+ k] != dup_p_ref) ? 1 : 0), __extension__ ({ if (dup_tensor_block_ref
[dup_p_ref * unroll_count + k] >= 0 && dup_tensor_block_ref
[dup_p_ref * unroll_count + k] != dup_p_ref) ; else __assert_fail
("dup_tensor_block_ref[dup_p_ref * unroll_count + k] >= 0 && dup_tensor_block_ref[dup_p_ref * unroll_count + k] != dup_p_ref"
, "ccv_nnc_symbolic_graph_compile.c", 3363, __extension__ __PRETTY_FUNCTION__
); }))
;
3364 const int dup_dup_p_ref = dup_tensor_block_ref[dup_p_ref * unroll_count + k];
3365 assert(tensor_blocks[dup_dup_p_ref].tail)((void) sizeof ((tensor_blocks[dup_dup_p_ref].tail) ? 1 : 0),
__extension__ ({ if (tensor_blocks[dup_dup_p_ref].tail) ; else
__assert_fail ("tensor_blocks[dup_dup_p_ref].tail", "ccv_nnc_symbolic_graph_compile.c"
, 3365, __extension__ __PRETTY_FUNCTION__); }))
;
3366 if (!tensor_blocks[tensor_block_idx].tail)
3367 tensor_blocks[tensor_block_idx].tail = ccv_array_new(sizeof(int), tensor_blocks[dup_dup_p_ref].tail->rnum, 0);
3368 for (q = 0; q < tensor_blocks[dup_dup_p_ref].tail->rnum; q++)
3369 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[dup_dup_p_ref].tail, q)((void*)(((char*)((tensor_blocks[dup_dup_p_ref].tail)->data
)) + (size_t)(tensor_blocks[dup_dup_p_ref].tail)->rsize * (
size_t)(q)))
, tensor_blocks[tensor_block_idx]);
3370 // We have to add it to the wrap around companion_ref as well.
3371 if (tensor_blocks[dup_dup_p_ref].companion_ref)
3372 {
3373 const int companion_ref = tensor_blocks[dup_dup_p_ref].companion_ref - 1;
3374 for (q = 0; tensor_blocks[companion_ref].head && q < tensor_blocks[companion_ref].head->rnum; q++)
3375 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[companion_ref].head, q)((void*)(((char*)((tensor_blocks[companion_ref].head)->data
)) + (size_t)(tensor_blocks[companion_ref].head)->rsize * (
size_t)(q)))
, tensor_blocks[tensor_block_idx]);
3376 for (q = 0; tensor_blocks[companion_ref].tail && q < tensor_blocks[companion_ref].tail->rnum; q++)
3377 _ccv_nnc_tensor_block_add_exec(exec_dep, *(int*)ccv_array_get(tensor_blocks[companion_ref].tail, q)((void*)(((char*)((tensor_blocks[companion_ref].tail)->data
)) + (size_t)(tensor_blocks[companion_ref].tail)->rsize * (
size_t)(q)))
, tensor_blocks[tensor_block_idx]);
3378 }
3379 }
3380 } else if (new_anonymous_tensor_block) {
3381 tensor_blocks[tensor_block_idx].tail = ccv_array_new(sizeof(int), 1, 0);
3382 ccv_array_push(tensor_blocks[tensor_block_idx].tail, &dup_exec_ref[idx * unroll_count + k]);
3383 }
3384 if (new_anonymous_tensor_block)
3385 ++tensor_block_size;
3386 }
3387 }
3388 }
3389 }
3390 }
3391 } ccv_nnc_graph_visit_endfor} }
3392 if (anonymous_block_free_list)
3393 ccv_array_free(anonymous_block_free_list);
3394 ccfreefree(tensor_fold);
3395 // It is time to guess the best tensor placement and create the opaque tensor arena. The alloc_dep will return
3396 // the allocation dependencies, i.e., which tensor reuses which existing tensor.
3397 ccv_nnc_tensor_alloc_prep_t* alloc_prep = _ccv_nnc_tensor_alloc_prep_new(exec_dep, tensor_blocks, tensor_block_size);
3398 ccv_matrix_free(exec_dep);
3399 prep->while_count_tensor = 0;
3400 prep->dup_breakpoints = 0;
3401 prep->p = 0;
3402 prep->symbolic_graph = symbolic_graph;
3403 prep->p_idx = symbolic_graph->p_idx;
3404 prep->exec_idx = symbolic_graph->exec_idx;
3405 prep->sub_prep_size = symbolic_graph->sub_graphs ? symbolic_graph->sub_graphs->rnum : 0;
3406 prep->sub_preps = sub_preps;
3407 prep->exec_symbol_info_size = symbolic_graph->exec_symbol_info->rnum;
3408 prep->exec_symbol_info = exec_symbol_info;
3409 prep->tensor_symbol_info_size = symbolic_graph->tensor_symbol_info->rnum;
3410 prep->tensor_symbol_info = tensor_symbol_info;
3411 prep->unroll_count = unroll_count;
3412 prep->dup_tensor_block_ref = dup_tensor_block_ref;
3413 prep->tensor_block_size = tensor_block_size;
3414 prep->tensor_blocks = tensor_blocks;
3415 prep->exec_flags = exec_flags;
3416 prep->visit = visit;
3417 prep->alloc_prep = alloc_prep;
3418 if (dup_graph)
3419 ccv_nnc_symbolic_graph_free(dup_graph);
3420 if (dup_exec_ref)
3421 ccfreefree(dup_exec_ref);
3422 return prep;
3423}
3424
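The prep routine above tracks which parent tensor blocks were folded with a plain bitset of 32-bit words: a block is marked with tensor_fold[i >> 5] |= (1u << (i & 0x1f)) at line 3124, tested the same way at line 3139, and the whole set is cleared per node with the memset at line 3078. Below is a minimal, self-contained sketch of that idiom; the bitset_* names are illustrative and not part of the library.

#include <stdint.h>
#include <stdlib.h>

/* Each uint32_t word stores 32 flags; index i lives in word i >> 5, bit i & 0x1f. */
static uint32_t* bitset_new(const int n)
{
	const size_t words = (size_t)(n + 31) >> 5;
	return (uint32_t*)calloc(words, sizeof(uint32_t)); /* calloc gives an all-clear set */
}

static void bitset_mark(uint32_t* const bits, const int i)
{
	bits[i >> 5] |= (1u << (i & 0x1f)); /* same expression as the tensor_fold marking above */
}

static int bitset_test(const uint32_t* const bits, const int i)
{
	return !!(bits[i >> 5] & (1u << (i & 0x1f)));
}

A usage pairs bitset_mark at fold time with bitset_test in the later condition, exactly mirroring lines 3124 and 3139 above.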
3425static void _ccv_nnc_symbolic_graph_prep_free(ccv_nnc_symbolic_graph_prep_t* const prep)
3426{
3427 int i;
3428 _ccv_nnc_tensor_blocks_free(prep->tensor_blocks, prep->tensor_block_size);
3429 ccfreefree(prep->exec_flags);
3430 for (i = 0; i < prep->sub_prep_size; i++)
3431 if (prep->sub_preps[i])
3432 _ccv_nnc_symbolic_graph_prep_free(prep->sub_preps[i]);
3433 if (prep->sub_preps)
3434 ccfreefree(prep->sub_preps);
3435 ccfreefree(prep->tensor_symbol_info);
3436 ccfreefree(prep->exec_symbol_info);
3437 if (prep->dup_tensor_block_ref)
3438 ccfreefree(prep->dup_tensor_block_ref);
3439 _ccv_nnc_tensor_alloc_prep_free(prep->alloc_prep);
3440 ccv_nnc_graph_visit_free(prep->visit);
3441 ccfreefree(prep);
3442}
3443
3444static void _ccv_nnc_symbolic_graph_prep_while_count_tensor(ccv_nnc_symbolic_graph_prep_t* const graph_prep)
3445{
3446 int i, j;
3447 ccv_nnc_graph_visit_for(graph_prep->visit, graph_prep->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (graph_prep->visit)->
size; _i_++) { const int idx __attribute__((unused)) = (graph_prep
->visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (graph_prep->visit)->node[_i_].term; typeof
((graph_prep->exec_symbol_info)) const node __attribute__
((unused)) = (graph_prep->exec_symbol_info) + idx;
{
3448 if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
3449 {
3450 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[0] - 1;
3451 assert(graph_ref >= 0)((void) sizeof ((graph_ref >= 0) ? 1 : 0), __extension__ (
{ if (graph_ref >= 0) ; else __assert_fail ("graph_ref >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 3451, __extension__ __PRETTY_FUNCTION__
); }))
;
3452 ccv_nnc_symbolic_graph_prep_t* const sub_prep = graph_prep->sub_preps[graph_ref];
3453 for (i = 0; i < node->p_while.input_size; i++)
3454 if (CCV_NNC_IS_WHILE_COUNT_TENSOR_SYMBOL(node->p_while.inputs[i])(((uint32_t)(node->p_while.inputs[i]) & 0xf) == 0xe))
3455 {
3456 ccv_nnc_symbolic_graph_prep_t* prep = sub_prep;
3457 const int d = CCV_NNC_DECODE_WHILE_COUNT_SYMBOL(node->p_while.inputs[i])((~(uint32_t)(node->p_while.inputs[i])) >> 4);
3458 for (j = 0; j < d; j++)
3459 prep = prep->p;
3460 prep->while_count_tensor = 1;
3461 }
3462 }
3463 for (i = 0; i < node->graph_ref_size; i++)
3464 {
3465 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
3466 if (graph_ref >= 0)
3467 _ccv_nnc_symbolic_graph_prep_while_count_tensor(graph_prep->sub_preps[graph_ref]);
3468 }
3469 } ccv_nnc_graph_visit_endfor} }
3470}
3471
3472static ccv_nnc_tensor_t* _ccv_nnc_tensor_from_graph_prep(const ccv_nnc_symbolic_graph_prep_t* const graph_prep, const int symbol)
3473{
3474 if (symbol >= 0)
3475 return graph_prep->tensor_arena->vt_tensors[symbol];
3476 if (symbol == CCV_NNC_NO_TENSOR_SYMBOL)
3477 return 0;
3478 assert(CCV_NNC_IS_WHILE_COUNT_TENSOR_SYMBOL(symbol))((void) sizeof (((((uint32_t)(symbol) & 0xf) == 0xe)) ? 1
: 0), __extension__ ({ if ((((uint32_t)(symbol) & 0xf) ==
0xe)) ; else __assert_fail ("CCV_NNC_IS_WHILE_COUNT_TENSOR_SYMBOL(symbol)"
, "ccv_nnc_symbolic_graph_compile.c", 3478, __extension__ __PRETTY_FUNCTION__
); }))
;
3479 const ccv_nnc_symbolic_graph_prep_t* prep = graph_prep;
3480 int i;
3481 const int d = CCV_NNC_DECODE_WHILE_COUNT_SYMBOL(symbol)((~(uint32_t)(symbol)) >> 4);
3482 for (i = 0; i < d; i++)
3483 prep = prep->p;
3484 assert(prep->while_count_tensor)((void) sizeof ((prep->while_count_tensor) ? 1 : 0), __extension__
({ if (prep->while_count_tensor) ; else __assert_fail ("prep->while_count_tensor"
, "ccv_nnc_symbolic_graph_compile.c", 3484, __extension__ __PRETTY_FUNCTION__
); }))
;
3485 return (ccv_nnc_tensor_t*)_ccv_nnc_tensor_metadata_get(prep->tensor_arena->tensor_metadata, (0 << 1) + 1);
3486}
3487
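_ccv_nnc_tensor_from_graph_prep above treats a negative symbol as an encoded while-count reference: the low nibble 0xe identifies it, and (~(uint32_t)symbol) >> 4 recovers how many parent graphs to walk. The matching encoder is not shown in this excerpt; the round-trip below is derived only from the two macros visible here, so treat the exact bit layout (the 0x1 filler nibble in particular) as an assumption rather than the library's definition.

#include <assert.h>
#include <stdint.h>

/* Hypothetical encoder reconstructed from CCV_NNC_IS_WHILE_COUNT_TENSOR_SYMBOL and
 * CCV_NNC_DECODE_WHILE_COUNT_SYMBOL as they appear above; illustrative only. */
static int32_t encode_while_count_symbol(const uint32_t depth)
{
	return (int32_t)~((depth << 4) | 0x1); /* the complement makes the low nibble 0xe and the value negative */
}

int main(void)
{
	const int32_t symbol = encode_while_count_symbol(2);
	assert(symbol < 0); /* so the early `symbol >= 0` return above does not take it */
	assert(((uint32_t)symbol & 0xf) == 0xe); /* passes the IS_WHILE_COUNT check */
	assert(((~(uint32_t)symbol) >> 4) == 2); /* decodes back to a depth of 2 parent graphs */
	return 0;
}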
3488static void _ccv_nnc_graph_exec_arena_topsort(ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_arena_t* const graph_exec_arena)
3489{
3490 int i;
3491 int* const exec_cvt = (int*)ccmallocmalloc(sizeof(int) * graph->exec_info->rnum);
3492 ccv_nnc_graph_topsort(graph, exec_cvt, graph->exec_info->rnum);
3493 graph_exec_arena->source.d = exec_cvt[graph_exec_arena->source.d];
3494 graph_exec_arena->destination.d = exec_cvt[graph_exec_arena->destination.d];
3495 ccv_nnc_graph_exec_t* const graph_execs = graph_exec_arena->graph_execs;
3496 for (i = 0; i < graph_exec_arena->graph_exec_size; i++)
3497 if (graph_execs[i].graph == graph)
3498 graph_execs[i].d = exec_cvt[graph_execs[i].d];
3499 ccfreefree(exec_cvt);
3500}
3501
3502static ccv_nnc_graph_exec_arena_t* _ccv_nnc_graph_exec_arena_new(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_symbolic_graph_prep_t* const graph_prep, const ccv_nnc_tensor_arena_t* const tensor_arena)
3503{
3504 int i, j, k;
3505 ccv_nnc_graph_t* const graph = graph_prep->graph;
3506 const int exec_symbol_info_size = symbolic_graph->exec_symbol_info->rnum;
3507 ccv_nnc_graph_exec_arena_t* const graph_exec_arena = (ccv_nnc_graph_exec_arena_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_arena_t) + sizeof(ccv_nnc_graph_exec_arena_t*) * graph_prep->sub_prep_size + sizeof(ccv_nnc_graph_exec_t) * (exec_symbol_info_size - 1));
3508 graph_exec_arena->graph_ref = (intptr_t)symbolic_graph;
3509 graph_exec_arena->graph_exec_size = exec_symbol_info_size;
3510 graph_exec_arena->sub_arena_size = graph_prep->sub_prep_size;
3511 graph_exec_arena->sub_arenas = (ccv_nnc_graph_exec_arena_t**)(graph_exec_arena->graph_execs + exec_symbol_info_size);
3512 memset(graph_exec_arena->sub_arenas, 0, sizeof(ccv_nnc_graph_exec_arena_t*) * graph_exec_arena->sub_arena_size);
3513 ccv_nnc_graph_exec_t* const graph_execs = graph_exec_arena->graph_execs;
3514 int max_input_size = 0, max_output_size = 0, max_breakpoint_size = 0;
3515 for (i = 0; i < exec_symbol_info_size; i++)
Step 1: Assuming 'i' is >= 'exec_symbol_info_size'
Step 2: Loop condition is false. Execution continues on line 3524
3516 {
3517 max_input_size = ccv_max(max_input_size, graph_prep->exec_symbol_info[i].input_size)({ typeof (max_input_size) _a = (max_input_size); typeof (graph_prep
->exec_symbol_info[i].input_size) _b = (graph_prep->exec_symbol_info
[i].input_size); (_a > _b) ? _a : _b; })
;
3518 max_output_size = ccv_max(max_input_size, graph_prep->exec_symbol_info[i].output_size)({ typeof (max_input_size) _a = (max_input_size); typeof (graph_prep
->exec_symbol_info[i].output_size) _b = (graph_prep->exec_symbol_info
[i].output_size); (_a > _b) ? _a : _b; })
;
3519 if (graph_prep->exec_symbol_info[i].flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
3520 max_input_size = ccv_max(max_input_size, graph_prep->exec_symbol_info[i].p_while.input_size)({ typeof (max_input_size) _a = (max_input_size); typeof (graph_prep
->exec_symbol_info[i].p_while.input_size) _b = (graph_prep
->exec_symbol_info[i].p_while.input_size); (_a > _b) ? _a
: _b; })
;
3521 graph_execs[i].d = CCV_NNC_NO_TENSOR_SYMBOL;
3522 graph_execs[i].graph = 0;
3523 }
3524 for (i = 0; i < graph_prep->sub_prep_size; i++)
Step 3: Assuming 'i' is >= field 'sub_prep_size'
Step 4: Loop condition is false. Execution continues on line 3526
3525 max_breakpoint_size = ccv_max(max_breakpoint_size, (*(ccv_nnc_symbolic_graph_t**)ccv_array_get(symbolic_graph->sub_graphs, i))->breakpoint_size)({ typeof (max_breakpoint_size) _a = (max_breakpoint_size); typeof
((*(ccv_nnc_symbolic_graph_t**)((void*)(((char*)((symbolic_graph
->sub_graphs)->data)) + (size_t)(symbolic_graph->sub_graphs
)->rsize * (size_t)(i))))->breakpoint_size) _b = ((*(ccv_nnc_symbolic_graph_t
**)((void*)(((char*)((symbolic_graph->sub_graphs)->data
)) + (size_t)(symbolic_graph->sub_graphs)->rsize * (size_t
)(i))))->breakpoint_size); (_a > _b) ? _a : _b; })
;
3526 ccv_nnc_tensor_t* max_inputs[ccv_max(1, max_input_size)({ typeof (1) _a = (1); typeof (max_input_size) _b = (max_input_size
); (_a > _b) ? _a : _b; })
];
Step 5: '?' condition is true
3527 ccv_nnc_tensor_t* max_outputs[ccv_max(1, max_output_size)({ typeof (1) _a = (1); typeof (max_output_size) _b = (max_output_size
); (_a > _b) ? _a : _b; })
];
Step 6: '?' condition is true
3528 ccv_nnc_graph_exec_t max_breakpoints[ccv_max(1, max_breakpoint_size)({ typeof (1) _a = (1); typeof (max_breakpoint_size) _b = (max_breakpoint_size
); (_a > _b) ? _a : _b; })
];
Step 7: '?' condition is true
3529 const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = graph_prep->exec_symbol_info;
3530 const ccv_nnc_graph_exec_flag_t* const exec_flags = graph_prep->exec_flags;
3531 // Create nodes; this is in topological order.
3532 ccv_nnc_graph_visit_for(graph_prep->visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (graph_prep->visit)->
size; _i_++) { const int idx __attribute__((unused)) = (graph_prep
->visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (graph_prep->visit)->node[_i_].term; typeof
((exec_symbol_info)) const node __attribute__((unused)) = (exec_symbol_info
) + idx;
{
Step 8: Assuming '_i_' is >= field 'size'
Step 9: Loop condition is false. Execution continues on line 3605
3533 if (CCV_NO_GRAPH_EXEC(graph_execs[idx])((graph_execs[idx]).graph == 0))
3534 {
3535 for (i = 0; i < node->input_size; i++)
3536 max_inputs[i] = _ccv_nnc_tensor_from_graph_prep(graph_prep, node->inputs[i]);
3537 for (i = 0; i < node->output_size; i++)
3538 max_outputs[i] = node->outputs[i] >= 0 ? tensor_arena->vt_tensors[node->outputs[i]] : 0;
3539 if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
3540 {
3541 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[0] - 1;
3542 assert(graph_ref >= 0)((void) sizeof ((graph_ref >= 0) ? 1 : 0), __extension__ (
{ if (graph_ref >= 0) ; else __assert_fail ("graph_ref >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 3542, __extension__ __PRETTY_FUNCTION__
); }))
;
3543 ccv_nnc_symbolic_graph_prep_t* const sub_prep = graph_prep->sub_preps[graph_ref];
3544 ccv_nnc_graph_t* const sub_graph = sub_prep->graph;
3545 graph_execs[idx] = ccv_nnc_graph_while(graph, node->cmd.cmd, sub_graph);
3546 const ccv_nnc_symbolic_graph_t* const sub_symbolic_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(symbolic_graph->sub_graphs, graph_ref)((void*)(((char*)((symbolic_graph->sub_graphs)->data)) +
(size_t)(symbolic_graph->sub_graphs)->rsize * (size_t)
(graph_ref)))
;
3547 ccv_nnc_graph_exec_arena_t* const sub_arena = graph_exec_arena->sub_arenas[graph_ref] = _ccv_nnc_graph_exec_arena_new(sub_symbolic_graph, ccv_nnc_symbolic_graph_sources(sub_symbolic_graph), ccv_nnc_symbolic_graph_source_size(sub_symbolic_graph), ccv_nnc_symbolic_graph_destinations(sub_symbolic_graph), ccv_nnc_symbolic_graph_destination_size(sub_symbolic_graph), graph_prep->sub_preps[graph_ref], tensor_arena->sub_arenas[graph_ref]);
3548 ccv_nnc_graph_exec_set_io(graph, graph_execs[idx], max_inputs, node->input_size, max_outputs, node->output_size);
3549 for (i = 0; i < node->p_while.input_size; i++)
3550 max_inputs[i] = _ccv_nnc_tensor_from_graph_prep(sub_prep, node->p_while.inputs[i]);
3551 for (i = 0; i < sub_symbolic_graph->breakpoint_size; i++)
3552 max_breakpoints[i] = ccv_nnc_graph_exec_from_symbol(sub_arena, sub_symbolic_graph->breakpoints[i]);
3553 ccv_nnc_graph_set_while_expr(sub_graph, node->p_while.expr, node->p_while.data, max_inputs, node->p_while.input_size, max_breakpoints, sub_symbolic_graph->breakpoint_size);
3554 _ccv_nnc_graph_exec_arena_topsort(sub_graph, sub_arena);
3555 } else if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
3556 for (i = 0; i < node->output_size; i++)
3557 if (max_outputs[i] && max_outputs[i]->alias_ref)
3558 max_outputs[i] = (ccv_nnc_tensor_t*)max_outputs[i]->alias_ref;
3559 graph_execs[idx] = ccv_nnc_graph_case_of_new(graph, node->cmd.cmd, max_inputs + node->case_of.argument.offset, node->case_of.argument.size, max_outputs, node->output_size);
3560 // Check whether this is already covered in the inputs; if not, it needs to be covered in the update.
3561 for (i = 0; i < node->case_of.argument.offset; i++)
3562 {
3563 ccv_nnc_tensor_t* const update = max_inputs[i];
3564 if (!CCV_IS_TENSOR_MULTIVIEW(update)((*(int*)(update)) & CCV_TENSOR_MULTIVIEW)) // No need if it is a naked tensor.
3565 continue;
3566 int flag = 0;
3567 for (j = node->case_of.argument.offset; !flag && j < node->case_of.argument.size; j++)
3568 flag = (update == max_inputs[j]);
3569 if (!flag)
3570 ccv_nnc_graph_exec_add_as_affected(graph, graph_execs[idx], update);
3571 }
3572 const int offset = (exec_flags[idx].flags & CCV_NNC_GRAPH_EXEC_ATTR_CASE_OF_NO_BYPASS_IO) ? 1 : 0;
3573 ccv_nnc_graph_set_case_of_expr(graph, graph_execs[idx], node->case_of.expr, node->case_of.data, offset);
3574 if (exec_flags[idx].flags & CCV_NNC_GRAPH_EXEC_ATTR_CASE_OF_NO_BYPASS_IO)
3575 {
3576 // Add another graph for data transfer.
3577 ccv_nnc_graph_t* sub_graph = ccv_nnc_graph_new();
3578 for (i = 0; i < node->output_size; i++)
3579 max_outputs[i] = node->outputs[i] >= 0 ? tensor_arena->vt_tensors[node->outputs[i]] : 0;
3580 ccv_nnc_graph_exec_t io = ccv_nnc_graph_exec_new(sub_graph, ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0), ccv_nnc_no_hint, max_inputs, ccv_min(node->input_size, node->output_size)({ typeof (node->input_size) _a = (node->input_size); typeof
(node->output_size) _b = (node->output_size); (_a <
_b) ? _a : _b; })
, max_outputs, ccv_min(node->input_size, node->output_size)({ typeof (node->input_size) _a = (node->input_size); typeof
(node->output_size) _b = (node->output_size); (_a <
_b) ? _a : _b; })
);
3581 ccv_nnc_graph_set_sources(sub_graph, &io, 1);
3582 ccv_nnc_graph_set_destinations(sub_graph, &io, 1);
3583 ccv_nnc_graph_set_case_of(graph, graph_execs[idx], sub_graph, 0);
3584 int exec_cvt;
3585 ccv_nnc_graph_topsort(sub_graph, &exec_cvt, 1);
3586 }
3587 for (i = 0; i < node->graph_ref_size; i++)
3588 {
3589 const int graph_ref = CCV_NNC_GRAPH_REF(node)((node)->_heap_graph_ref ? (node)->_heap_graph_ref : (node
)->_inline_graph_ref)
[i] - 1;
3590 if (graph_ref < 0)
3591 continue;
3592 ccv_nnc_graph_t* const sub_graph = graph_prep->sub_preps[graph_ref]->graph;
3593 const ccv_nnc_symbolic_graph_t* const sub_symbolic_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(symbolic_graph->sub_graphs, graph_ref)((void*)(((char*)((symbolic_graph->sub_graphs)->data)) +
(size_t)(symbolic_graph->sub_graphs)->rsize * (size_t)
(graph_ref)))
;
3594 ccv_nnc_graph_exec_arena_t* const sub_arena = graph_exec_arena->sub_arenas[graph_ref] = _ccv_nnc_graph_exec_arena_new(sub_symbolic_graph, ccv_nnc_symbolic_graph_sources(sub_symbolic_graph), ccv_nnc_symbolic_graph_source_size(sub_symbolic_graph), ccv_nnc_symbolic_graph_destinations(sub_symbolic_graph), ccv_nnc_symbolic_graph_destination_size(sub_symbolic_graph), graph_prep->sub_preps[graph_ref], tensor_arena->sub_arenas[graph_ref]);
3595 ccv_nnc_graph_set_case_of(graph, graph_execs[idx], sub_graph, i + offset);
3596 _ccv_nnc_graph_exec_arena_topsort(sub_graph, sub_arena);
3597 }
3598 } else {
3599 graph_execs[idx] = ccv_nnc_graph_exec_new(graph, node->cmd, node->hint, max_inputs, node->input_size, max_outputs, node->output_size);
3600 }
3601 ccv_nnc_graph_exec_set_io_flags(graph, graph_execs[idx], 0, 0, 0, 0);
3602 }
3603 } ccv_nnc_graph_visit_endfor} }
3604 // Then connect them.
3605 ccv_nnc_graph_visit_for(graph_prep->visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (graph_prep->visit)->
size; _i_++) { const int idx __attribute__((unused)) = (graph_prep
->visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (graph_prep->visit)->node[_i_].term; typeof
((exec_symbol_info)) const node __attribute__((unused)) = (exec_symbol_info
) + idx;
{
Step 10: Loop condition is false. Execution continues on line 3614
3606 if (node->outgoings)
3607 for (i = 0; i < node->outgoings->rnum; i++)
3608 {
3609 const int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
3610 if (graph_execs[outgoing].graph)
3611 ccv_nnc_graph_exec_concat(graph, graph_execs[idx], graph_execs[outgoing]);
3612 }
3613 } ccv_nnc_graph_visit_endfor} }
3614 int source_exec_created = 0;
3615 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = graph_prep->tensor_symbol_info;
3616 const ccv_nnc_tensor_block_t* const tensor_blocks = graph_prep->tensor_blocks;
3617 ccv_array_t* const* const alloc_dep = graph_prep->alloc_prep->alloc_dep;
3618 // After the graph is materialized, we need to handle the case that some of these tensors need to be initialized to zero before use.
3619 for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; i++)
Step 11: Assuming 'i' is >= field 'rnum'
Step 12: Loop condition is false. Execution continues on line 3684
3620 {
3621 if (TENSOR_REQUIRE_INIT(tensor_symbol_info[i].flags)(((tensor_symbol_info[i].flags) & CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS
) || ((tensor_symbol_info[i].flags) & CCV_NNC_TENSOR_SYMBOL_INIT_ONES
))
)
3622 {
3623 int ref = i;
3624 while (tensor_symbol_info[ref].alias_ref)
3625 ref = tensor_symbol_info[ref].alias_ref - 1;
3626 while (!TENSOR_EXPECT_COMPUTABLE(tensor_blocks[ref])(!((tensor_blocks[ref].flags & 0x3) == ALIAS) && !
((tensor_blocks[ref].flags & 0x3) == UNASSIGNED))
&& tensor_blocks[ref].ref)
3627 ref = tensor_blocks[ref].ref - 1;
3628 // This is not computable. It could be that we marked a const tensor as init zero.
3629 if (!TENSOR_EXPECT_COMPUTABLE(tensor_blocks[ref])(!((tensor_blocks[ref].flags & 0x3) == ALIAS) && !
((tensor_blocks[ref].flags & 0x3) == UNASSIGNED))
)
3630 continue;
3631 // If this tensor is not used by any exec, we don't need to init at all. Skip.
3632 if (!tensor_blocks[ref].head || tensor_blocks[ref].head->rnum == 0)
3633 continue;
3634 ccv_nnc_tensor_t* tensor = tensor_arena->vt_tensors[ref];
3635 // Now that we have the original tensor, we can get the actual tensor and construct the set command.
3636 ccv_nnc_graph_exec_t set_exec;
3637 if (tensor_symbol_info[i].flags & CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS)
3638 set_exec = ccv_nnc_graph_exec_new(graph, CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, ccv_nnc_no_hint, 0, 0, &tensor, 1);
3639 else if (tensor_symbol_info[i].flags & CCV_NNC_TENSOR_SYMBOL_INIT_ONES)
3640 set_exec = ccv_nnc_graph_exec_new(graph, CMD_SET_FORWARD(1)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={1,}}}, 0)
, ccv_nnc_no_hint, 0, 0, &tensor, 1);
3641 for (j = 0; j < tensor_blocks[ref].head->rnum; j++)
3642 {
3643 const int outgoing = *(int*)ccv_array_get(tensor_blocks[ref].head, j)((void*)(((char*)((tensor_blocks[ref].head)->data)) + (size_t
)(tensor_blocks[ref].head)->rsize * (size_t)(j)))
;
3644 if (outgoing >= exec_symbol_info_size)
3645 continue;
3646 assert(outgoing >= 0)((void) sizeof ((outgoing >= 0) ? 1 : 0), __extension__ ({
if (outgoing >= 0) ; else __assert_fail ("outgoing >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 3646, __extension__ __PRETTY_FUNCTION__
); }))
;
3647 assert(graph_execs[outgoing].graph)((void) sizeof ((graph_execs[outgoing].graph) ? 1 : 0), __extension__
({ if (graph_execs[outgoing].graph) ; else __assert_fail ("graph_execs[outgoing].graph"
, "ccv_nnc_symbolic_graph_compile.c", 3647, __extension__ __PRETTY_FUNCTION__
); }))
;
3648 ccv_nnc_graph_exec_concat(graph, set_exec, graph_execs[outgoing]);
3649 }
3650 int flags = 0;
3651 if (alloc_dep[ref])
3652 for (j = 0; j < alloc_dep[ref]->rnum; j++)
3653 {
3654 const int d = *(int*)ccv_array_get(alloc_dep[ref], j)((void*)(((char*)((alloc_dep[ref])->data)) + (size_t)(alloc_dep
[ref])->rsize * (size_t)(j)))
;
3655 // This is from alloc_dep, it should be computable.
3656 assert(TENSOR_EXPECT_COMPUTABLE(tensor_blocks[d]))((void) sizeof (((!((tensor_blocks[d].flags & 0x3) == ALIAS
) && !((tensor_blocks[d].flags & 0x3) == UNASSIGNED
))) ? 1 : 0), __extension__ ({ if ((!((tensor_blocks[d].flags
& 0x3) == ALIAS) && !((tensor_blocks[d].flags &
0x3) == UNASSIGNED))) ; else __assert_fail ("TENSOR_EXPECT_COMPUTABLE(tensor_blocks[d])"
, "ccv_nnc_symbolic_graph_compile.c", 3656, __extension__ __PRETTY_FUNCTION__
); }))
;
3657 if (tensor_blocks[d].tail)
3658 for (k = 0; k < tensor_blocks[d].tail->rnum; k++)
3659 {
3660 const int incoming = *(int*)ccv_array_get(tensor_blocks[d].tail, k)((void*)(((char*)((tensor_blocks[d].tail)->data)) + (size_t
)(tensor_blocks[d].tail)->rsize * (size_t)(k)))
;
3661 if (incoming >= exec_symbol_info_size)
3662 continue;
3663 assert(incoming >= 0)((void) sizeof ((incoming >= 0) ? 1 : 0), __extension__ ({
if (incoming >= 0) ; else __assert_fail ("incoming >= 0"
, "ccv_nnc_symbolic_graph_compile.c", 3663, __extension__ __PRETTY_FUNCTION__
); }))
;
3664 assert(graph_execs[incoming].graph)((void) sizeof ((graph_execs[incoming].graph) ? 1 : 0), __extension__
({ if (graph_execs[incoming].graph) ; else __assert_fail ("graph_execs[incoming].graph"
, "ccv_nnc_symbolic_graph_compile.c", 3664, __extension__ __PRETTY_FUNCTION__
); }))
;
3665 ccv_nnc_graph_exec_concat(graph, graph_execs[incoming], set_exec);
3666 flags = 1;
3667 }
3668 }
3669 // If we cannot find a start node for this exec, we need to append it to the no-op at the start.
3670 if (!flags)
3671 {
3672 if (!source_exec_created)
3673 {
3674 graph_exec_arena->source = ccv_nnc_graph_exec_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0), ccv_nnc_no_hint, 0, 0, 0, 0);
3675 source_exec_created = 1;
3676 }
3677 ccv_nnc_graph_exec_concat(graph, graph_exec_arena->source, set_exec);
3678 }
3679 }
3680 }
3681 // Now go through the list of tensors to see whether we need to do an explicit broadcast for these tensor multi-views
3682 // (we need that if one is not associated as an input / output of any exec, which is possible if all execs associate
3683 // with its alias).
3684 assert(tensor_arena->vt_tensor_size == graph_prep->tensor_symbol_info_size)((void) sizeof ((tensor_arena->vt_tensor_size == graph_prep
->tensor_symbol_info_size) ? 1 : 0), __extension__ ({ if (
tensor_arena->vt_tensor_size == graph_prep->tensor_symbol_info_size
) ; else __assert_fail ("tensor_arena->vt_tensor_size == graph_prep->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_compile.c", 3684, __extension__ __PRETTY_FUNCTION__
); }))
;
Step 13: Assuming field 'vt_tensor_size' is equal to field 'tensor_symbol_info_size'
Step 14: Taking true branch
3685 for (i = 0; i < tensor_arena->vt_tensor_size; i++)
Step 15: Assuming 'i' is >= field 'vt_tensor_size'
Step 16: Loop condition is false. Execution continues on line 3716
3686 {
3687 ccv_nnc_tensor_t* mv = tensor_arena->vt_tensors[i];
3688 // If it is a multiview tensor, inspect all its heads to see whether we have already associated it with the node.
3689 if (mv && CCV_IS_TENSOR_MULTIVIEW(mv)((*(int*)(mv)) & CCV_TENSOR_MULTIVIEW))
3690 {
3691 const ccv_array_t* const head = tensor_blocks[i].head;
3692 if (head && head->rnum > 0)
3693 for (j = 0; j < head->rnum; j++)
3694 {
3695 const int idx = *(int*)ccv_array_get(head, j)((void*)(((char*)((head)->data)) + (size_t)(head)->rsize
* (size_t)(j)))
;
3696 if (idx >= exec_symbol_info_size)
3697 continue;
3698 assert(idx >= 0)((void) sizeof ((idx >= 0) ? 1 : 0), __extension__ ({ if (
idx >= 0) ; else __assert_fail ("idx >= 0", "ccv_nnc_symbolic_graph_compile.c"
, 3698, __extension__ __PRETTY_FUNCTION__); }))
;
3699 const int d = graph_execs[idx].d;
3700 ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, d)((void*)(((char*)((graph->exec_info)->data)) + (size_t)
(graph->exec_info)->rsize * (size_t)(d)))
;
3701 int flag = 0;
3702 if (exec_info->tensor_wraps_ref)
3703 {
3704 ccv_nnc_graph_tensor_wrap_array_t* const tensor_wrap_array = *(ccv_nnc_graph_tensor_wrap_array_t**)ccv_array_get(graph->tensor_wraps, exec_info->tensor_wraps_ref - 1)((void*)(((char*)((graph->tensor_wraps)->data)) + (size_t
)(graph->tensor_wraps)->rsize * (size_t)(exec_info->
tensor_wraps_ref - 1)))
;
3705 for (k = 0; k < tensor_wrap_array->size && !flag; k++)
3706 flag = (tensor_wrap_array->tensor_wraps[k] && tensor_wrap_array->tensor_wraps[k]->tensors[0] == mv);
3707 }
3708 // If no tensor wrap sets the flag, it needs to be included in the cast.
3709 if (!flag)
3710 ccv_nnc_graph_exec_add_as_affected(graph, graph_execs[idx], mv);
3711 }
3712 }
3713 }
3714 // Create source / destination phony nodes. This is to facilitate use of the compiled graph.
3715 // Also, this is needed if you have init-zero execs.
3716 if (source_exec_created || source_size > 1)
Step 16.1: 'source_exec_created' is 0
Step 17: Assuming 'source_size' is > 1
Step 18: Taking true branch
3717 {
3718 if (!source_exec_created)
Step 18.1: 'source_exec_created' is 0
Step 19: Taking true branch
3719 graph_exec_arena->source = ccv_nnc_graph_exec_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0), ccv_nnc_no_hint, 0, 0, 0, 0);
3720 for (i = 0; i < source_size; i++)
Step 19.1: 'i' is < 'source_size'
Step 20: Loop condition is true. Entering loop body
3721 ccv_nnc_graph_exec_concat(graph, graph_exec_arena->source, graph_execs[sources[i].d]);
Step 21: Passed-by-value struct argument contains uninitialized data (e.g., field: 'd')
3722 } else {
3723 assert(!source_exec_created)((void) sizeof ((!source_exec_created) ? 1 : 0), __extension__
({ if (!source_exec_created) ; else __assert_fail ("!source_exec_created"
, "ccv_nnc_symbolic_graph_compile.c", 3723, __extension__ __PRETTY_FUNCTION__
); }))
;
3724 assert(source_size == 1)((void) sizeof ((source_size == 1) ? 1 : 0), __extension__ ({
if (source_size == 1) ; else __assert_fail ("source_size == 1"
, "ccv_nnc_symbolic_graph_compile.c", 3724, __extension__ __PRETTY_FUNCTION__
); }))
;
3725 graph_exec_arena->source = graph_execs[sources[0].d];
3726 }
3727 if (destination_size == 1)
3728 graph_exec_arena->destination = graph_execs[destinations[0].d];
3729 else {
3730 graph_exec_arena->destination = ccv_nnc_graph_exec_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0), ccv_nnc_no_hint, 0, 0, 0, 0);
3731 for (i = 0; i < destination_size; i++)
3732 ccv_nnc_graph_exec_concat(graph, graph_execs[destinations[i].d], graph_exec_arena->destination);
3733 }
3734 ccv_nnc_graph_set_sources(graph, &graph_exec_arena->source, 1);
3735 ccv_nnc_graph_set_destinations(graph, &graph_exec_arena->destination, 1);
3736 return graph_exec_arena;
3737}
3738
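Reading the path the analyzer lays out in steps 1 through 21: every graph_execs[...] entry is given a valid .d and .graph only inside the loop at line 3515, so once the analyzer assumes that loop never runs (step 1) while source_size is still greater than 1 (step 17), the element graph_execs[sources[i].d] read at line 3721 comes from memory that was never written, and copying that struct by value into ccv_nnc_graph_exec_concat is what the checker flags. The stripped-down sketch below reproduces that shape with made-up names; it is an illustration of the diagnostic, not a claim about how the library should be changed, and whether a particular analyzer build flags it verbatim is not guaranteed.

#include <stdlib.h>

typedef struct {
	int d;      /* the field the analyzer names in the warning */
	void* graph;
} exec_t;

/* Passing by value copies every field, including ones that may never have been written. */
static void concat(exec_t source, exec_t destination)
{
	(void)source;
	(void)destination;
}

static void build(const int exec_count, const int* const sources, const int source_size)
{
	exec_t phony = { 0, 0 };
	exec_t* const execs = (exec_t*)malloc(sizeof(exec_t) * (exec_count > 0 ? exec_count : 1));
	int i;
	for (i = 0; i < exec_count; i++) /* if exec_count is assumed to be 0, nothing below gets initialized */
	{
		execs[i].d = -1;
		execs[i].graph = 0;
	}
	for (i = 0; i < source_size; i++)
		concat(phony, execs[sources[i]]); /* an uninitialized exec_t is passed by value when the loop above was skipped */
	free(execs);
}

int main(void)
{
	const int sources[2] = { 0, 0 };
	build(0, sources, 2); /* mirrors the assumed state: exec_symbol_info_size == 0 while source_size > 1 */
	return 0;
}

One plausible reading of the report is that the combination assumed here (an empty exec_symbol_info together with more than one source) is simply not expected to occur in practice; the checker cannot see such an invariant, so it surfaces the uninitialized copy instead.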
3739static ccv_nnc_graph_t* _ccv_nnc_graph_find_pair(const ccv_nnc_symbolic_graph_prep_t* const graph_prep, const ccv_nnc_symbolic_graph_t* const pair)
3740{
3741 if (graph_prep->symbolic_graph == pair)
3742 return graph_prep->graph;
3743 int i;
3744 for (i = 0; i < graph_prep->sub_prep_size; i++)
3745 if (graph_prep->sub_preps[i])
3746 {
3747 ccv_nnc_graph_t* const graph = _ccv_nnc_graph_find_pair(graph_prep->sub_preps[i], pair);
3748 if (graph)
3749 return graph;
3750 }
3751 return 0;
3752}
3753
3754static void _ccv_nnc_graph_fixup_pair(const ccv_nnc_symbolic_graph_prep_t* const root_prep, ccv_nnc_symbolic_graph_prep_t* const graph_prep)
3755{
3756 int i;
3757 for (i = 0; i < graph_prep->sub_prep_size; i++)
3758 if (graph_prep->sub_preps[i])
3759 {
3760 if (graph_prep->sub_preps[i]->symbolic_graph->pair)
3761 graph_prep->sub_preps[i]->graph->pair = _ccv_nnc_graph_find_pair(root_prep, graph_prep->sub_preps[i]->symbolic_graph->pair);
3762 }
3763}
3764
3765static void _ccv_nnc_graph_exec_arena_fixup_pair_ref(const ccv_nnc_graph_exec_arena_t* const root_arena, const ccv_nnc_symbolic_graph_prep_t* const graph_prep, ccv_nnc_graph_exec_arena_t* const graph_exec_arena)
3766{
3767 assert(graph_exec_arena->graph_ref == (intptr_t)graph_prep->symbolic_graph)((void) sizeof ((graph_exec_arena->graph_ref == (intptr_t)
graph_prep->symbolic_graph) ? 1 : 0), __extension__ ({ if (
graph_exec_arena->graph_ref == (intptr_t)graph_prep->symbolic_graph
) ; else __assert_fail ("graph_exec_arena->graph_ref == (intptr_t)graph_prep->symbolic_graph"
, "ccv_nnc_symbolic_graph_compile.c", 3767, __extension__ __PRETTY_FUNCTION__
); }))
;
3768 int i;
3769 for (i = 0; i < graph_prep->exec_symbol_info_size; i++)
3770 {
3771 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(graph_prep->exec_symbol_info[i].flags)((graph_prep->exec_symbol_info[i].flags) & CCV_NNC_GRAPH_EXEC_DEAD
)
)
3772 continue;
3773 if (graph_exec_arena->graph_execs[i].graph && graph_prep->exec_symbol_info[i].pair_ref)
3774 {
3775 ccv_nnc_graph_exec_t pair_exec = ccv_nnc_graph_exec_from_symbol(root_arena, (ccv_nnc_graph_exec_symbol_t){
3776 .d = graph_prep->exec_symbol_info[i].pair_ref - 1,
3777 .graph = graph_prep->symbolic_graph->pair ? graph_prep->symbolic_graph->pair : graph_prep->symbolic_graph,
3778 });
3779 if (pair_exec.d >= 0)
3780 ccv_nnc_graph_exec_pair_with(graph_prep->graph, graph_exec_arena->graph_execs[i], pair_exec);
3781 }
3782 }
3783 for (i = 0; i < graph_prep->sub_prep_size; i++)
3784 if (graph_prep->sub_preps[i])
3785 _ccv_nnc_graph_exec_arena_fixup_pair_ref(root_arena, graph_prep->sub_preps[i], graph_exec_arena->sub_arenas[i]);
3786}
3787
3788static void _ccv_nnc_symbolic_graph_prep_dup_breakpoints_free(ccv_nnc_symbolic_graph_prep_t* const graph_prep)
3789{
3790 int i;
3791 if (graph_prep->dup_breakpoints)
3792 {
3793 // Strip the const modifier only possible because it is a sub-graph.
3794 ccv_nnc_symbolic_graph_t* const symbolic_graph = (ccv_nnc_symbolic_graph_t*)graph_prep->symbolic_graph;
3795 for (i = 0; i < graph_prep->dup_breakpoints->rnum; i++)
3796 ccv_nnc_graph_exec_symbol_free(symbolic_graph, *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(graph_prep->dup_breakpoints, i));
3797 ccv_array_free(graph_prep->dup_breakpoints);
3798 graph_prep->dup_breakpoints = 0;
3799 graph_prep->exec_symbol_info_size = symbolic_graph->exec_symbol_info->rnum;
3800 // Afterwards, we have to regenerate the exec_symbol_info, fill in the information (through symbol_infer).
3801 memcpy(graph_prep->exec_symbol_info, ccv_array_get(symbolic_graph->exec_symbol_info, 0), sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph_prep->exec_symbol_info_size);
3802 // Since exec_symbol_info changed, create a new visit object.
3803 assert(symbolic_graph->sources);
3804 assert(symbolic_graph->destinations);
3805 ccv_nnc_graph_exec_symbol_t* const sources = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(symbolic_graph->sources, 0);
3806 const int source_size = symbolic_graph->sources->rnum;
3807 ccv_nnc_graph_exec_symbol_t* const destinations = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(symbolic_graph->destinations, 0);
3808 const int destination_size = symbolic_graph->destinations->rnum;
3809 ccv_nnc_graph_visit_t* visit = ccv_nnc_graph_visit_new(symbolic_graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(symbolic_graph->exec_symbol_info, 0), symbolic_graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0);
3810 ccv_nnc_graph_visit_free(graph_prep->visit);
3811 graph_prep->visit = visit;
3812 assert(graph_prep->p);
3813 ccv_nnc_symbolic_graph_symbol_infer(symbolic_graph, visit, sources, source_size, destinations, destination_size, graph_prep->p->tensor_symbol_info, graph_prep->p->tensor_symbol_info_size, graph_prep->tensor_symbol_info, graph_prep->exec_symbol_info);
3814 }
3815 ccv_nnc_graph_visit_for(graph_prep->visit, graph_prep->exec_symbol_info, node, idx) {
3816 for (i = 0; i < node->graph_ref_size; i++)
3817 {
3818 const int graph_ref = CCV_NNC_GRAPH_REF(node)[i] - 1;
3819 if (graph_ref >= 0)
3820 _ccv_nnc_symbolic_graph_prep_dup_breakpoints_free(graph_prep->sub_preps[graph_ref]);
3821 }
3822 } ccv_nnc_graph_visit_endfor
3823}
3824
3825const ccv_nnc_symbolic_graph_compile_param_t ccv_nnc_default_compile_params = {};
3826
3827void ccv_nnc_symbolic_graph_compile(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_symbolic_graph_compile_param_t compile_params, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, ccv_nnc_graph_t** const graph_ref, ccv_nnc_tensor_arena_t** const tensor_arena_ref, ccv_nnc_graph_exec_arena_t** const graph_exec_arena_ref)
3828{
3829 assert(graph_ref);
3830 assert(tensor_arena_ref);
3831 assert(graph_exec_arena_ref);
3832 int i;
3833 // Cannot bind the multi-view.
3834 for (i = 0; i < tensor_bind_size; i++)
3835 {
3836 assert(tensor_binds[i].tensor);
3837 assert(!CCV_IS_TENSOR_MULTIVIEW(tensor_binds[i].tensor));
3838 }
3839 ccv_nnc_symbolic_graph_prep_t* graph_prep = _ccv_nnc_symbolic_graph_prep_new(symbolic_graph, tensor_binds, tensor_bind_size, outputs, output_size, sources, source_size, destinations, destination_size, 0, 0, 0, 0);
3840 _ccv_nnc_symbolic_graph_prep_while_count_tensor(graph_prep);
3841 ccv_nnc_tensor_arena_t* tensor_arena = _ccv_nnc_tensor_arena_new(graph_prep, compile_params.allocator, 0, tensor_binds, tensor_bind_size);
3842 _ccv_nnc_tensor_arena_fixup_pair_ref_and_tape_var(tensor_arena, graph_prep, tensor_arena);
3843 *tensor_arena_ref = tensor_arena;
3844 // The above handled tensor allocation, now we need to materialize the graph from symbolic to real.
3845 _ccv_nnc_graph_fixup_pair(graph_prep, graph_prep);
3846 // Now tensor allocation is done; if there are any dup_breakpoints, clean them up.
3847 _ccv_nnc_symbolic_graph_prep_dup_breakpoints_free(graph_prep);
3848 *graph_ref = graph_prep->graph;
3849 ccv_nnc_graph_exec_arena_t* graph_exec_arena = _ccv_nnc_graph_exec_arena_new(symbolic_graph, sources, source_size, destinations, destination_size, graph_prep, tensor_arena);
3850 _ccv_nnc_graph_exec_arena_topsort(graph_prep->graph, graph_exec_arena);
3851 _ccv_nnc_graph_exec_arena_fixup_pair_ref(graph_exec_arena, graph_prep, graph_exec_arena);
3852 *graph_exec_arena_ref = graph_exec_arena;
3853 _ccv_nnc_symbolic_graph_prep_free(graph_prep);
3854}
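
// A minimal usage sketch of the compile entry point above, assuming a symbolic graph whose
// sources and destinations have already been set. The ccv_nnc_symbolic_graph_sources/_source_size
// and _destinations/_destination_size accessors are assumed to be available from the public API;
// the sketch passes no tensor binds, no pinned outputs, and the default compile parameters.
static void compile_sketch(ccv_nnc_symbolic_graph_t* const symbolic_graph, ccv_nnc_graph_t** const graph, ccv_nnc_tensor_arena_t** const tensor_arena, ccv_nnc_graph_exec_arena_t** const graph_exec_arena)
{
	ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params,
		0, 0, // no tensor binds
		0, 0, // no outputs that must be preserved
		ccv_nnc_symbolic_graph_sources(symbolic_graph), ccv_nnc_symbolic_graph_source_size(symbolic_graph),
		ccv_nnc_symbolic_graph_destinations(symbolic_graph), ccv_nnc_symbolic_graph_destination_size(symbolic_graph),
		graph, tensor_arena, graph_exec_arena);
	// The caller later releases the arenas with ccv_nnc_tensor_arena_free / ccv_nnc_graph_exec_arena_free below.
}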
3855
3856static void _ccv_nnc_tensor_arena_free(ccv_nnc_tensor_arena_t* const tensor_arena)
3857{
3858 // Buffers are inherited from above, no need to dealloc.
3859 int i;
3860 for (i = 0; i < tensor_arena->sub_arena_size; i++)
3861 if (tensor_arena->sub_arenas[i])
3862 _ccv_nnc_tensor_arena_free(tensor_arena->sub_arenas[i]);
3863 for (i = 0; i < tensor_arena->m_tensor_idx->rnum; i++)
3864 {
3865 ccv_nnc_tensor_multiview_t* const mv = (ccv_nnc_tensor_multiview_t*)_ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, *(int*)ccv_array_get(tensor_arena->m_tensor_idx, i));
3866 assert(mv && CCV_IS_TENSOR_MULTIVIEW(mv));
3867 ccv_nnc_tensor_multiview_free(*mv);
3868 }
3869 ccv_array_free(tensor_arena->tensor_metadata);
3870 ccv_array_free(tensor_arena->m_tensor_idx);
3871 if (tensor_arena->pb_vt_tensors)
3872 ccfree(tensor_arena->pb_vt_tensors);
3873 if (tensor_arena->vt_alias_r_refs_p)
3874 ccfree(tensor_arena->vt_alias_r_refs_p);
3875 if (tensor_arena->vt_sizes)
3876 ccfree(tensor_arena->vt_sizes);
3877 ccfree(tensor_arena);
3878}
3879
3880void ccv_nnc_tensor_bind_symbol(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_t* const tensor)
3881{
3882 assert(tensor_arena->graph_ref == (intptr_t)symbol.graph);
3883 assert(symbol.d < tensor_arena->vt_tensor_size);
3884 assert(symbol.d >= 0);
3885 // Only allocate this on-demand because not everyone uses this ccv_nnc_tensor_bind_symbol method.
3886 int i;
3887 if (!tensor_arena->pb_vt_tensors)
3888 {
3889 tensor_arena->pb_vt_tensors = (ccv_numeric_data_t*)cccalloc(tensor_arena->vt_tensor_size, sizeof(ccv_numeric_data_t));
3890 for (i = 0; i < tensor_arena->vt_tensor_size; i++)
3891 if (tensor_arena->vt_tensors[i])
3892 tensor_arena->pb_vt_tensors[i] = tensor_arena->vt_tensors[i]->data;
3893 }
3894 if (!tensor_arena->vt_alias_r_refs_p)
3895 {
3896 tensor_arena->vt_alias_r_refs_p = (int*)cccalloc(tensor_arena->vt_tensor_size * 2, sizeof(int));
3897 tensor_arena->vt_alias_r_refs = tensor_arena->vt_alias_r_refs_p + tensor_arena->vt_tensor_size;
3898 for (i = 0; i < tensor_arena->vt_tensor_size; i++)
3899 if (tensor_arena->vt_alias_refs[i])
3900 {
3901 const int alias_ref = tensor_arena->vt_alias_refs[i] - 1;
3902 assert(alias_ref >= 0 && alias_ref < tensor_arena->vt_tensor_size);
3903 ++tensor_arena->vt_alias_r_refs_p[alias_ref]; // Count how many aliases there are.
3904 }
3905 int refp = 0;
3906 for (i = 1; i < tensor_arena->vt_tensor_size; i++) // Allocate each with aliases position on vt_alias_r_refs. It points to the end.
3907 if (tensor_arena->vt_alias_r_refs_p[i])
3908 refp = (tensor_arena->vt_alias_r_refs_p[i] += refp);
3909 else
3910 tensor_arena->vt_alias_r_refs_p[i] = -1; // This has no refs.
3911 for (i = refp; i < tensor_arena->vt_tensor_size; i++)
3912 tensor_arena->vt_alias_r_refs[i] = -1; // These are not allocated.
3913 for (i = 0; i < tensor_arena->vt_tensor_size; i++)
3914 if (tensor_arena->vt_alias_refs[i])
3915 {
3916 const int alias_ref = tensor_arena->vt_alias_refs[i] - 1;
3917 assert(alias_ref >= 0 && alias_ref < tensor_arena->vt_tensor_size);
3918 const int pos = --tensor_arena->vt_alias_r_refs_p[alias_ref];
3919 assert(pos >= 0);
3920 tensor_arena->vt_alias_r_refs[pos] = i;
3921 }
3922 }
3923 const int symbol_d = tensor_arena->vt_alias_refs[symbol.d] ? tensor_arena->vt_alias_refs[symbol.d] - 1 : symbol.d;
3924 if (CCV_IS_TENSOR_VIEW(tensor))
3925 {
3926 assert(((ccv_nnc_tensor_view_t*)tensor)->off == 0); // I cannot handle off > 0 at the moment, it is possible, but requires additional verifications.
3927 assert((ccv_nnc_dimension_count(((ccv_nnc_tensor_view_t*)tensor)->inc) == 0 &&
3928 ccv_nnc_tensor_count(tensor->info) >= ccv_nnc_tensor_count(tensor_arena->vt_tensors[symbol_d]->info)) ||
3929 ccv_nnc_dimension_count(((ccv_nnc_tensor_view_t*)tensor)->inc) >= ccv_nnc_tensor_count(tensor_arena->vt_tensors[symbol_d]->info));
3930 } else
3931 { assert(ccv_nnc_tensor_count(tensor->info) >= ccv_nnc_tensor_count(tensor_arena->vt_tensors[symbol_d]->info)); }
3932 if (CCV_IS_TENSOR_VIEW(tensor_arena->vt_tensors[symbol.d]))
3933 { assert(((ccv_nnc_tensor_view_t*)tensor_arena->vt_tensors[symbol.d])->off == 0); }
3934 tensor_arena->vt_tensors[symbol_d]->data = tensor->data;
3935 if (tensor_arena->vt_alias_r_refs_p[symbol_d] >= 0)
3936 for (i = tensor_arena->vt_alias_r_refs_p[symbol_d]; i < tensor_arena->vt_tensor_size; i++)
3937 {
3938 const int d = tensor_arena->vt_alias_r_refs[i];
3939 if (d < 0 || symbol_d + 1 != tensor_arena->vt_alias_refs[d]) // Doesn't match, reached the end of it.
3940 break;
3941 ccv_nnc_tensor_t* const d_tensor = tensor_arena->vt_tensors[d];
3942 if (CCV_IS_TENSOR_VIEW(d_tensor))
3943 d_tensor->data.u8 = tensor->data.u8 + ((ccv_nnc_tensor_view_t*)d_tensor)->off;
3944 else
3945 d_tensor->data.u8 = tensor->data.u8;
3946 }
3947}
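
// A short sketch of the on-demand binding path above: bind an externally allocated tensor to a
// symbol before a run, then restore the original data pointers that were recorded in pb_vt_tensors.
// `symbol` and `external` are assumed to come from the caller and to be at least as large as the
// arena's tensor for that symbol (the asserts above enforce this).
static void bind_then_restore_sketch(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_t* const external)
{
	// Points the arena's tensor (and any aliases of it) at the external data.
	ccv_nnc_tensor_bind_symbol(tensor_arena, symbol, external);
	// ... run the concrete graph with the binding in effect ...
	// Put back the data pointers captured the first time ccv_nnc_tensor_bind_symbol was called.
	ccv_nnc_tensor_arena_clear_bindings(tensor_arena);
}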
3948
3949void ccv_nnc_tensor_arena_clear_bindings(ccv_nnc_tensor_arena_t* const tensor_arena)
3950{
3951 if (!tensor_arena->pb_vt_tensors)
3952 return;
3953 int i;
3954 for (i = 0; i < tensor_arena->vt_tensor_size; i++)
3955 if (tensor_arena->vt_tensors[i])
3956 tensor_arena->vt_tensors[i]->data = tensor_arena->pb_vt_tensors[i];
3957}
3958
3959uint64_t ccv_nnc_tensor_arena_size(const ccv_nnc_tensor_arena_t* const tensor_arena)
3960{
3961 uint64_t total_size = 0;
3962 int i;
3963 for (i = 0; i < tensor_arena->buffer_size; i++)
3964 total_size += tensor_arena->buffers[i].size;
3965 return total_size;
3966}
3967
3968static void _ccv_nnc_multiview_update_params(ccv_nnc_tensor_multiview_t* const mv, const ccv_nnc_tensor_param_t params)
3969{
3970 int i;
3971 if (mv->it)
3972 mv->it->info = params;
3973 for (i = 0; i < mv->repeat + mv->kind; i++)
3974 {
3975 ccv_nnc_tensor_t* tensor = CCV_NNC_MULTIVIEW_DATA(mv)[i];
3976 if (CCV_IS_TENSOR_MULTIVIEW(tensor))
3977 _ccv_nnc_multiview_update_params((ccv_nnc_tensor_multiview_t*)tensor, params);
3978 else
3979 tensor->info = params;
3980 }
3981}
3982
3983int ccv_nnc_tensor_arena_reinit(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph)
3984{
3985 int i;
3986 assert(graph->tensor_symbol_info->rnum >= tensor_arena->vt_tensor_size);
3987 if (!tensor_arena->vt_sizes) // Keep the original sizes so we can check against them to see if we will overflow.
3988 {
3989 tensor_arena->vt_sizes = (size_t*)ccmalloc(sizeof(size_t) * tensor_arena->vt_tensor_size);
3990 for (i = 0; i < tensor_arena->vt_tensor_size; i++)
3991 if (tensor_arena->vt_tensors[i] && !tensor_arena->vt_alias_refs[i])
3992 {
3993 ccv_nnc_tensor_t* tensor = tensor_arena->vt_tensors[i];
3994 if (CCV_IS_TENSOR_MULTIVIEW(tensor))
3995 {
3996 ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
3997 while (CCV_IS_TENSOR_MULTIVIEW(mv))
3998 mv = (ccv_nnc_tensor_multiview_t*)(mv->it ? mv->it : CCV_NNC_MULTIVIEW_DATA(mv)[0]);
3999 tensor = (ccv_nnc_tensor_t*)mv;
4000 }
4001 tensor_arena->vt_sizes[i] = ccv_nnc_tensor_data_size(tensor->info);
4002 }
4003 }
4004 int flag = 0;
4005 for (i = 0; !flag && i < tensor_arena->vt_tensor_size; i++)
4006 if (tensor_arena->vt_tensors[i] && !tensor_arena->vt_alias_refs[i])
4007 {
4008 ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i);
4009 flag = (tensor_arena->vt_sizes[i] < ccv_nnc_tensor_data_size(symbol_info->info));
4010 }
4011 if (flag)
4012 return -1;
4013 for (i = 0; i < tensor_arena->vt_tensor_size; i++)
4014 if (tensor_arena->vt_tensors[i])
4015 {
4016 ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i);
4017 ccv_nnc_tensor_t* tensor = tensor_arena->vt_tensors[i];
4018 if (CCV_IS_TENSOR_MULTIVIEW(tensor))
4019 {
4020 assert(!tensor_arena->vt_alias_refs[i]);
4021 _ccv_nnc_multiview_update_params((ccv_nnc_tensor_multiview_t*)tensor, symbol_info->info);
4022 } else if (!tensor_arena->vt_alias_refs[i])
4023 tensor->info = symbol_info->info;
4024 else {
4025 off_t off = ccv_nnc_tensor_view_offset(tensor->info.datatype, symbol_info->inc, symbol_info->ofs);
4026 tensor->info = symbol_info->info;
4027 const int alias_ref = tensor_arena->vt_alias_refs[i] - 1;
4028 tensor->data.u8 = tensor_arena->vt_tensors[alias_ref]->data.u8 + off;
4029 if (CCV_IS_TENSOR_VIEW(tensor))
4030 ((ccv_nnc_tensor_view_t*)tensor)->off = off;
4031 }
4032 }
4033 // Should handle sub_tensor_arena; we don't do that at the moment.
4034 assert(!graph->sub_graphs);
4035 return 0;
4036}
4037
4038void ccv_nnc_graph_exec_reinit(ccv_nnc_graph_exec_arena_t* const graph_exec_arena, ccv_nnc_graph_t* const graph, const ccv_nnc_symbolic_graph_t* const symbolic_graph)
4039{
4040 assert(symbolic_graph->exec_symbol_info->rnum >= graph_exec_arena->graph_exec_size);
4041 int i;
4042 for (i = 0; i < graph_exec_arena->graph_exec_size; i++)
4043 {
4044 const ccv_nnc_graph_exec_t graph_exec = graph_exec_arena->graph_execs[i];
4045 if (graph_exec.d < 0)
4046 continue;
4047 const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(symbolic_graph->exec_symbol_info, i);
4048 ccv_nnc_graph_exec_set(graph, graph_exec, symbol_info->cmd);
4049 }
4050}
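
// A sketch of the re-initialization flow defined above: after updating tensor symbol shapes on the
// same symbolic graph (without growing past the originally allocated sizes), refresh the arena's
// tensor parameters and the concrete graph's commands in place instead of recompiling.
static int reinit_sketch(ccv_nnc_tensor_arena_t* const tensor_arena, ccv_nnc_graph_exec_arena_t* const graph_exec_arena, ccv_nnc_graph_t* const graph, const ccv_nnc_symbolic_graph_t* const symbolic_graph)
{
	// Returns -1 if any tensor now needs more bytes than it was originally given.
	if (ccv_nnc_tensor_arena_reinit(tensor_arena, symbolic_graph) < 0)
		return -1;
	// Re-apply the (possibly updated) commands from the exec symbols onto the concrete graph.
	ccv_nnc_graph_exec_reinit(graph_exec_arena, graph, symbolic_graph);
	return 0;
}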
4051
4052void ccv_nnc_tensor_arena_buffer_free(ccv_nnc_tensor_arena_t* const tensor_arena)
4053{
4054 int i;
4055 for (i = 0; i < tensor_arena->buffer_size; i++)
4056 {
4057 if (!tensor_arena->buffers[i].ptr)
4058 continue;
4059 const int buffer_type = tensor_arena->buffers[i].type;
4060 const int memory_type = CCV_TENSOR_GET_MEMORY(buffer_type);
4061#ifdef HAVE_CUDA
4062 const int device_id = CCV_TENSOR_GET_DEVICE_ID(buffer_type);
4063 if (memory_type == CCV_TENSOR_GPU_MEMORY)
4064 {
4065 if (tensor_arena->allocator.isa && tensor_arena->allocator.isa->free)
4066 tensor_arena->allocator.isa->free(tensor_arena->buffers[i].ptr, tensor_arena->allocator.context.free);
4067 else
4068 cufree(device_id, tensor_arena->buffers[i].ptr);
4069 } else {
4070 assert(memory_type == CCV_TENSOR_CPU_MEMORY);
4071 if (tensor_arena->buffers[i].pin_mem)
4072 cuhostfree(tensor_arena->buffers[i].ptr);
4073 else
4074 ccfree(tensor_arena->buffers[i].ptr);
4075 }
4076#else
4077 assert(memory_type == CCV_TENSOR_CPU_MEMORY);
4078 ccfree(tensor_arena->buffers[i].ptr);
4079#endif
4080 tensor_arena->buffers[i].ptr = 0;
4081 }
4082}
4083
4084void ccv_nnc_tensor_arena_free(ccv_nnc_tensor_arena_t* const tensor_arena)
4085{
4086 ccv_nnc_tensor_arena_buffer_free(tensor_arena);
4087 _ccv_nnc_tensor_arena_free(tensor_arena);
4088}
4089
4090void ccv_nnc_graph_exec_arena_free(ccv_nnc_graph_exec_arena_t* const graph_exec_arena)
4091{
4092 int i;
4093 for (i = 0; i < graph_exec_arena->sub_arena_size; i++)
4094 if (graph_exec_arena->sub_arenas[i])
4095 ccv_nnc_graph_exec_arena_free(graph_exec_arena->sub_arenas[i]);
4096 ccfree(graph_exec_arena);
4097}
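
// A sketch of the teardown paths defined above. ccv_nnc_tensor_arena_buffer_free releases the
// backing CPU/GPU buffers early while keeping the arena's metadata (pointers are zeroed, so the
// later ccv_nnc_tensor_arena_free call is safe); the exec arena is freed separately. The
// ccv_nnc_graph_free call on the concrete graph is assumed available from the graph API.
static void teardown_sketch(ccv_nnc_graph_t* const graph, ccv_nnc_tensor_arena_t* const tensor_arena, ccv_nnc_graph_exec_arena_t* const graph_exec_arena)
{
	ccv_nnc_tensor_arena_buffer_free(tensor_arena); // Optional early release of buffer memory.
	ccv_nnc_tensor_arena_free(tensor_arena); // Frees metadata (and any buffers still held).
	ccv_nnc_graph_exec_arena_free(graph_exec_arena);
	ccv_nnc_graph_free(graph);
}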